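# Spaces: Sleeping / Sleeping
# NOTE(review): the line above looks like page-status text captured together with the listing, not part of the module — confirm and drop if so.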
import json
import sqlite3
from contextlib import closing
from dataclasses import dataclass, replace
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
@dataclass
class SkillMastery:
    """Ability estimate and practice statistics tracked for a single skill.

    Declared as a dataclass because the rest of the module constructs it with
    keyword arguments (``SkillMastery(skill=..., theta=..., ...)``); without the
    decorator the class has no ``__init__`` accepting those fields.
    """

    skill: str  # Skill identifier; primary key of the skill_mastery table
    theta: float  # IRT ability parameter (-3 to +3)
    sem: float  # Standard error of measurement
    last_practiced: datetime  # Timestamp of the most recent response for the skill
    practice_count: int  # Number of responses folded into this estimate
    success_rate: float  # Running success rate maintained by the tracer
@dataclass
class ItemResponse:
    """One learner response to one practice item.

    Declared as a dataclass so that instances can be created with keyword
    arguments and compared/printed field by field.
    """

    item_id: str  # Identifier of the item that was answered
    skill: str  # Skill the item exercises
    correct: bool  # Whether the answer was correct
    response_time: float  # Time taken to answer (unit not fixed by this module)
    hints_used: int  # Number of hints consumed before answering
    difficulty: float  # Item difficulty, on the same scale as theta
    timestamp: datetime  # When the response was given
class KnowledgeTracer:
    """Knowledge tracing system using Item Response Theory and Bayesian updating.

    Each skill has a ``SkillMastery`` record holding an ability estimate
    ``theta`` and its standard error ``sem``.  Every ``ItemResponse`` passed to
    ``update_mastery`` refines that estimate; the estimate and the raw response
    are both written to the SQLite database at ``db_path``.

    The response model is logistic with the discrimination fixed at 1.0:
    ``P(correct) = 1 / (1 + exp(-(theta - difficulty)))``.
    """

    # Bounds applied to every stored ability estimate.
    THETA_MIN = -3.0
    THETA_MAX = 3.0
    # Exponential decay rate applied per whole day since the last practice
    # (exp(-0.05) is roughly a 5% reduction per day).
    FORGETTING_RATE_PER_DAY = 0.05
    # Weight of the newest response in the success-rate moving average.
    SUCCESS_RATE_ALPHA = 0.1

    def __init__(self, db_path: str = "knowledge_tracing.sqlite"):
        """Create the tracer and make sure the database tables exist.

        Args:
            db_path: Path of the SQLite file used for persistence.
        """
        self.db_path = db_path
        self._init_database()
        self.skill_masteries: Dict[str, SkillMastery] = {}
        self.response_history: List[ItemResponse] = []

    def _connect(self):
        """Return a context manager yielding a connection closed on exit.

        A ``sqlite3.Connection`` used directly in a ``with`` statement only
        commits or rolls back the transaction; it does not close the
        connection.  Wrapping it in ``closing`` releases the handle as well.
        """
        return closing(sqlite3.connect(self.db_path))

    def _init_database(self):
        """Initialize database for storing tracing data."""
        # Outer manager closes the connection, inner one commits the DDL.
        with self._connect() as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS skill_mastery (
                    skill TEXT PRIMARY KEY,
                    theta REAL DEFAULT 0.0,
                    sem REAL DEFAULT 1.0,
                    last_practiced TIMESTAMP,
                    practice_count INTEGER DEFAULT 0,
                    success_rate REAL DEFAULT 0.0
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS item_responses (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    item_id TEXT,
                    skill TEXT,
                    correct BOOLEAN,
                    response_time REAL,
                    hints_used INTEGER,
                    difficulty REAL,
                    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_skill_responses ON item_responses(skill)
            """)

    @staticmethod
    def _p_correct(theta: float, difficulty: float) -> float:
        """Return the model probability of a correct answer."""
        return 1.0 / (1.0 + np.exp(-(theta - difficulty)))

    def _current_mastery(self, skill: str,
                         default_time: Optional[datetime] = None) -> "SkillMastery":
        """Return the mastery record for ``skill`` without caching a default.

        Looks in the in-memory cache first, then in the database.  When the
        skill is unknown, a neutral record (theta 0.0, sem 1.0, no practice) is
        returned; its ``last_practiced`` is ``default_time`` or, if that is not
        given, the current time.
        """
        if skill not in self.skill_masteries:
            self._load_skill_mastery(skill)
        known = self.skill_masteries.get(skill)
        if known is not None:
            return known
        return SkillMastery(
            skill=skill, theta=0.0, sem=1.0,
            last_practiced=default_time if default_time is not None else datetime.now(),
            practice_count=0, success_rate=0.0,
        )

    def update_mastery(self, response: "ItemResponse") -> float:
        """Update skill mastery using Bayesian updating with IRT.

        The prior is the current ``(theta, sem)`` of the skill.  The response
        contributes Fisher information ``p * (1 - p)``, so the posterior
        precision is ``1 / sem**2 + p * (1 - p)``.  Theta moves by
        ``(outcome - p) / posterior_precision`` (one Newton step on the
        log-posterior), which always raises theta after a correct answer and
        lowers it after an incorrect one.  The result is then decayed for the
        whole days elapsed since the previous practice and clipped to
        ``[THETA_MIN, THETA_MAX]``.

        Args:
            response: The observed response; it is appended to
                ``response_history`` and persisted.

        Returns:
            The updated theta for ``response.skill``.
        """
        skill = response.skill
        # Anchoring a brand-new skill at the response time keeps the elapsed
        # time at zero instead of slightly negative.
        current = self._current_mastery(skill, default_time=response.timestamp)

        p_correct = self._p_correct(current.theta, response.difficulty)
        information = p_correct * (1.0 - p_correct)
        prior_precision = 1.0 / (current.sem ** 2)
        posterior_precision = prior_precision + information
        posterior_sem = float(np.sqrt(1.0 / posterior_precision))

        outcome = 1.0 if response.correct else 0.0
        theta = current.theta + (outcome - p_correct) / posterior_precision

        # A timestamp earlier than last_practiced yields negative days, which
        # would turn the decay into growth; treat it as no elapsed time.
        elapsed_days = max((response.timestamp - current.last_practiced).days, 0)
        theta *= np.exp(-self.FORGETTING_RATE_PER_DAY * elapsed_days)

        updated = replace(
            current,
            theta=float(np.clip(theta, self.THETA_MIN, self.THETA_MAX)),
            sem=posterior_sem,
            last_practiced=response.timestamp,
            practice_count=current.practice_count + 1,
            success_rate=self._update_success_rate(
                current.success_rate, current.practice_count, response.correct
            ),
        )
        self.skill_masteries[skill] = updated
        self.response_history.append(response)

        # Save to database
        self._save_skill_mastery(updated)
        self._save_response(response)
        return updated.theta

    def _update_success_rate(self, current_rate: float, count: int, correct: bool) -> float:
        """Update exponential moving average of success rate.

        The first response (``count == 0``) sets the rate to 1.0 or 0.0
        outright; later ones are blended in with weight ``SUCCESS_RATE_ALPHA``.
        """
        observed = 1.0 if correct else 0.0
        if count == 0:
            return observed
        alpha = self.SUCCESS_RATE_ALPHA
        return alpha * observed + (1 - alpha) * current_rate

    def get_mastery_probability(self, skill: str) -> float:
        """Convert theta to mastery probability (0-1 scale).

        Logistic transformation: theta=0 -> 0.5, theta=+2 -> 0.88,
        theta=-2 -> 0.12.  Unknown skills use the default theta of 0.0.
        """
        theta = self._current_mastery(skill).theta
        return float(1.0 / (1.0 + np.exp(-theta)))

    def calculate_information_gain(self, skill: str, difficulty: float) -> float:
        """Calculate expected information gain for an item.

        Returns the Fisher information ``p * (1 - p)`` of an item with the
        given difficulty at the skill's current theta (0.0 if unknown).
        """
        p_correct = self._p_correct(self._current_mastery(skill).theta, difficulty)
        return float(p_correct * (1.0 - p_correct))

    def get_next_item_recommendations(self, candidate_items: List[Dict[str, Any]],
                                      max_items: int = 5) -> List[Dict[str, Any]]:
        """Recommend next items based on information gain and spacing.

        Args:
            candidate_items: Dicts that each provide at least ``'skill'`` and
                ``'difficulty'``.
            max_items: Maximum number of items to return.

        Returns:
            Copies of the best-scoring candidates, highest score first, each
            extended with ``'score'``, ``'information_gain'``,
            ``'spacing_bonus'``, ``'urgency'`` and ``'current_mastery'``.
        """
        now = datetime.now()
        scored_items = []
        for item in candidate_items:
            skill = item['skill']
            difficulty = item['difficulty']

            # Also pulls the skill into the cache if it is stored in the database.
            info_gain = self.calculate_information_gain(skill, difficulty)

            # Spacing benefit: grows with days since practice, capped after a week.
            tracked = self.skill_masteries.get(skill)
            if tracked is None:
                spacing_bonus = 1.0  # New skill gets max bonus
            else:
                # A last_practiced in the future must not produce a negative bonus.
                days_since = max((now - tracked.last_practiced).days, 0)
                spacing_bonus = min(days_since / 7.0, 1.0)

            # Mastery urgency: higher for lower mastery.
            mastery = self.get_mastery_probability(skill)
            urgency = 1.0 - mastery

            score = 0.4 * info_gain + 0.3 * spacing_bonus + 0.3 * urgency
            scored_items.append({
                **item,
                'score': score,
                'information_gain': info_gain,
                'spacing_bonus': spacing_bonus,
                'urgency': urgency,
                'current_mastery': mastery
            })

        scored_items.sort(key=lambda scored: scored['score'], reverse=True)
        return scored_items[:max_items]

    def get_research_metrics(self, skill: Optional[str] = None) -> Dict[str, Any]:
        """Calculate research metrics for evaluation.

        Only responses recorded in this instance's ``response_history`` are
        considered.

        Args:
            skill: Restrict the metrics to this skill; a falsy value means all
                skills.

        Returns:
            An empty dict when there are no matching responses, otherwise a
            dict with ``total_responses``, ``accuracy``, ``avg_response_time``,
            ``hints_per_response``, ``learning_gain`` (last 10 vs first 10
            responses, 0.0 with fewer than 20), ``retention_rate`` (accuracy on
            responses older than 3 days, or ``None``) and ``skill_masteries``
            (number of cached skills).
        """
        if skill:
            responses = [r for r in self.response_history if r.skill == skill]
        else:
            responses = self.response_history
        if not responses:
            return {}

        def accuracy_of(batch):
            return sum(1 for r in batch if r.correct) / len(batch)

        if len(responses) >= 20:
            learning_gain = accuracy_of(responses[-10:]) - accuracy_of(responses[:10])
        else:
            learning_gain = 0.0

        now = datetime.now()
        retention_items = [r for r in responses if (now - r.timestamp).days > 3]
        retention_rate = accuracy_of(retention_items) if retention_items else None

        return {
            'total_responses': len(responses),
            'accuracy': accuracy_of(responses),
            'avg_response_time': float(np.mean([r.response_time for r in responses])),
            'hints_per_response': float(np.mean([r.hints_used for r in responses])),
            'learning_gain': learning_gain,
            'retention_rate': retention_rate,
            'skill_masteries': len(self.skill_masteries)
        }

    def _load_skill_mastery(self, skill: str):
        """Load skill mastery from database into the in-memory cache.

        Does nothing when the skill has no stored row.
        """
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM skill_mastery WHERE skill = ?", (skill,)
            ).fetchone()
        if row is None:
            return
        # The column is nullable; fall back to "now" (no decay) when it is empty.
        stored_time = row['last_practiced']
        self.skill_masteries[skill] = SkillMastery(
            skill=row['skill'],
            theta=row['theta'],
            sem=row['sem'],
            last_practiced=(datetime.fromisoformat(stored_time)
                            if stored_time else datetime.now()),
            practice_count=row['practice_count'],
            success_rate=row['success_rate']
        )

    def _save_skill_mastery(self, mastery: "SkillMastery"):
        """Save skill mastery to database, replacing any existing row."""
        with self._connect() as conn, conn:
            conn.execute("""
                INSERT OR REPLACE INTO skill_mastery
                (skill, theta, sem, last_practiced, practice_count, success_rate)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (
                mastery.skill,
                mastery.theta,
                mastery.sem,
                mastery.last_practiced.isoformat(),
                mastery.practice_count,
                mastery.success_rate
            ))

    def _save_response(self, response: "ItemResponse"):
        """Save item response to database."""
        with self._connect() as conn, conn:
            conn.execute("""
                INSERT INTO item_responses
                (item_id, skill, correct, response_time, hints_used, difficulty, timestamp)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                response.item_id,
                response.skill,
                response.correct,
                response.response_time,
                response.hints_used,
                response.difficulty,
                response.timestamp.isoformat()
            ))