# Spaces: BonelliLab / Eidolon-CognitiveTutor (Sleeping)
# File size: 12,232 Bytes
# cd8c2bb

import numpy as np
import json
from typing import Dict, List, Any, Tuple
from dataclasses import dataclass
from datetime import datetime, timedelta
import sqlite3

@dataclass
class SkillMastery:
    """Ability estimate and practice statistics tracked for a single skill."""
    skill: str  # Skill identifier; primary key of the skill_mastery table
    theta: float  # IRT ability parameter (-3 to +3)
    sem: float    # Standard error of measurement
    last_practiced: datetime  # Timestamp of the latest response recorded for the skill
    practice_count: int  # Number of responses incorporated into this estimate
    success_rate: float  # Exponential moving average of correctness, in [0, 1]

@dataclass
class ItemResponse:
    """A single learner answer to one item, as consumed by the knowledge tracer."""
    item_id: str  # Identifier of the answered item
    skill: str  # Skill the item exercises; selects which mastery gets updated
    correct: bool  # Whether the answer was correct
    response_time: float  # Time taken to answer (units not specified here; presumably seconds -- confirm)
    hints_used: int  # Number of hints used on this item
    difficulty: float  # IRT item difficulty b, compared directly against theta
    timestamp: datetime  # When the response was given

class KnowledgeTracer:
    """Knowledge tracing system using Item Response Theory and Bayesian updating.

    Every skill carries a normal-approximated ability estimate (``theta`` with
    standard error ``sem``). Each observed response updates that estimate with
    a logistic IRT model whose discrimination is fixed at 1.0, and both the
    estimate and the raw response are persisted to a SQLite database.

    Attributes:
        db_path: Path of the SQLite database file.
        skill_masteries: In-memory cache of masteries, keyed by skill.
        response_history: Responses processed by this instance, in call order.
    """

    # Exponential decay rate applied to theta per whole day without practice.
    _FORGETTING_RATE_PER_DAY = 0.05
    # Bounds of the reported ability scale.
    _THETA_MIN = -3.0
    _THETA_MAX = 3.0
    # Weight of the newest observation in the success-rate moving average.
    _SUCCESS_EMA_ALPHA = 0.1
    # Number of days after which the spacing bonus saturates at 1.0.
    _SPACING_SATURATION_DAYS = 7.0

    def __init__(self, db_path: str = "knowledge_tracing.sqlite"):
        """Create the tracer and make sure the database schema exists.

        Args:
            db_path: Path of the SQLite database file to use.
        """
        self.db_path = db_path
        self._init_database()
        self.skill_masteries: Dict[str, SkillMastery] = {}
        self.response_history: List[ItemResponse] = []

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _logistic(x: float) -> float:
        """Return the logistic function 1 / (1 + exp(-x)) as a Python float."""
        return float(1.0 / (1.0 + np.exp(-x)))

    @staticmethod
    def _default_mastery(skill: str, as_of: datetime) -> SkillMastery:
        """Return the prior used for a skill with no recorded practice."""
        return SkillMastery(
            skill=skill, theta=0.0, sem=1.0,
            last_practiced=as_of,
            practice_count=0, success_rate=0.0,
        )

    def _find_mastery(self, skill: str):
        """Return the cached or stored mastery for ``skill``, or None if unseen.

        The database is consulted only when the skill is not cached yet.
        """
        if skill not in self.skill_masteries:
            self._load_skill_mastery(skill)
        return self.skill_masteries.get(skill)

    def _init_database(self):
        """Initialize database for storing tracing data."""
        conn = sqlite3.connect(self.db_path)
        try:
            # ``with conn`` only commits/rolls back; closing is done in ``finally``.
            with conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS skill_mastery (
                        skill TEXT PRIMARY KEY,
                        theta REAL DEFAULT 0.0,
                        sem REAL DEFAULT 1.0,
                        last_practiced TIMESTAMP,
                        practice_count INTEGER DEFAULT 0,
                        success_rate REAL DEFAULT 0.0
                    )
                """)
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS item_responses (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        item_id TEXT,
                        skill TEXT,
                        correct BOOLEAN,
                        response_time REAL,
                        hints_used INTEGER,
                        difficulty REAL,
                        timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """)
                conn.execute("""
                    CREATE INDEX IF NOT EXISTS idx_skill_responses ON item_responses(skill)
                """)
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Mastery updates
    # ------------------------------------------------------------------

    def update_mastery(self, response: ItemResponse) -> float:
        """Update skill mastery using Bayesian updating with IRT.

        The prior ability is first decayed toward 0 for the whole days elapsed
        since the skill was last practiced, then moved by one Newton step of
        the posterior: ``theta + (observed - P(correct)) / posterior_precision``.
        A correct answer therefore never lowers the estimate and an incorrect
        answer never raises it.

        Args:
            response: The observed answer; its ``skill`` selects the mastery
                to update and its ``difficulty`` is the item parameter b.

        Returns:
            The updated theta, clipped to [-3, 3].
        """
        skill = response.skill
        current = self._find_mastery(skill)
        if current is None:
            # Anchor an unseen skill at the response time so that no forgetting
            # (and no negative elapsed time) is applied to its first response.
            current = self._default_mastery(skill, response.timestamp)

        # Forgetting acts on what was known *before* this response. Negative
        # gaps (out-of-order timestamps) must not amplify the estimate.
        elapsed_days = max((response.timestamp - current.last_practiced).days, 0)
        retention = float(np.exp(-self._FORGETTING_RATE_PER_DAY * elapsed_days))
        prior_theta = current.theta * retention

        # P(correct) = 1 / (1 + exp(-a*(theta - b))) with a fixed at 1.0.
        p_correct = self._logistic(prior_theta - response.difficulty)

        # Posterior precision = prior precision + Fisher information of the item.
        prior_precision = 1.0 / (current.sem ** 2)
        information = p_correct * (1.0 - p_correct)
        posterior_precision = prior_precision + information
        posterior_sem = float(np.sqrt(1.0 / posterior_precision))

        # Gradient of the log-likelihood at the prior mean is (observed - p).
        observed = 1.0 if response.correct else 0.0
        posterior_theta = prior_theta + (observed - p_correct) / posterior_precision

        updated = SkillMastery(
            skill=skill,
            theta=float(np.clip(posterior_theta, self._THETA_MIN, self._THETA_MAX)),
            sem=posterior_sem,
            last_practiced=response.timestamp,
            practice_count=current.practice_count + 1,
            success_rate=self._update_success_rate(
                current.success_rate, current.practice_count, response.correct
            ),
        )

        self.skill_masteries[skill] = updated
        self.response_history.append(response)

        # Save to database
        self._save_skill_mastery(updated)
        self._save_response(response)

        return updated.theta

    def _update_success_rate(self, current_rate: float, count: int, correct: bool) -> float:
        """Update exponential moving average of success rate.

        Args:
            current_rate: Moving average before this observation.
            count: Number of observations already folded into ``current_rate``.
            correct: Outcome of the new observation.

        Returns:
            The outcome itself for the first observation, otherwise the
            blended moving average.
        """
        outcome = 1.0 if correct else 0.0
        if count == 0:
            return outcome
        alpha = self._SUCCESS_EMA_ALPHA
        return alpha * outcome + (1 - alpha) * current_rate

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def get_mastery_probability(self, skill: str) -> float:
        """Convert theta to mastery probability (0-1 scale).

        Unseen skills use the default theta of 0.0, i.e. a probability of 0.5.
        """
        mastery = self._find_mastery(skill)
        theta = 0.0 if mastery is None else mastery.theta
        # Logistic transformation: theta=0 -> 0.5, theta=+2 -> 0.88, theta=-2 -> 0.12
        return self._logistic(theta)

    def calculate_information_gain(self, skill: str, difficulty: float) -> float:
        """Calculate expected information gain for an item.

        Returns the Fisher information p * (1 - p) of an item with the given
        difficulty at the skill's current theta (0.0 for unseen skills).
        """
        mastery = self._find_mastery(skill)
        theta = 0.0 if mastery is None else mastery.theta
        p_correct = self._logistic(theta - difficulty)
        return p_correct * (1 - p_correct)

    def get_next_item_recommendations(self, candidate_items: List[Dict[str, Any]],
                                     max_items: int = 5) -> List[Dict[str, Any]]:
        """Recommend next items based on information gain and spacing.

        Args:
            candidate_items: Item dicts; each must provide ``'skill'`` and
                ``'difficulty'``. Other keys are passed through untouched.
            max_items: Maximum number of recommendations to return.

        Returns:
            Copies of the best-scoring candidates, highest score first, each
            extended with ``score``, ``information_gain``, ``spacing_bonus``,
            ``urgency`` and ``current_mastery``.
        """
        now = datetime.now()  # one reference time for the whole ranking
        scored_items = []

        for item in candidate_items:
            skill = item['skill']
            difficulty = item['difficulty']

            # Resolve the mastery once per item instead of once per metric.
            record = self._find_mastery(skill)
            theta = 0.0 if record is None else record.theta

            # Fisher information of the item at the current ability.
            p_correct = self._logistic(theta - difficulty)
            info_gain = p_correct * (1 - p_correct)

            # Spacing benefit (higher for skills not practiced recently).
            if record is None:
                spacing_bonus = 1.0  # New skill gets max bonus
            else:
                days_since = max((now - record.last_practiced).days, 0)
                spacing_bonus = min(days_since / self._SPACING_SATURATION_DAYS, 1.0)

            # Mastery urgency (higher for lower mastery).
            mastery = self._logistic(theta)
            urgency = 1.0 - mastery

            score = 0.4 * info_gain + 0.3 * spacing_bonus + 0.3 * urgency

            scored_items.append({
                **item,
                'score': score,
                'information_gain': info_gain,
                'spacing_bonus': spacing_bonus,
                'urgency': urgency,
                'current_mastery': mastery
            })

        # Sort by score and return top items; a negative limit yields nothing
        # rather than slicing from the end.
        scored_items.sort(key=lambda x: x['score'], reverse=True)
        return scored_items[:max(max_items, 0)]

    def get_research_metrics(self, skill: str = None) -> Dict[str, Any]:
        """Calculate research metrics for evaluation.

        Only responses processed by this instance (``response_history``) are
        considered.

        Args:
            skill: Restrict the metrics to this skill; a falsy value means all
                skills.

        Returns:
            An empty dict when there are no matching responses, otherwise a
            dict with ``total_responses``, ``accuracy``, ``avg_response_time``,
            ``hints_per_response``, ``learning_gain`` (last 10 minus first 10
            accuracy, 0.0 with fewer than 20 responses), ``retention_rate``
            (accuracy of responses older than 3 days, or None) and
            ``skill_masteries`` (number of cached masteries).
        """
        if skill:
            responses = [r for r in self.response_history if r.skill == skill]
        else:
            responses = self.response_history

        if not responses:
            return {}

        def accuracy_of(batch) -> float:
            return sum(1 for r in batch if r.correct) / len(batch)

        # Learning gain (compare first vs last 10 responses)
        if len(responses) >= 20:
            learning_gain = accuracy_of(responses[-10:]) - accuracy_of(responses[:10])
        else:
            learning_gain = 0.0

        # Retention (performance on items practiced > 3 days ago)
        now = datetime.now()
        retention_items = [r for r in responses if (now - r.timestamp).days > 3]
        retention_rate = accuracy_of(retention_items) if retention_items else None

        return {
            'total_responses': len(responses),
            'accuracy': accuracy_of(responses),
            'avg_response_time': float(np.mean([r.response_time for r in responses])),
            'hints_per_response': float(np.mean([r.hints_used for r in responses])),
            'learning_gain': learning_gain,
            'retention_rate': retention_rate,
            'skill_masteries': len(self.skill_masteries)
        }

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def _load_skill_mastery(self, skill: str):
        """Load skill mastery from database into the in-memory cache.

        Leaves the cache untouched when the skill has no stored row.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM skill_mastery WHERE skill = ?", (skill,)
            ).fetchone()
        finally:
            conn.close()

        if row is None:
            return

        # The column is nullable; treat a missing timestamp as "just practiced"
        # instead of failing inside fromisoformat.
        stored_ts = row['last_practiced']
        last_practiced = (
            datetime.fromisoformat(stored_ts) if stored_ts else datetime.now()
        )
        self.skill_masteries[skill] = SkillMastery(
            skill=row['skill'],
            theta=row['theta'],
            sem=row['sem'],
            last_practiced=last_practiced,
            practice_count=row['practice_count'],
            success_rate=row['success_rate']
        )

    def _save_skill_mastery(self, mastery: SkillMastery):
        """Save skill mastery to database, replacing any existing row."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commit on success, roll back on error
                conn.execute("""
                    INSERT OR REPLACE INTO skill_mastery 
                    (skill, theta, sem, last_practiced, practice_count, success_rate)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, (
                    mastery.skill,
                    # Plain floats keep NumPy scalars out of the SQLite binding.
                    float(mastery.theta),
                    float(mastery.sem),
                    mastery.last_practiced.isoformat(),
                    mastery.practice_count,
                    mastery.success_rate
                ))
        finally:
            conn.close()

    def _save_response(self, response: ItemResponse):
        """Save item response to database."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commit on success, roll back on error
                conn.execute("""
                    INSERT INTO item_responses 
                    (item_id, skill, correct, response_time, hints_used, difficulty, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (
                    response.item_id,
                    response.skill,
                    response.correct,
                    response.response_time,
                    response.hints_used,
                    response.difficulty,
                    response.timestamp.isoformat()
                ))
        finally:
            conn.close()