# Eidolon-CognitiveTutor / cognitive_llm.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from typing import Optional

class CognitiveLLM:
    def __init__(self, model_name: str = "Qwen/Qwen3-7B-Instruct", device: Optional[str] = None):
        """
        Initialize the Cognitive LLM with the specified model.

        Args:
            model_name: Name of the model to use (default: Qwen/Qwen3-7B-Instruct)
            device: Device to run the model on ('cuda', 'mps', or 'cpu'). Auto-detects if None.
        """
        self.model_name = model_name
        if device:
            self.device = device
        elif torch.cuda.is_available():
            self.device = 'cuda'
        elif torch.backends.mps.is_available():
            self.device = 'mps'
        else:
            self.device = 'cpu'
        print(f"Loading {model_name} on {self.device}...")

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Load the model. 4-bit quantization (bitsandbytes) and FlashAttention 2
        # are CUDA-only, so they are enabled conditionally.
        model_kwargs = {
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": torch.bfloat16,
        }
        if self.device.startswith('cuda'):
            model_kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
            )
            model_kwargs["attn_implementation"] = "flash_attention_2"  # requires the flash-attn package
        self.model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)

        # Wrap the loaded model in a text-generation pipeline; the model is
        # already placed on its device(s), so no device_map is passed here.
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer
        )
        print(f"Model {model_name} loaded successfully on {self.device}")
    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        **generation_kwargs
    ) -> str:
        """
        Generate text from a prompt using the loaded model.

        Args:
            prompt: Input text prompt
            max_new_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature (lower = more focused, higher = more creative)
            top_p: Nucleus sampling parameter
            **generation_kwargs: Additional generation parameters

        Returns:
            Generated text
        """
        # Format the prompt as a single-turn chat; the pipeline applies the
        # model's chat template automatically
        messages = [
            {"role": "user", "content": prompt}
        ]

        # Generate response
        response = self.pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            **generation_kwargs
        )

        # The pipeline returns the full conversation; the last message is the
        # assistant's reply, so return its content
        return response[0]["generated_text"][-1]["content"]
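
# A minimal non-interactive usage sketch (added for illustration; the function
# name and prompt below are hypothetical, not part of the original project).
# It shows how CognitiveLLM.generate can be called with custom sampling
# parameters, and it only runs if invoked explicitly.
def example_single_prompt() -> str:
    llm = CognitiveLLM()
    # A lower temperature gives a more focused, tutoring-style answer
    return llm.generate(
        "Explain the testing effect to a first-year student.",
        max_new_tokens=256,
        temperature=0.3,
    )
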
def main():
    # Initialize the cognitive LLM
    llm = CognitiveLLM()
    print("\nCognitive LLM initialized. Type 'quit' to exit.")
    print("Enter your prompt:")

    # Interactive loop
    while True:
        try:
            user_input = input(">> ")
            if user_input.lower() in ['quit', 'exit', 'q']:
                break
            if user_input.strip() == '':
                continue

            # Generate response
            response = llm.generate(user_input)
            print("\nResponse:")
            print(response)
            print("\n---\nEnter another prompt or 'quit' to exit:")
        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            print(f"\nError: {str(e)}")
            continue


if __name__ == "__main__":
    main()
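
# To run the interactive tutor from a shell (assumes torch, transformers,
# accelerate, and, on CUDA machines, bitsandbytes are installed):
#
#   python cognitive_llm.py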