import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from typing import Optional, Dict, Any


class CognitiveLLM:
    def __init__(self, model_name: str = "Qwen/Qwen3-7B-Instruct", device: Optional[str] = None):
        """
        Initialize the Cognitive LLM with the specified model.

        Args:
            model_name: Name of the model to use (default: Qwen/Qwen3-7B-Instruct)
            device: Device to run the model on ('cuda', 'mps', or 'cpu'). Auto-detects if None.
        """
        self.model_name = model_name
        if device:
            self.device = device
        elif torch.cuda.is_available():
            self.device = 'cuda'
        elif torch.backends.mps.is_available():
            self.device = 'mps'
        else:
            self.device = 'cpu'
        print(f"Loading {model_name} on {self.device}...")

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Load the model; 4-bit quantization (bitsandbytes) and FlashAttention 2
        # are enabled only on CUDA, where they are supported
        model_kwargs = {
            "device_map": "auto",
            "trust_remote_code": True,
            "torch_dtype": torch.bfloat16,
        }
        if self.device == 'cuda':
            model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
            model_kwargs["attn_implementation"] = "flash_attention_2"
        self.model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)

        # Create the text-generation pipeline; the model is already placed on
        # devices by device_map, so no device argument is passed here
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer
        )
        print(f"Model {model_name} loaded successfully on {self.device}")

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        **generation_kwargs
    ) -> str:
        """
        Generate text from a prompt using the loaded model.

        Args:
            prompt: Input text prompt
            max_new_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature (lower = more focused, higher = more creative)
            top_p: Nucleus sampling parameter
            **generation_kwargs: Additional generation parameters

        Returns:
            Generated text
        """
        # Format the prompt as a chat message; the pipeline applies the model's chat template
        messages = [
            {"role": "user", "content": prompt}
        ]

        # Generate the response
        response = self.pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            **generation_kwargs
        )

        # The chat pipeline returns the whole conversation; the last message is the assistant's reply
        return response[0]["generated_text"][-1]["content"]


def main():
    # Initialize the cognitive LLM
    llm = CognitiveLLM()
    print("\nCognitive LLM initialized. Type 'quit' to exit.")
    print("Enter your prompt:")

    # Interactive loop
    while True:
        try:
            user_input = input(">> ")
            if user_input.lower() in ['quit', 'exit', 'q']:
                break
            if user_input.strip() == '':
                continue

            # Generate a response
            response = llm.generate(user_input)
            print("\nResponse:")
            print(response)
            print("\n---\nEnter another prompt or 'quit' to exit:")
        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            print(f"\nError: {str(e)}")
            continue


if __name__ == "__main__":
    main()
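
# --- Programmatic usage (sketch) ---
# A minimal example of calling CognitiveLLM from another script instead of the
# interactive loop above, assuming the default model fits on the detected
# device. The module name, prompt, and sampling values below are illustrative
# assumptions, not part of this file.
#
#   from app import CognitiveLLM  # "app" is an assumed module/file name
#
#   llm = CognitiveLLM()
#   answer = llm.generate(
#       "Explain nucleus sampling (top-p) in one paragraph.",
#       max_new_tokens=128,
#       temperature=0.3,
#   )
#   print(answer)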