Spaces: Runtime error
```python
import json
import os

# Set HF_HOME to a writable directory *before* importing huggingface_hub /
# transformers, since the cache location is resolved when those modules load
os.environ["HF_HOME"] = "/app/.cache"

import faiss
import requests
import torch
from datasets import load_dataset
from fastapi import FastAPI
from huggingface_hub import configure_http_backend, login
from llama_index.core import Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
from requests.adapters import HTTPAdapter
from transformers import AutoModelForCausalLM, AutoTokenizer
from urllib3.util.retry import Retry

app = FastAPI()
# Configure requests with retries and a longer timeout. requests.Session has
# no global `timeout` attribute (assigning one is silently ignored), so the
# timeout is injected through a custom adapter instead.
class TimeoutHTTPAdapter(HTTPAdapter):
    def __init__(self, *args, timeout=60, **kwargs):
        self.timeout = timeout
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)

def custom_http_backend():
    session = requests.Session()
    retries = Retry(
        total=3,                                     # retry 3 times
        backoff_factor=1,                            # wait 1, 2, 4 seconds between retries
        status_forcelist=[429, 500, 502, 503, 504],  # retry on these HTTP status codes
    )
    session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=60))
    return session

# Set the custom HTTP backend for huggingface_hub
configure_http_backend(backend_factory=custom_http_backend)
# Log in to Hugging Face
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set")
login(hf_token)
# Load the dataset and prepare the knowledge base
ds = load_dataset("codeparrot/apps", "all", split="train")
os.makedirs("knowledge_base", exist_ok=True)
for i, example in enumerate(ds.select(range(50))):  # limited to 50 examples for memory
    # In codeparrot/apps, `solutions` is a JSON-encoded list, not a Python list,
    # so indexing it directly would return a single character
    solutions = json.loads(example["solutions"]) if example["solutions"] else []
    solution = solutions[0] if solutions else "No solution available"
    with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")

documents = SimpleDirectoryReader("knowledge_base").load_data()
# Set up RAG
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
Settings.embed_model = embed_model

d = 384  # embedding dimension of all-MiniLM-L6-v2
faiss_index = faiss.IndexFlatL2(d)
vector_store = FaissVectorStore(faiss_index=faiss_index)
# The FAISS store must be wired in through a StorageContext;
# from_documents() does not accept a `vector_store` keyword
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# Load the Llama model (no quantization, CPU only)
model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

device = "cpu"  # force CPU usage
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",           # explicitly map to CPU
    torch_dtype=torch.float32,  # use float32 for CPU compatibility
)
# Route decorator added here: the original snippet defined solve_problem but
# never registered it with the app, so the Space exposed no endpoint
@app.post("/solve")
async def solve_problem(problem: str, top_k: int = 1):
    retriever = index.as_retriever(similarity_top_k=top_k)
    retrieved_nodes = retriever.retrieve(problem)
    context = retrieved_nodes[0].text if retrieved_nodes else "No relevant context found."
    prompt = (
        f"Given the following competitive programming problem:\n\n{problem}\n\n"
        f"Relevant context:\n{context}\n\nGenerate a solution in Python:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.inference_mode():  # generation needs no gradients; saves memory
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    solution = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"solution": solution, "context": context}
```