Javedalam committed
Commit 55dbe5b · verified · 1 parent: f7faccc

Create app.py

Files changed (1)
  app.py +171 -0
app.py ADDED
@@ -0,0 +1,171 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import gradio as gr
+
+ MODEL_ID = "qvac/genesis-i-model"
+
+ # ----------------------
+ # Load tokenizer & model
+ # ----------------------
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+ # Make sure we have a pad token
+ if tokenizer.pad_token is None and tokenizer.eos_token is not None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ print("Detecting device & dtype...")
+ if torch.cuda.is_available():
+     # On ZeroGPU / real GPU: use bf16 if supported, else fp16
+     try:
+         bf16_ok = torch.cuda.is_bf16_supported()
+     except AttributeError:
+         bf16_ok = False
+
+     torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
+     device_map = "auto"
+ else:
+     # CPU fallback
+     torch_dtype = torch.float32
+     device_map = "cpu"
+
+ print(f"Loading model on {device_map} with dtype={torch_dtype}...")
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch_dtype,
+     device_map=device_map,
+ )
+ model.eval()
+
+ # ----------------------
+ # Helper: build chat input
+ # ----------------------
+ def build_inputs(prompt: str):
+     """
+     Build input_ids using the model's chat template.
+     We pass it a simple system + user conversation and ask the
+     tokenizer to append the assistant generation prompt.
+     """
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are an educational AI tutor. "
+                 "Explain clearly and precisely, focusing on math, science, "
+                 "engineering, programming, and medical education. "
+                 "Show intermediate steps when useful, but avoid rambling."
+             ),
+         },
+         {
+             "role": "user",
+             "content": prompt,
+         },
+     ]
+
+     input_ids = tokenizer.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         return_tensors="pt",
+     )
+
+     return input_ids.to(model.device)
+
+ # ----------------------
+ # Generation function
+ # ----------------------
+ def generate(
+     prompt: str,
+     temperature: float = 0.7,
+     top_p: float = 0.9,
+     max_new_tokens: int = 256,
+ ):
+     if not prompt.strip():
+         return "Please enter a prompt."
+
+     input_ids = build_inputs(prompt)
+
+     with torch.no_grad():
+         output_ids = model.generate(
+             input_ids=input_ids,
+             max_new_tokens=max_new_tokens,
+             do_sample=True,
+             temperature=temperature,
+             top_p=top_p,
+             repetition_penalty=1.1,  # light anti-repetition
+             pad_token_id=tokenizer.pad_token_id,
+             eos_token_id=tokenizer.eos_token_id,
+         )
+
+     # Keep only the newly generated tokens (the assistant part)
+     new_tokens = output_ids[0, input_ids.shape[-1]:]
+     text = tokenizer.decode(new_tokens, skip_special_tokens=True)
+     text = text.strip()
+
+     if not text:
+         text = "[Empty response]"
+     return text
+
+ # ----------------------
+ # Gradio UI
+ # ----------------------
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # QVAC Genesis I – Educational LLM Demo
+
+         Model: **qvac/genesis-i-model**
+         Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
+         Ask it math, science, engineering, or medical education questions.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Example: Explain why 2 + 2 = 4 in a way a 10-year-old can understand.",
+                 lines=6,
+             )
+             temperature = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.2,
+                 value=0.7,
+                 step=0.05,
+                 label="Temperature (creativity)",
+             )
+             top_p = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.9,
+                 step=0.05,
+                 label="Top-p (nucleus sampling)",
+             )
+             max_new_tokens = gr.Slider(
+                 minimum=16,
+                 maximum=512,
+                 value=256,
+                 step=16,
+                 label="Max new tokens",
+             )
+             submit = gr.Button("Generate")
+
+         with gr.Column(scale=4):
+             output = gr.Textbox(
+                 label="Model output",
+                 lines=18,
+             )
+
+     submit.click(
+         fn=generate,
+         inputs=[prompt, temperature, top_p, max_new_tokens],
+         outputs=output,
+     )
+
+     # Press Enter in the prompt box to generate
+     prompt.submit(
+         fn=generate,
+         inputs=[prompt, temperature, top_p, max_new_tokens],
+         outputs=output,
+     )
+
+ demo.queue().launch()
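
Note: the click handler above blocks until generate() returns, so the output box only fills in once generation is complete. A streaming variant is possible with transformers' TextIteratorStreamer; the sketch below reuses the model, tokenizer, and build_inputs objects from app.py, and generate_stream is a hypothetical helper, not part of this commit.

# Sketch only: streaming counterpart to generate(), assuming the
# model / tokenizer / build_inputs objects defined in app.py above.
from threading import Thread

from transformers import TextIteratorStreamer


def generate_stream(prompt, temperature=0.7, top_p=0.9, max_new_tokens=256):
    input_ids = build_inputs(prompt)
    # skip_prompt=True so the streamer yields only newly generated text
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    gen_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        streamer=streamer,
    )
    # model.generate() blocks, so run it in a background thread
    # while this function consumes the streamer
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text  # Gradio renders each yielded value as a partial update

Wiring it in would only change the event handlers (submit.click(fn=generate_stream, ...)); Gradio treats generator functions as streaming updates to the output component.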
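
One caveat hinted at by the bf16 comment: on a Hugging Face ZeroGPU Space, a GPU is attached per call rather than for the whole process, and GPU-bound functions are wrapped with the spaces.GPU decorator from the spaces package. A minimal sketch of that adaptation, assuming the Space declares the spaces dependency:

# Sketch only: ZeroGPU adaptation. On ZeroGPU Spaces a GPU is
# allocated per decorated call, not for the process lifetime.
import spaces


@spaces.GPU  # request a GPU for the duration of each call
def generate(prompt, temperature=0.7, top_p=0.9, max_new_tokens=256):
    ...  # same body as generate() in app.py above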