Spaces:

traversaal-internal
/

Alif-1.0-8B-Model

Sleeping

App Files Community

alishafique committed on Feb 24

Commit

62c7530

verified ·

1 Parent(s): 8315568

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -68

app.py CHANGED Viewed

@@ -1,34 +1,19 @@
-# import torch
-# print(torch.cuda.is_available())  # Should return True
-# print(torch.cuda.get_device_name(0))  # Should return 'Tesla T4'
-# print(torch.cuda.get_device_capability(0)
-import llama_cpp
-from llama_cpp import Llama
-# import llama_cpp.llama_tokenizer
 import gradio as gr
 from huggingface_hub import hf_hub_download
-model_name = "large-traversaal/Alif-1.0-8B-Instruct"
-model_file = "model-Q8_0.gguf"
-model_path_file = hf_hub_download(model_name,
-                             filename=model_file,)
-# llama = llama_cpp.Llama.from_pretrained(
-#     repo_id="large-traversaal/Alif-1.0-8B-Instruct",
-#     filename="*model-Q6_K.gguf",
-#     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
-#         "large-traversaal/Alif-1.0-8B-Instruct"
-#     ),
-#     verbose=False,
-# )
-# llama = Llama(model_path="./model-Q8_0.gguf", verbose=False)
 llama = Llama(
     model_path=model_path_file,
     n_gpu_layers=40,  # Adjust based on VRAM
@@ -38,38 +23,13 @@ llama = Llama(
     verbose=True  # Enable debug logging
 )
-chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
-# prompt = "قابل تجدید توانائی کیا ہے؟"
-prompt = "شہر کراچی کے بارے میں بتاؤ"
-# prompt = chat_prompt.format(inp=prompt)
-# response = llama(prompt, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
-# # prompt = "قابل تجدید توانائی کیا ہے؟"
-# stop_tokens = ["\n\n", "<|end_of_text|>"]  # Stops after natural pauses or end-of-text token
-# Function to generate text with streaming output
-def chat_with_ai(prompt):
-    query = chat_prompt.format(inp=prompt)
-    #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)  # Enable streaming
-    response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
-    # response = llama.create_chat_completion(
-    #     messages = [
-    #         {"role": "system", "content": "You are a Urdu Chatbot."},
-    #         {
-    #             "role": "user",
-    #             "content": prompt
-    #         }
-    #     ],
-    #     stream=True
-    # )
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
@@ -77,15 +37,72 @@ def chat_with_ai(prompt):
             text += content
             yield text
-# Gradio UI setup
-demo = gr.Interface(
-    fn=chat_with_ai,  # Streaming function
-    inputs="text",  # User input
-    outputs="text",  # Model response
-    title="💬 Streaming AI Chatbot",
-    description="Enter a prompt and get a streamed response from Llama.cpp (GGUF)."
-)
-# Launch the Gradio app
-demo.launch(share=True)

+import os
+import json
+import subprocess
 import gradio as gr
+from threading import Thread
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from datetime import datetime
+# Load model from Hugging Face Hub
+MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
+MODEL_FILE = "model-Q8_0.gguf"
+model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)
+# Initialize Llama model
 llama = Llama(
     model_path=model_path_file,
     n_gpu_layers=40,  # Adjust based on VRAM
     verbose=True  # Enable debug logging
 )
+# Function to generate responses
+def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+    # chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
+    chat_prompt = f"{system_prompt}\n ### Instruction: {message}\n ### Response:"
+    response = llama(chat_prompt, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
             text += content
             yield text
+# def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+#     """Generates a streaming response from the Llama model."""
+#     messages = [
+#         {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
+#     ]
+#     # Add history and the current message
+#     #for user, bot in history:
+#         #messages.append({"role": "user", "content": user})
+#         #messages.append({"role": "assistant", "content": bot})
+#     messages.append({"role": "user", "content": message})
+#     response = llama.create_chat_completion(
+#         messages=messages,
+#         stream=True,
+#     )
+#     partial_message = ""
+#     for part in response:
+#         content = part["choices"][0]["delta"].get("content", "")
+#         partial_message += content
+#         yield partial_message
+# JavaScript function for `on_load`
+on_load = """
+async()=>{ alert("Welcome to the Traversaal Alif 1.0 Chatbot! This is an experimental AI model. Please use responsibly."); }
+"""
+placeholder = """
+<center><h1>10 Questions</h1><br>Think of a person, place, or thing. I'll ask you 10 yes/no questions to try and guess it.
+</center>
+"""
+# Create custom chat UI using `gr.Blocks`
+with gr.Blocks(js=on_load, theme=gr.themes.Default()) as demo:
+    with gr.Column(scale=1, elem_id="center-content"):
+        gr.Markdown(
+            """
+            <div style="text-align: center;">
+                <h1>Alif 1.0 Urdu & English Chatbot 🚀</h1>
+                <p>Alif 1.0 8B Instruct is an open-source model with highly advanced multilingual reasoning capabilities. It utilizes human refined multilingual synthetic data paired with reasoning to enhance cultural nuance and reasoning capabilities in english and urdu languages.</p>
+            </div>
+            """,
+        )
+    chat = gr.ChatInterface(
+        generate_response,
+        #chatbot=gr.Chatbot(placeholder=placeholder),
+        #title="🚀" + " " + "Alif-1.0 Chatbot",
+        #description="Urdu AI Chatbot powered by Llama.cpp",
+        examples=[
+            ["شہر کراچی کے بارے میں بتاؤ"],
+            ["قابل تجدید توانائی کیا ہے؟"],
+            ["پاکستان کے بارے میں بتائیں"]
+        ],
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+        additional_inputs=[
+            gr.Textbox(value="You are an Urdu Chatbot. Write an appropriate response for the given instruction in Urdu.", label="System prompt", render=False),
+            gr.Slider(0, 1, 0.8, label="Temperature", render=False),
+            gr.Slider(128, 4096, 512, label="Max new tokens", render=False),
+            gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
+            gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
+            gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
+        ],
+    )
+demo.queue(max_size=10).launch(share=True)