Spaces:

lab2-as
/

lab2-ui

Sleeping

App Files Community

MyNameIsSimon committed on Dec 9, 2024

Commit

c9760a6

1 Parent(s): 4b3754b

code cleanup

Browse files

Files changed (1) hide show

app.py +37 -33

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from llama_cpp.llama_chat_format import MoondreamChatHandler
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-# client = InferenceClient()
 class MyModel:
     def __init__(self):
         self.client = None
@@ -21,18 +21,18 @@ class MyModel:
         system_message,
         max_tokens,
         temperature,
-        min_p,
     ):
         if model != self.current_model or self.current_model is None:
             client = Llama.from_pretrained(
-                repo_id="lab2-as/lora_model_gguf",
-                filename='*Q4_K_M.gguf',
-                n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
             )
             self.client = client
             self.current_model = model
         messages = [{"role": "system", "content": system_message}]
@@ -45,54 +45,58 @@ class MyModel:
         messages.append({"role": "user", "content": message})
         response = ""
         for message in self.client.create_chat_completion(
-                messages,
-                temperature=temperature,
-                top_p=min_p,
-                stream=True,
-                max_tokens=max_tokens
-            ):
             delta = message["choices"][0]["delta"]
             if "content" in delta:
                 response += delta["content"]
                 yield response
-        # for message in client.chat_completion(
-        #     messages,
-        #     max_tokens=max_tokens,
-        #     stream=True,
-        #     temperature=temperature,
-        #     top_p=top_p,
-        #     model=model,
-        # ):
-        #     token = message.choices[0].delta.content
-        #     response += token
-        #     yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 my_model = MyModel()
 model_choices = [
-    "lab2-as/lora_model",
-    "lab2-as/lora_model_no_quant",
 ]
 demo = gr.ChatInterface(
     my_model.respond,
     additional_inputs=[
-        gr.Dropdown(choices=model_choices, value=model_choices[0], label="Select Model"),
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=128, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
             step=0.05,
-            label="Min-p (nucleus sampling)",
         ),
     ],
 )

 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 class MyModel:
     def __init__(self):
         self.client = None
         system_message,
         max_tokens,
         temperature,
+        top_p,
     ):
         if model != self.current_model or self.current_model is None:
+            model_id, filename = model.split(",")
             client = Llama.from_pretrained(
+                repo_id=model_id.strip(),
+                filename=f"*{filename.strip()}*.gguf",
+                n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
             )
             self.client = client
             self.current_model = model
         messages = [{"role": "system", "content": system_message}]
         messages.append({"role": "user", "content": message})
         response = ""
         for message in self.client.create_chat_completion(
+            messages,
+            temperature=temperature,
+            top_p=top_p,
+            stream=True,
+            max_tokens=max_tokens,
+        ):
             delta = message["choices"][0]["delta"]
             if "content" in delta:
                 response += delta["content"]
                 yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 my_model = MyModel()
 model_choices = [
+    "lab2-as/lora_model_gguf, Q4",
 ]
 demo = gr.ChatInterface(
     my_model.respond,
     additional_inputs=[
+        gr.Dropdown(
+            choices=model_choices,
+            value=model_choices[0],
+            label="Select Model",
+        ),
+        gr.Textbox(
+            value="You are a friendly Chatbot.",
+            label="System message",
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=2048,
+            value=128,
+            step=1,
+            label="Max new tokens",
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature",
+        ),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
             step=0.05,
+            label="Top-p (Nucleus sampling)",
         ),
     ],
 )