Spaces:

VanguardAI
/

MultiModal_OpenSource_AI

Paused

App Files Community

VanguardAI committed on Jul 8, 2024

Commit

67b1882

verified ·

1 Parent(s): d5262d8

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -29

app.py CHANGED Viewed

@@ -4,9 +4,21 @@ import re
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
-READ_HF = os.environ["read_hf"]
 from unsloth import FastLanguageModel
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
@@ -65,43 +77,63 @@ Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Gr
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
-    print("Loading model and tokenizer...")
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
-        max_seq_length = 2048,
-        dtype = torch.bfloat16,
-        load_in_4bit = True,
-        token = READ_HF
-    )
-    print("Model and tokenizer loaded.")
-    print("Enabling native 2x faster inference...")
-    FastLanguageModel.for_inference(model)
-    print("Inference enabled.")
     formatted_prompt = alpaca_prompt.format(
         string + inventory_list,  # instruction
         user_input_text,  # input
         "",  # output - leave this blank for generation!
     )
-    print("Formatted prompt: ", formatted_prompt)
-    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
-    print("Tokenized inputs: ", inputs)
-    print("Generating output...")
-    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
-    print("Output generated.")
-    reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-    print("Decoded output: ", reply)
     # Uncomment the following lines if further processing of the reply is needed
     # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
     # match = re.search(pattern, reply[0], re.DOTALL)
     # reply = match.group(1).strip()
-    print("Final reply: ", reply)
     return reply
 # Interface for inputs
@@ -115,6 +147,9 @@ iface = gr.Interface(
     title="Testing",
 )
-print("Launching Gradio interface...")
-iface.launch(inline=False)
-print("Gradio interface launched.")

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
+import logging
 from unsloth import FastLanguageModel
+# Set up logging
+logging.basicConfig(
+    level=logging.DEBUG,  # Set the logging level to DEBUG to capture all messages
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler()  # Logs will be output to the console
+    ]
+)
+logger = logging.getLogger(__name__)
+READ_HF = os.environ["read_hf"]
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
+    logger.info("Loading model and tokenizer...")
+    try:
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
+            max_seq_length = 2048,
+            dtype = torch.bfloat16,
+            load_in_4bit = True,
+            token = READ_HF
+        )
+        logger.info("Model and tokenizer loaded.")
+    except Exception as e:
+        logger.error(f"Failed to load model and tokenizer: {e}")
+        raise
+    logger.info("Enabling native 2x faster inference...")
+    try:
+        FastLanguageModel.for_inference(model)
+        logger.info("Inference enabled.")
+    except Exception as e:
+        logger.error(f"Failed to enable native inference: {e}")
+        raise
     formatted_prompt = alpaca_prompt.format(
         string + inventory_list,  # instruction
         user_input_text,  # input
         "",  # output - leave this blank for generation!
     )
+    logger.debug(f"Formatted prompt: {formatted_prompt}")
+    try:
+        inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
+        logger.debug(f"Tokenized inputs: {inputs}")
+    except Exception as e:
+        logger.error(f"Failed to tokenize inputs: {e}")
+        raise
+    logger.info("Generating output...")
+    try:
+        outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+        logger.info("Output generated.")
+    except Exception as e:
+        logger.error(f"Failed to generate output: {e}")
+        raise
+    try:
+        reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+        logger.debug(f"Decoded output: {reply}")
+    except Exception as e:
+        logger.error(f"Failed to decode output: {e}")
+        raise
     # Uncomment the following lines if further processing of the reply is needed
     # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
     # match = re.search(pattern, reply[0], re.DOTALL)
     # reply = match.group(1).strip()
+    logger.debug(f"Final reply: {reply}")
     return reply
 # Interface for inputs
     title="Testing",
 )
+logger.info("Launching Gradio interface...")
+try:
+    iface.launch(inline=False)
+    logger.info("Gradio interface launched.")
+except Exception as e:
+    logger.error(f"Failed to launch Gradio interface: {e}")