Spaces:

VanguardAI
/

MultiModal_OpenSource_AI

Paused

App Files Files Community

VanguardAI committed on Jun 10, 2024

Commit

c974ae6

verified ·

1 Parent(s): 4a19484

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -37

app.py CHANGED Viewed

@@ -6,35 +6,18 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import LoraConfig, PeftModel, get_peft_model
 import gradio as gr
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
-# Configuration for 4-bit quantization
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-# Load base model with quantization (replace 'your-username' if needed)
-base_model = AutoModelForCausalLM.from_pretrained(
-    "meta-llama/Meta-Llama-3-8B-Instruct", # Replace with actual base model
-    quantization_config=bnb_config,
-    use_auth_token=HF_TOKEN,
-)
-# Apply LoRA adapters
-peft_config = LoraConfig(
-    r=16,
-    lora_alpha=16,
-    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-    lora_dropout=0,
-    bias="none",
-    task_type="CAUSAL_LM"
-)
-model = PeftModel.from_pretrained(base_model, "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters", config=peft_config)
 condition = '''
 ALWAYS provide output in a JSON format.
@@ -51,7 +34,7 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 {}"""
-@spaces.GPU(duration=300)
 def chunk_it(inventory_list, user_input_text):
     model.to('cuda')
     inputs = tokenizer(
@@ -93,10 +76,7 @@ def chunk_it(inventory_list, user_input_text):
                   ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
                 The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
-                ''' + inventory_list +
-                '''
-                ALWAYS provide output in a JSON format.
-                ''',  # instruction
                 user_input_text,  # input
                 "",  # output - leave this blank for generation!
             )
@@ -105,8 +85,12 @@ def chunk_it(inventory_list, user_input_text):
     # Generation with a longer max_length and better sampling
     outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
-    content = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-    return content[0]
 # Interface for inputs
 iface = gr.Interface(
@@ -116,7 +100,7 @@ iface = gr.Interface(
         gr.Textbox(label="inventory_list", lines=5)
     ],
     outputs="text",
-    title="Formatter Pro",
 )
 iface.launch(inline=False)

 from peft import LoraConfig, PeftModel, get_peft_model
 import gradio as gr
+tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_16bit_LoRA_Adapters", trust_remote_code=True)
+quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16)
+model = AutoModelForCausalLM.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_16bit_LoRA_Adapters",
+                                             quantization_config=quantization_config,
+                                             torch_dtype =torch.bfloat16,
+                                             low_cpu_mem_usage=True,
+                                             use_safetensors=True,
+                                             trust_remote_code=True)
 condition = '''
 ALWAYS provide output in a JSON format.
 {}"""
+@spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
     model.to('cuda')
     inputs = tokenizer(
                   ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
                 The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
+                ''' + inventory_list + condition,  # instruction
                 user_input_text,  # input
                 "",  # output - leave this blank for generation!
             )
     # Generation with a longer max_length and better sampling
     outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+    reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
+    # Search for the pattern in the text
+    match = re.search(pattern, reply[0], re.DOTALL)  # re.DOTALL allows '.' to match newlines
+    reply = match.group(1).strip()
+    return reply
 # Interface for inputs
 iface = gr.Interface(
         gr.Textbox(label="inventory_list", lines=5)
     ],
     outputs="text",
+    title="Bhashini_Ki",
 )
 iface.launch(inline=False)