Update app.py
app.py CHANGED
```diff
@@ -2,20 +2,23 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
 import re
+import copy
 from pathlib import Path
 import secrets
 import torch
+from PIL import Image, ImageDraw
 
-# Initialize the model and tokenizer
 model_name = "qwen/Qwen-VL-Chat"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).eval()
 model.generation_config = GenerationConfig.from_pretrained(model_name, trust_remote_code=True)
 
-# Set device for model
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
+BOX_TAG_PATTERN = r"<box>([\s\S]*?)</box>"
+PUNCTUATION = "！？。＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏."
+
 def save_image(image_file, upload_dir: str) -> str:
     Path(upload_dir).mkdir(parents=True, exist_ok=True)
     filename = secrets.token_hex(10) + Path(image_file.name).suffix
```
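Of the additions in this hunk, `import copy`, `BOX_TAG_PATTERN`, and `PUNCTUATION` are never referenced in the hunks shown; `draw_boxes()` below compiles its own inline pattern instead. If the constant is meant to be used, one pattern can cover both detecting and parsing boxes. A minimal sketch, where `BOX_COORD_PATTERN` and `has_boxes` are hypothetical names, not part of this diff:

```python
import re

# Hypothetical consolidation (not in this diff): one pattern that both detects
# and parses box annotations of the form <box>(x1,y1),(x2,y2)</box>.
BOX_COORD_PATTERN = r"<box>\((\d+),(\d+)\),\((\d+),(\d+)\)</box>"

def has_boxes(response: str) -> bool:
    # True when the model output contains at least one parseable box.
    return re.search(BOX_COORD_PATTERN, response) is not None
```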
```diff
@@ -29,27 +32,51 @@ def clean_response(response: str) -> str:
     return response
 
 def chat_with_model(image_path=None, text_query=None, history=None):
+    # Modify this function to use 'history' if your model requires it
     query_elements = []
     if image_path:
         query_elements.append({'image': image_path})
     if text_query:
         query_elements.append({'text': text_query})
-
+    # Add history processing here if needed
     query = tokenizer.from_list_format(query_elements)
     tokenized_inputs = tokenizer(query, return_tensors='pt').to(device)
     output = model.generate(**tokenized_inputs)
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     cleaned_response = clean_response(response)
     return cleaned_response
+def draw_boxes(image_path, response):
+    image = Image.open(image_path)
+    draw = ImageDraw.Draw(image)
+    boxes = re.findall(r'<box>\((\d+),(\d+)\),\((\d+),(\d+)\)</box>', response)
+    for box in boxes:
+        x1, y1, x2, y2 = map(int, box)
+        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
+    return image
 
-def process_input(text, file):
+def process_input(text=None, file=None, task_history=None):
+    if task_history is None:
+        task_history = []
     image_path = None
     if file is not None:
         image_path = save_image(file, "uploaded_images")
-    response = chat_with_model(image_path=image_path, text_query=text)
-
-
+    response = chat_with_model(image_path=image_path, text_query=text, history=task_history)
+    task_history.append((text, response))
+
+    if "<box>" in response:
+        if image_path:
+            image_with_boxes = draw_boxes(image_path, response)
+            image_with_boxes_path = image_path.replace(".jpg", "_boxed.jpg")
+            image_with_boxes.save(image_with_boxes_path)
+            return [("bot", response), "image", image_with_boxes_path], task_history
+        else:
+            return [("bot", response), "text", None], task_history
+    else:
+        # Clean the response if it contains any box-like annotations
+        clean_response = re.sub(r'<ref>(.*?)</ref>(?:<box>.*?</box>)*(?:<quad>.*?</quad>)*', r'\1', response).strip()
+        return [("bot", clean_response), "text", None], task_history
 
+# Define Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("""
 # 🙋🏻♂️欢迎来到🌟Tonic 的🦆Qwen-VL-Chat🤩Bot!🚀
```
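Two caveats on this hunk. First, `chat_with_model()` still ignores its `history` argument (the new comments acknowledge this) and drives `model.generate()` by hand. The Qwen-VL-Chat model card documents a `chat()` helper on the remote-code model that threads history through for you. A minimal sketch of the same function using it, assuming the interface from the model card (callers would then unpack `(response, history)`):

```python
# Minimal sketch, assuming the chat() helper documented on the Qwen-VL-Chat
# model card (available via trust_remote_code=True); not part of this diff.
def chat_with_model(image_path=None, text_query=None, history=None):
    query_elements = []
    if image_path:
        query_elements.append({'image': image_path})
    if text_query:
        query_elements.append({'text': text_query})
    query = tokenizer.from_list_format(query_elements)
    # chat() returns the reply and the updated history, so multi-turn
    # context is carried forward instead of being dropped.
    response, history = model.chat(tokenizer, query=query, history=history)
    return clean_response(response), history
```

Second, the model card describes box coordinates as normalized to a 0-1000 grid, so `draw_boxes()` likely needs to rescale them to pixel space before drawing; `image_path.replace(".jpg", "_boxed.jpg")` is also a no-op for non-JPEG uploads. A sketch of the rescaling, under the same assumption:

```python
# Minimal sketch, assuming 0-1000 normalized box coordinates as described on
# the Qwen-VL model card; rescales each box to the actual image size.
def draw_boxes(image_path, response):
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    w, h = image.size
    for box in re.findall(r'<box>\((\d+),(\d+)\),\((\d+),(\d+)\)</box>', response):
        x1, y1, x2, y2 = (int(v) for v in box)
        draw.rectangle([x1 * w // 1000, y1 * h // 1000,
                        x2 * w // 1000, y2 * h // 1000],
                       outline="red", width=3)
    return image
```

Also note that the local `clean_response = re.sub(...)` in `process_input()` shadows the module-level `clean_response()` helper; renaming the local variable avoids the confusion.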
```diff
@@ -69,10 +96,12 @@ Join us: TeamTonic is always making cool demos! Join our active builder's comm
     file_upload = gr.File(label="Upload Image")
     submit_btn = gr.Button("Submit")
 
+    task_history = []
+
     submit_btn.click(
         fn=process_input,
-        inputs=[query, file_upload],
-        outputs=chatbot
+        inputs=[query, file_upload, task_history],
+        outputs=[chatbot, task_history]
     )
 
     gr.Markdown("""
```
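A plain Python list is not a valid `inputs`/`outputs` entry for `submit_btn.click()`: Gradio expects components there and rejects anything else, so this wiring fails at startup. The usual pattern is `gr.State`. A minimal sketch, placed inside the `with gr.Blocks()` block:

```python
    # Minimal sketch: hold the conversation history in gr.State, which Gradio
    # accepts as an event input/output, instead of a plain Python list.
    task_history = gr.State([])

    submit_btn.click(
        fn=process_input,
        inputs=[query, file_upload, task_history],
        outputs=[chatbot, task_history],
    )
```

Even with that fix, the first value `process_input()` returns must match what `gr.Chatbot` renders, a list of `(user, bot)` message pairs, which the current `[("bot", response), "image", image_with_boxes_path]` shape does not.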
```diff
@@ -81,5 +110,7 @@ Join us: TeamTonic is always making cool demos! Join our active builder's comm
 Note: This demo is governed by the original license of Qwen-VL. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content,
 including hate speech, violence, pornography, deception, etc. (Note: This demo is subject to the license agreement of Qwen-VL. We strongly advise users not to disseminate or allow others to disseminate the following content, including but not limited to hate speech, violence, pornography, and fraud-related harmful information.)
 """)
-
 demo.queue().launch()
+
+if __name__ == "__main__":
+    demo.launch()
```
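One more wrinkle: `demo.queue().launch()` still runs at import time, so the new `__main__` guard tries to launch the app a second time when the file is executed directly. Keeping a single guarded launch, with the queue, avoids that. A minimal sketch:

```python
# Minimal sketch: launch exactly once, behind the guard, keeping the queue.
if __name__ == "__main__":
    demo.queue().launch()
```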