ariG23498 (HF Staff) committed
Commit a4f8500 · verified · 1 Parent(s): a151d2d

Update app.py

Files changed (1):
  1. app.py +32 -26
app.py CHANGED
@@ -5,15 +5,12 @@
 import os
 import re
 import gradio as gr
-import dashscope
-from dashscope import MultiModalConversation
+import base64
+from huggingface_hub import InferenceClient
 from argparse import ArgumentParser
-from http import HTTPStatus
-from urllib3.exceptions import HTTPError
 
 # Set API key
-API_KEY = os.environ['API_KEY']
-dashscope.api_key = API_KEY
+client = InferenceClient(provider="hf-inference")
 
 # Constants
 REVISION = 'v1.0.4'
@@ -30,6 +27,11 @@ def _get_args():
                         help="Demo server name.")
     return parser.parse_args()
 
+def image_to_base64(image_path):
+    """Convert an image file to base64 string."""
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+
 def parse_bounding_boxes(text):
     """Parse bounding box coordinates from model output."""
     matches = re.findall(BOX_TAG_PATTERN, text)
@@ -44,28 +46,34 @@ def predict(image, prompt):
     if image is None or not prompt:
         return None, "Please upload an image and provide a prompt."
 
-    # Prepare message for the model
-    messages = [{
-        'role': 'user',
-        'content': [
-            {'image': f'file://{image}'},
-            {'text': prompt}
+    try:
+        # Convert image to base64
+        image_base64 = image_to_base64(image)
+        image_url = f"data:image/jpeg;base64,{image_base64}"
+
+        # Prepare message for the model
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image_url", "image_url": {"url": image_url}}
+                ]
+            }
         ]
-    }]
 
-    # Call the Qwen2.5-VL model
-    try:
-        responses = MultiModalConversation.call(
-            model='qwen2.5-vl-32b-instruct',
+        # Call the Hugging Face Inference API
+        stream = client.chat.completions.create(
+            model="Qwen/Qwen2.5-VL-32B-Instruct",
             messages=messages,
-            stream=False
+            stream=True
         )
-        if responses.status_code != HTTPStatus.OK:
-            return None, f"Error: {responses.message}"
 
-        # Extract response text
-        response = responses.output.choices[0].message.content
-        response_text = ''.join([ele['text'] if 'text' in ele else ele.get('box', '') for ele in response])
+        # Aggregate streaming response
+        response_text = ""
+        for chunk in stream:
+            if chunk.choices[0].delta.content:
+                response_text += chunk.choices[0].delta.content
 
         # Parse bounding boxes
         bboxes = parse_bounding_boxes(response_text)
@@ -75,8 +83,6 @@ def predict(image, prompt):
         # Return the image and annotations for AnnotatedImage
         return (image, bboxes), None
 
-    except HTTPError as e:
-        return None, f"HTTP Error: {str(e)}"
     except Exception as e:
         return None, f"Error: {str(e)}"
 
@@ -86,7 +92,7 @@ def clear_inputs():
 
 def _launch_demo(args):
     with gr.Blocks() as demo:
-        gr.Markdown("""<center><font size=3> Qwen2.5-VL-32B-Instruct Bounding Box Demo </center>""")
+        gr.Markdown("""<center><font size=3> Qwen2.5-VL-32B-Instruct Bounding Box Demo (Hugging Face) </center>""")
 
         with gr.Row():
             with gr.Column():
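For reference, here is a minimal standalone sketch of the call path this commit adopts, assuming huggingface_hub is installed and a valid HF_TOKEN is available in the environment. The provider, model ID, and message shape are taken from the diff; the describe helper and the example file name are illustrative only.

import base64
from huggingface_hub import InferenceClient

# Assumes HF_TOKEN is set in the environment; provider and model match the diff.
client = InferenceClient(provider="hf-inference")

def describe(image_path, prompt):
    # Encode the local image as a base64 data URL, mirroring image_to_base64 in app.py.
    with open(image_path, "rb") as f:
        image_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode("utf-8")

    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }]

    # Stream the chat completion and concatenate the text deltas.
    stream = client.chat.completions.create(
        model="Qwen/Qwen2.5-VL-32B-Instruct",
        messages=messages,
        stream=True,
    )
    return "".join(
        chunk.choices[0].delta.content
        for chunk in stream
        if chunk.choices[0].delta.content
    )

print(describe("example.jpg", "Draw a box around each dog in the image."))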
 
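The diff leaves BOX_TAG_PATTERN and the body of parse_bounding_boxes out of view, so the exact tag format is not shown here. Purely to illustrate the (box, label) annotation shape that gr.AnnotatedImage expects from predict's return value, a hypothetical parser might look like the following; the regex is an assumption, not the pattern app.py actually uses.

import re

# Hypothetical tag format -- the real BOX_TAG_PATTERN in app.py is not visible in this diff.
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+)\),\((\d+),(\d+)\)</box>"

def parse_bounding_boxes(text):
    # Return a list of ((x1, y1, x2, y2), label) pairs for gr.AnnotatedImage.
    boxes = []
    for i, match in enumerate(re.findall(BOX_TAG_PATTERN, text)):
        x1, y1, x2, y2 = map(int, match)
        boxes.append(((x1, y1, x2, y2), f"object {i + 1}"))
    return boxes

Paired with the image path, a return value of this shape plugs directly into the (image, bboxes) tuple that predict hands to the AnnotatedImage component.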