Flourish committed on
Commit
106328a
·
verified ·
1 Parent(s): 44cb7bc

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +145 -106
  2. requirements.txt +4 -6
  3. setup.py +9 -0
app.py CHANGED
@@ -1,154 +1,193 @@
 
 
1
  import gradio as gr
2
- import numpy as np
3
  import random
 
 
 
4
 
5
- import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
- import torch
8
-
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
 
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
 
 
16
 
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
 
 
20
  MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
22
-
23
-
24
- @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
35
- ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
38
-
39
- generator = torch.Generator().manual_seed(seed)
40
-
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
-
51
- return image, seed
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
  ]
59
 
60
- css = """
61
  #col-container {
62
  margin: 0 auto;
63
- max-width: 640px;
64
  }
65
  """
66
 
67
- with gr.Blocks() as demo:
 
68
  with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
-
 
 
71
  with gr.Row():
 
72
  prompt = gr.Text(
73
  label="Prompt",
74
  show_label=False,
75
  max_lines=1,
76
- placeholder="Enter your prompt",
77
  container=False,
78
  )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
  result = gr.Image(label="Result", show_label=False)
83
-
84
  with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
  with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
 
105
  minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
  step=32,
108
- value=1024, # Replace with defaults that work for your model
109
  )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
  minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
  step=32,
116
- value=1024, # Replace with defaults that work for your model
117
  )
118
-
119
  with gr.Row():
 
120
  guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
  step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
  )
127
-
128
  num_inference_steps = gr.Slider(
129
  label="Number of inference steps",
130
  minimum=1,
131
- maximum=50,
132
  step=1,
133
- value=2, # Replace with defaults that work for your model
134
  )
135
 
136
- gr.Examples(examples=examples, inputs=[prompt])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  gr.on(
138
  triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
  )
152
 
153
- if __name__ == "__main__":
154
- demo.launch()
 
1
+ import os
2
+ import torch
3
  import gradio as gr
4
+ import spaces
5
  import random
6
+ import numpy as np
7
+ from safetensors.torch import load_file
8
+ from huggingface_hub import hf_hub_download
9
 
10
+ from diffusers.utils import logging
11
+ from PIL import Image
 
 
 
 
12
 
13
+ from ovis_image.model.tokenizer import build_ovis_tokenizer
14
+ from ovis_image.model.autoencoder import load_ae
15
+ from ovis_image.model.hf_embedder import OvisEmbedder
16
+ from ovis_image.model.model import OvisImageModel
17
+ from ovis_image.sampling import generate_image
18
+ from ovis_image import ovis_image_configs
19
 
20
+ logging.set_verbosity_error()
 
21
 
22
+ # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
23
  MAX_SEED = np.iinfo(np.int32).max
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ device = "cuda"
26
+ _dtype = torch.bfloat16
27
+ hf_token = os.getenv("HF_TOKEN")
28
+
29
+ # init ovis_image
30
+ model_config = ovis_image_configs["ovis-image-7b"]
31
+ ovis_image = OvisImageModel(model_config)
32
+ ovis_image_path = hf_hub_download(
33
+ repo_id="AIDC-AI/Ovis-Image-7B",
34
+ filename="ovis_image.safetensors",
35
+ token=hf_token,
36
+ )
37
+ model_state_dict = load_file(ovis_image_path)
38
+ missing_keys, unexpected_keys = ovis_image.load_state_dict(model_state_dict)
39
+ print(f"Load Missing Keys {missing_keys}")
40
+ print(f"Load Unexpected Keys {unexpected_keys}")
41
+ ovis_image = ovis_image.to(device=device, dtype=_dtype)
42
+ ovis_image.eval()
43
+
44
+ # init vae
45
+ vae_path = hf_hub_download(
46
+ repo_id="AIDC-AI/Ovis-Image-7B",
47
+ filename="ae.safetensors",
48
+ token=hf_token,
49
+ )
50
+ autoencoder = load_ae(
51
+ vae_path,
52
+ model_config.autoencoder_params,
53
+ device=device,
54
+ dtype=_dtype,
55
+ random_init=False,
56
+ )
57
+ autoencoder.eval()
58
+
59
+ # init ovis
60
+ ovis_path = hf_hub_download(
61
+ repo_id="AIDC-AI/Ovis-Image-7B",
62
+ filename="Ovis2.5-2B",
63
+ token=hf_token,
64
+ )
65
+ ovis_tokenizer = build_ovis_tokenizer(ovis_path)
66
+ ovis_encoder = OvisEmbedder(
67
+ model_path=ovis_path,
68
+ random_init=False,
69
+ low_cpu_mem_usage=True,
70
+ torch_dtype=torch.bfloat16,
71
+ ).to(device=device, dtype=_dtype)
72
+
73
+
74
+ @spaces.GPU(duration=75)
75
+ def generate(prompt, img_height=1024, img_width=1024, seed=42, steps=50, guidance_scale=5.0):
76
+ print(f'inference with prompt : {prompt}, size: {img_height}x{img_width}, seed : {seed}, step : {steps}, cfg : {guidance_scale}')
77
+ image = generate_image(
78
+ device=device,
79
+ dtype=_dtype,
80
+ model=ovis_image,
81
+ prompt=prompt,
82
+ autoencoder=autoencoder,
83
+ ovis_tokenizer=ovis_tokenizer,
84
+ ovis_encoder=ovis_encoder,
85
+ img_height=img_height,
86
+ img_width=img_width,
87
+ denoising_steps=steps,
88
+ cfg_scale=guidance_scale,
89
+ seed=seed,
90
+ )
91
+ # bring into PIL format and save
92
+ image = image.clamp(-1, 1)
93
+ image = image.cpu().permute(0, 2, 3, 1).float().numpy()
94
+ image = (image * 255).round().astype("uint8")
95
+
96
+ return image[0]
97
 
98
  examples = [
99
+ "Solar punk vehicle in a bustling city",
100
+ "An anthropomorphic cat riding a Harley Davidson in Arizona with sunglasses and a leather jacket",
101
+ "An elderly woman poses for a high fashion photoshoot in colorful, patterned clothes with a cyberpunk 2077 vibe",
102
  ]
103
 
104
+ css="""
105
  #col-container {
106
  margin: 0 auto;
107
+ max-width: 520px;
108
  }
109
  """
110
 
111
+ with gr.Blocks(css=css) as demo:
112
+
113
  with gr.Column(elem_id="col-container"):
114
+ gr.Markdown(f"""# Ovis-Image
115
+ [[code](https://github.com/AIDC-AI/Ovis-Image)] [[model](https://huggingface.co/AIDC-AI/Ovis-Image-7B)]
116
+ """)
117
+
118
  with gr.Row():
119
+
120
  prompt = gr.Text(
121
  label="Prompt",
122
  show_label=False,
123
  max_lines=1,
124
+ placeholder="Enter your prompt here",
125
  container=False,
126
  )
127
+
128
+ run_button = gr.Button("Run", scale=0)
129
+
130
  result = gr.Image(label="Result", show_label=False)
131
+
132
  with gr.Accordion("Advanced Settings", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  with gr.Row():
134
+
135
+ img_height = gr.Slider(
136
+ label="Image Height",
137
  minimum=256,
138
+ maximum=2048,
139
  step=32,
140
+ value=1024,
141
  )
142
+
143
+ img_width = gr.Slider(
144
+ label="Image Width",
145
  minimum=256,
146
+ maximum=2048,
147
  step=32,
148
+ value=1024,
149
  )
150
+
151
  with gr.Row():
152
+
153
  guidance_scale = gr.Slider(
154
+ label="Guidance Scale",
155
+ minimum=1,
156
+ maximum=14,
157
  step=0.1,
158
+ value=5.0,
159
  )
160
+
161
  num_inference_steps = gr.Slider(
162
  label="Number of inference steps",
163
  minimum=1,
164
+ maximum=100,
165
  step=1,
166
+ value=50,
167
  )
168
 
169
+ seed = gr.Slider(
170
+ label="Seed",
171
+ minimum=0,
172
+ maximum=MAX_SEED,
173
+ step=1,
174
+ value=42,
175
+ )
176
+
177
+ gr.Examples(
178
+ examples = examples,
179
+ fn = generate,
180
+ inputs = [prompt],
181
+ outputs = [result],
182
+ cache_examples="lazy"
183
+ )
184
+
185
  gr.on(
186
  triggers=[run_button.click, prompt.submit],
187
+ fn = generate,
188
+ inputs = [prompt, img_height, img_width, seed, num_inference_steps, guidance_scale],
189
+ outputs = [result]
 
 
 
 
 
 
 
 
 
190
  )
191
 
192
+ if __name__ == '__main__':
193
+ demo.launch()
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
 
1
+ torch==2.6.0
2
+ transformers >= 4.53.0
3
+ einops
4
+ safetensors
 
 
setup.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+ import os
3
+
4
+ setup(
5
+ name='ovis_image',
6
+ version='1.0.0',
7
+ packages=find_packages(include=['ovis_image']),
8
+ # include any other necessary details here
9
+ )