Nymbo committed
Commit f20dc96 · verified · 1 Parent(s): f7bc613

Update app.py

Files changed (1):
  1. app.py  +212 -102
app.py CHANGED
@@ -1,3 +1,8 @@
  import gradio as gr
  import numpy as np
  import spaces
@@ -8,48 +13,65 @@ from PIL import Image
  from diffusers import FluxKontextPipeline
  from diffusers.utils import load_image

- # down to 22 steps to try and keep this ~<30 seconds so it will generally work in claude.ai - which doesn't reset timeout with notifications.

- MAX_SEED = np.iinfo(np.int32).max

- pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")

  @spaces.GPU
- def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=20, progress=gr.Progress(track_tqdm=True)):
      """
-     Perform image editing using the FLUX.1 Kontext pipeline.
-
-     This function takes an input image and a text prompt to generate a modified version
-     of the image based on the provided instructions. It uses the FLUX.1 Kontext model
-     for contextual image editing tasks.
-
      Args:
-         input_image (PIL.Image.Image): The path to the input image to be edited.
-         prompt (str): Text description of the desired edit to apply to the image. Examples: "Remove glasses", "Add a hat", "Change background to beach".
-         seed (int, optional): Random seed for reproducible generation.
-             Must be between 0 and MAX_SEED (2^31 - 1). Defaults to 42.
-         randomize_seed (bool, optional): If True, generates a random seed instead of using the provided seed value.
-             Defaults to False.
-         guidance_scale (float, optional): Controls how closely the model follows the prompt. Higher values mean stronger adherence to the prompt but may reduce image quality. Range: 1.0-10.0. Defaults to 2.5.
-         steps (int, optional): Controls how many steps to run the diffusion model for.
-             Range: 1-30. Defaults to 20.
-         progress (gr.Progress, optional): Gradio progress tracker for monitoring
-             generation progress. Defaults to gr.Progress(track_tqdm=True).
-
      Returns:
-         The modified image and seed used for generation.
      """
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
-
      if input_image:
          input_image = input_image.convert("RGB")
          image = pipe(
-             image=input_image,
              prompt=prompt,
              guidance_scale=guidance_scale,
-             width = input_image.size[0],
-             height = input_image.size[1],
              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]
@@ -60,94 +82,182 @@ def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5
              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]
      return image, seed, gr.Button(visible=True)

  @spaces.GPU(duration=25)
- def infer_example(input_image, prompt):
      image, seed, _ = infer(input_image, prompt)
      return image, seed

- css="""
  #col-container {
      margin: 0 auto;
      max-width: 960px;
  }
  """

  with gr.Blocks(css=css) as demo:
-
-     with gr.Column(elem_id="col-container"):
-         gr.Markdown(f"""# FLUX.1 Kontext [dev]
-         Image editing and manipulation model guidance-distilled from FLUX.1 Kontext [pro], [[blog]](https://bfl.ai/announcements/flux-1-kontext-dev) [[model]](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
-         """)
-         with gr.Row():
-             with gr.Column():
-                 input_image = gr.Image(label="Upload the image for editing", type="pil")
-                 with gr.Row():
-                     prompt = gr.Text(
-                         label="Prompt",
-                         show_label=False,
-                         max_lines=1,
-                         placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
-                         container=False,
-                     )
-                     run_button = gr.Button("Run", scale=0)
-                 with gr.Accordion("Advanced Settings", open=False):
-
-                     seed = gr.Slider(
-                         label="Seed",
-                         minimum=0,
-                         maximum=MAX_SEED,
-                         step=1,
-                         value=0,
-                     )
-
-                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                     guidance_scale = gr.Slider(
-                         label="Guidance Scale",
-                         minimum=1,
-                         maximum=10,
-                         step=0.1,
-                         value=2.5,
-                     )
-
-                     steps = gr.Slider(
-                         label="Steps",
-                         minimum=1,
-                         maximum=30,
-                         value=20,
-                         step=1
-                     )
-
-             with gr.Column():
-                 result = gr.Image(label="Result", show_label=False, interactive=False)
-                 reuse_button = gr.Button("Reuse this image", visible=False)
-
-
-         examples = gr.Examples(
-             examples=[
-                 ["flowers.png", "turn the flowers into sunflowers"],
-                 ["monster.png", "make this monster ride a skateboard on the beach"],
-                 ["cat.png", "make this cat happy"]
-             ],
-             inputs=[input_image, prompt],
-             outputs=[result, seed],
-             fn=infer_example,
-             cache_examples="lazy"
-         )

      gr.on(
          triggers=[run_button.click, prompt.submit],
-         fn = infer,
-         inputs = [input_image, prompt, seed, randomize_seed, guidance_scale, steps],
-         outputs = [result, seed, reuse_button]
      )
-
-     # reuse_button.click(
-     #     fn = lambda image: image,
-     #     inputs = [result],
-     #     outputs = [input_image]
-     # )
-
- demo.launch(mcp_server=True)
+ # File: app.py
+ # Purpose: Gradio UI + MCP server for FLUX.1 Kontext-dev with two MCP tools:
+ #   1) edit_image    -> edits an uploaded image based on a prompt
+ #   2) text_to_image -> generates a brand-new image from a prompt (no input image)
+
  import gradio as gr
  import numpy as np
  import spaces

  from diffusers import FluxKontextPipeline
  from diffusers.utils import load_image

+ # -----------------------------
+ # Constants & model bootstrap
+ # -----------------------------
+
+ # MAX_SEED is the highest 32-bit signed int; many generators expect this bound
+ MAX_SEED = np.iinfo(np.int32).max  # <-- (layman's) the biggest safe random seed we'll allow

+ # Load the FLUX.1 Kontext-dev pipeline once and keep it on GPU for speed
+ # (layman's) this downloads the model and prepares it to run on your graphics card
+ pipe = FluxKontextPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-Kontext-dev",
+     torch_dtype=torch.bfloat16
+ ).to("cuda")

+ # ---------------------------------------------------------
+ # Core editing function (works WITH or WITHOUT input image)
+ # ---------------------------------------------------------

  @spaces.GPU
+ def infer(
+     input_image: Image.Image | None,
+     prompt: str,
+     seed: int = 42,
+     randomize_seed: bool = False,
+     guidance_scale: float = 2.5,
+     steps: int = 20,
+     progress: gr.Progress = gr.Progress(track_tqdm=True),
+ ) -> tuple[Image.Image, int, gr.Button]:
      """
+     Perform image editing or generation using the FLUX.1 Kontext pipeline.
+
+     If an input image is provided, the model performs contextual editing.
+     If no image is provided, the model generates a new image from the prompt.
+
      Args:
+         input_image: Optional image to edit. If None, we do text-to-image instead.
+         prompt: What you want to change/create (e.g., "Remove glasses", "Neon cyberpunk cityscape").
+         seed: Random seed for reproducibility (0..2^31-1).
+         randomize_seed: If True, ignore `seed` and pick a random one.
+         guidance_scale: How strongly to follow the prompt (higher = more literal, but can reduce quality).
+         steps: Number of diffusion steps (1..30). More steps = slower but usually better.
+         progress: (Gradio) Used to stream progress updates.
+
      Returns:
+         (image, seed, reuse_button_visibility): The resulting image, the actual seed used, and a visible "reuse" button.
      """
+     # (layman's) pick a new seed if user asked for randomness
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
+
+     # (layman's) if you gave us an image, we edit it; if not, we create from scratch
      if input_image:
          input_image = input_image.convert("RGB")
          image = pipe(
+             image=input_image,
              prompt=prompt,
              guidance_scale=guidance_scale,
+             width=input_image.size[0],
+             height=input_image.size[1],
              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]

              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]
+
+     # (layman's) return the finished picture, the seed, and show a "reuse" button
      return image, seed, gr.Button(visible=True)

+ # ------------------------------------------------------------
+ # NEW: Dedicated text-to-image function (separate MCP tool)
+ # ------------------------------------------------------------
+
+ @spaces.GPU  # (layman's) make sure we run on the GPU so it's fast
+ def text_to_image(
+     prompt: str,
+     seed: int = 42,
+     randomize_seed: bool = False,
+     guidance_scale: float = 2.5,
+     steps: int = 20,
+     width: int = 1024,
+     height: int = 1024,
+     progress: gr.Progress = gr.Progress(track_tqdm=True),
+ ) -> tuple[Image.Image, int]:
+     """
+     Generate a brand-new image from text only (no input image required).
+
+     This calls FLUX.1 Kontext-dev in "text-to-image" mode.
+     Great for creating images from scratch with a clean, separate MCP tool.
+
+     Args:
+         prompt: The scene or edit you want to create (e.g., "cozy cabin at dusk, cinematic lighting").
+         seed: Random seed for reproducibility (0..2^31-1).
+         randomize_seed: If True, ignore `seed` and pick a random one.
+         guidance_scale: How strongly to follow the prompt (higher = more literal, can reduce quality).
+         steps: Number of diffusion steps (1..30). 20 is a good speed/quality balance.
+         width: Output image width in pixels.
+         height: Output image height in pixels.
+         progress: (Gradio) Used to stream progress updates.
+
+     Returns:
+         (image, seed): The generated image and the seed actually used.
+     """
+     # (layman's) pick a new seed if requested
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     # (layman's) run the model in pure text-to-image mode
+     image = pipe(
+         prompt=prompt,
+         guidance_scale=guidance_scale,
+         width=width,
+         height=height,
+         num_inference_steps=steps,
+         generator=torch.Generator().manual_seed(seed),
+     ).images[0]
+
+     return image, seed
+
+ # -------------------------------------
+ # Lightweight helper for the Examples
+ # -------------------------------------
+
  @spaces.GPU(duration=25)
+ def infer_example(input_image: Image.Image | None, prompt: str) -> tuple[Image.Image, int]:
+     # (layman's) small wrapper used by the clickable examples
      image, seed, _ = infer(input_image, prompt)
      return image, seed

+ # -------------
+ # Minimal CSS
+ # -------------
+ css = """
  #col-container {
      margin: 0 auto;
      max-width: 960px;
  }
  """

+ # --------------------------
+ # UI (Gradio Blocks layout)
+ # --------------------------
  with gr.Blocks(css=css) as demo:
+     # (layman's) top caption & links
+     gr.Markdown(
+         """# FLUX.1 Kontext [dev]
+         Image editing and manipulation model guidance-distilled from FLUX.1 Kontext [pro],
+         [[blog]](https://bfl.ai/announcements/flux-1-kontext-dev) [[model]](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
+         """
+     )
+
+     with gr.Row():
+         # -------------------------------
+         # Left column: inputs & settings
+         # -------------------------------
+         with gr.Column():
+             # (layman's) you can upload an image to edit or leave it blank to generate from text
+             input_image = gr.Image(label="Upload the image for editing (optional)", type="pil")
+             with gr.Row():
+                 prompt = gr.Text(
+                     label="Prompt",
+                     show_label=False,
+                     max_lines=1,
+                     placeholder="Describe what to create/edit (e.g., 'Neon skyline at night')",
+                     container=False,
+                 )
+                 run_button = gr.Button("Run", scale=0)
+
+             # (layman's) extra knobs if you want finer control
+             with gr.Accordion("Advanced Settings", open=False):
+                 seed = gr.Slider(
+                     label="Seed",
+                     minimum=0,
+                     maximum=MAX_SEED,
+                     step=1,
+                     value=42,
+                 )
+
+                 randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+
+                 guidance_scale = gr.Slider(
+                     label="Guidance Scale",
+                     minimum=1.0,
+                     maximum=10.0,
+                     step=0.1,
+                     value=2.5,
+                 )
+
+                 steps = gr.Slider(
+                     label="Steps",
+                     minimum=1,
+                     maximum=30,
+                     value=20,
+                     step=1,
+                 )
+
+         # -------------------------
+         # Right column: the output
+         # -------------------------
+         with gr.Column():
+             result = gr.Image(label="Result", show_label=False, interactive=False)
+             reuse_button = gr.Button("Reuse this image", visible=False)
+
+     # (layman's) a few quick examples for testing
+     examples = gr.Examples(
+         examples=[
+             ["flowers.png", "turn the flowers into sunflowers"],
+             ["monster.png", "make this monster ride a skateboard on the beach"],
+             ["cat.png", "make this cat happy"],
+         ],
+         inputs=[input_image, prompt],
+         outputs=[result, seed],
+         fn=infer_example,
+         cache_examples="lazy",
+     )
+
+     # (layman's) wire the "Run" button and Enter key to call our main function
      gr.on(
          triggers=[run_button.click, prompt.submit],
+         fn=infer,
+         inputs=[input_image, prompt, seed, randomize_seed, guidance_scale, steps],
+         outputs=[result, seed, reuse_button],
+         api_name="edit_image",  # <-- MCP tool name for UI-based edit/generate
+         api_description="Edit an uploaded image with a prompt (or generate from text if no image is provided) using FLUX.1 Kontext-dev.",
      )
+
+     # (Optional) If you want a 1-click "reuse image" flow in the UI later:
+     # reuse_button.click(fn=lambda image: image, inputs=[result], outputs=[input_image])
+
+     # ------------------------------------------------------------------
+     # NEW: Register a dedicated MCP tool that does text-to-image only.
+     # This does not create any extra UI — it's a clean API endpoint.
+     # ------------------------------------------------------------------
+     gr.api(
+         text_to_image,
+         api_name="text_to_image",  # <-- MCP tool route
+         api_description=(
+             "Generate a brand-new image from text (no input image required) "
+             "using FLUX.1 Kontext-dev. Returns the image and the seed used."
+         ),
+     )
+
+ # (layman's) start the app with MCP enabled so tools show up to agents (e.g., Claude/Cursor)
+ demo.launch(mcp_server=True)
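
For reference, once the Space runs with `mcp_server=True`, the two endpoints this commit names ("edit_image" and "text_to_image") can also be exercised through the regular Gradio client API; MCP-capable agents connect to the same tools over the Space's MCP endpoint. Below is a minimal client-side sketch, not part of the commit: the Space id `Nymbo/FLUX.1-Kontext-Dev`, the positional argument order, and the shape of the returned values are assumptions inferred from the function signatures above.

    # Hypothetical usage sketch (assumed Space id, argument order, and output shapes).
    # Requires a recent gradio_client (Client and handle_file are part of its public API).
    from gradio_client import Client, handle_file

    client = Client("Nymbo/FLUX.1-Kontext-Dev")  # assumed Space id

    # Dedicated text-to-image tool registered via gr.api(...); two outputs expected (image, seed).
    t2i_result = client.predict(
        "cozy cabin at dusk, cinematic lighting",  # prompt
        42,                                        # seed
        True,                                      # randomize_seed
        2.5,                                       # guidance_scale
        20,                                        # steps
        1024,                                      # width
        1024,                                      # height
        api_name="/text_to_image",
    )

    # UI-backed edit endpoint named via gr.on(..., api_name="edit_image");
    # returns the edited image, the seed used, and an update for the reuse button.
    edit_result = client.predict(
        handle_file("flowers.png"),                # input_image
        "turn the flowers into sunflowers",        # prompt
        42,                                        # seed
        False,                                     # randomize_seed
        2.5,                                       # guidance_scale
        20,                                        # steps
        api_name="/edit_image",
    )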