Nymbo committed
Commit f20dc96 · verified · 1 Parent(s): f7bc613

Update app.py

Files changed (1):
  1. app.py  +212 -102
app.py CHANGED
@@ -1,3 +1,8 @@
  import gradio as gr
  import numpy as np
  import spaces
@@ -8,48 +13,65 @@ from PIL import Image
  from diffusers import FluxKontextPipeline
  from diffusers.utils import load_image

- # down to 22 steps to try and keep this ~<30 seconds so it will generally work in claude.ai - which doesn't reset timeout with notifications.

- MAX_SEED = np.iinfo(np.int32).max

- pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")

  @spaces.GPU
- def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=20, progress=gr.Progress(track_tqdm=True)):
      """
-     Perform image editing using the FLUX.1 Kontext pipeline.
-
-     This function takes an input image and a text prompt to generate a modified version
-     of the image based on the provided instructions. It uses the FLUX.1 Kontext model
-     for contextual image editing tasks.
-
      Args:
-         input_image (PIL.Image.Image): The path to the input image to be edited.
-         prompt (str): Text description of the desired edit to apply to the image. Examples: "Remove glasses", "Add a hat", "Change background to beach".
-         seed (int, optional): Random seed for reproducible generation.
-             Must be between 0 and MAX_SEED (2^31 - 1). Defaults to 42.
-         randomize_seed (bool, optional): If True, generates a random seed instead of using the provided seed value.
-             Defaults to False.
-         guidance_scale (float, optional): Controls how closely the model follows the prompt. Higher values mean stronger adherence to the prompt but may reduce image quality. Range: 1.0-10.0. Defaults to 2.5.
-         steps (int, optional): Controls how many steps to run the diffusion model for.
-             Range: 1-30. Defaults to 20.
-         progress (gr.Progress, optional): Gradio progress tracker for monitoring
-             generation progress. Defaults to gr.Progress(track_tqdm=True).
-
      Returns:
-         The modified image and seed used for generation.
      """
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
-
      if input_image:
          input_image = input_image.convert("RGB")
          image = pipe(
-             image=input_image,
              prompt=prompt,
              guidance_scale=guidance_scale,
-             width = input_image.size[0],
-             height = input_image.size[1],
              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]
@@ -60,94 +82,182 @@ def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5
              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]
      return image, seed, gr.Button(visible=True)

  @spaces.GPU(duration=25)
- def infer_example(input_image, prompt):
      image, seed, _ = infer(input_image, prompt)
      return image, seed

- css="""
  #col-container {
      margin: 0 auto;
      max-width: 960px;
  }
  """

  with gr.Blocks(css=css) as demo:
-
-     with gr.Column(elem_id="col-container"):
-         gr.Markdown(f"""# FLUX.1 Kontext [dev]
-         Image editing and manipulation model guidance-distilled from FLUX.1 Kontext [pro], [[blog]](https://bfl.ai/announcements/flux-1-kontext-dev) [[model]](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
-         """)
-         with gr.Row():
-             with gr.Column():
-                 input_image = gr.Image(label="Upload the image for editing", type="pil")
-                 with gr.Row():
-                     prompt = gr.Text(
-                         label="Prompt",
-                         show_label=False,
-                         max_lines=1,
-                         placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
-                         container=False,
-                     )
-                     run_button = gr.Button("Run", scale=0)
-                 with gr.Accordion("Advanced Settings", open=False):
-
-                     seed = gr.Slider(
-                         label="Seed",
-                         minimum=0,
-                         maximum=MAX_SEED,
-                         step=1,
-                         value=0,
-                     )
-
-                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                     guidance_scale = gr.Slider(
-                         label="Guidance Scale",
-                         minimum=1,
-                         maximum=10,
-                         step=0.1,
-                         value=2.5,
-                     )
-
-                     steps = gr.Slider(
-                         label="Steps",
-                         minimum=1,
-                         maximum=30,
-                         value=20,
-                         step=1
-                     )
-
-             with gr.Column():
-                 result = gr.Image(label="Result", show_label=False, interactive=False)
-                 reuse_button = gr.Button("Reuse this image", visible=False)
-
-
-         examples = gr.Examples(
-             examples=[
-                 ["flowers.png", "turn the flowers into sunflowers"],
-                 ["monster.png", "make this monster ride a skateboard on the beach"],
-                 ["cat.png", "make this cat happy"]
-             ],
-             inputs=[input_image, prompt],
-             outputs=[result, seed],
-             fn=infer_example,
-             cache_examples="lazy"
-         )

      gr.on(
          triggers=[run_button.click, prompt.submit],
-         fn = infer,
-         inputs = [input_image, prompt, seed, randomize_seed, guidance_scale, steps],
-         outputs = [result, seed, reuse_button]
      )
-
-     # reuse_button.click(
-     #     fn = lambda image: image,
-     #     inputs = [result],
-     #     outputs = [input_image]
-     # )
-
- demo.launch(mcp_server=True)
+ # File: app.py
+ # Purpose: Gradio UI + MCP server for FLUX.1 Kontext-dev with two MCP tools:
+ #   1) edit_image    -> edits an uploaded image based on a prompt
+ #   2) text_to_image -> generates a brand-new image from a prompt (no input image)
+
  import gradio as gr
  import numpy as np
  import spaces

  from diffusers import FluxKontextPipeline
  from diffusers.utils import load_image

+ # -----------------------------
+ # Constants & model bootstrap
+ # -----------------------------
+
+ # MAX_SEED is the highest 32-bit signed int; many generators expect this bound
+ MAX_SEED = np.iinfo(np.int32).max  # <-- (layman's) the biggest safe random seed we'll allow

+ # Load the FLUX.1 Kontext-dev pipeline once and keep it on GPU for speed
+ # (layman's) this downloads the model and prepares it to run on your graphics card
+ pipe = FluxKontextPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-Kontext-dev",
+     torch_dtype=torch.bfloat16
+ ).to("cuda")

+ # ---------------------------------------------------------
+ # Core editing function (works WITH or WITHOUT input image)
+ # ---------------------------------------------------------

  @spaces.GPU
+ def infer(
+     input_image: Image.Image | None,
+     prompt: str,
+     seed: int = 42,
+     randomize_seed: bool = False,
+     guidance_scale: float = 2.5,
+     steps: int = 20,
+     progress: gr.Progress = gr.Progress(track_tqdm=True),
+ ) -> tuple[Image.Image, int, gr.Button]:
      """
+     Perform image editing or generation using the FLUX.1 Kontext pipeline.
+
+     If an input image is provided, the model performs contextual editing.
+     If no image is provided, the model generates a new image from the prompt.
+
      Args:
+         input_image: Optional image to edit. If None, we do text-to-image instead.
+         prompt: What you want to change/create (e.g., "Remove glasses", "Neon cyberpunk cityscape").
+         seed: Random seed for reproducibility (0..2^31-1).
+         randomize_seed: If True, ignore `seed` and pick a random one.
+         guidance_scale: How strongly to follow the prompt (higher = more literal, but can reduce quality).
+         steps: Number of diffusion steps (1..30). More steps = slower but usually better.
+         progress: (Gradio) Used to stream progress updates.
+
      Returns:
+         (image, seed, reuse_button_visibility): The resulting image, the actual seed used, and a visible "reuse" button.
      """
+     # (layman's) pick a new seed if user asked for randomness
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
+
+     # (layman's) if you gave us an image, we edit it; if not, we create from scratch
      if input_image:
          input_image = input_image.convert("RGB")
          image = pipe(
+             image=input_image,
              prompt=prompt,
              guidance_scale=guidance_scale,
+             width=input_image.size[0],
+             height=input_image.size[1],
              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]

              num_inference_steps=steps,
              generator=torch.Generator().manual_seed(seed),
          ).images[0]
+
+     # (layman's) return the finished picture, the seed, and show a "reuse" button
      return image, seed, gr.Button(visible=True)

+ # ------------------------------------------------------------
+ # NEW: Dedicated text-to-image function (separate MCP tool)
+ # ------------------------------------------------------------
+
+ @spaces.GPU  # (layman's) make sure we run on the GPU so it's fast
+ def text_to_image(
+     prompt: str,
+     seed: int = 42,
+     randomize_seed: bool = False,
+     guidance_scale: float = 2.5,
+     steps: int = 20,
+     width: int = 1024,
+     height: int = 1024,
+     progress: gr.Progress = gr.Progress(track_tqdm=True),
+ ) -> tuple[Image.Image, int]:
+     """
+     Generate a brand-new image from text only (no input image required).
+
+     This calls FLUX.1 Kontext-dev in "text-to-image" mode.
+     Great for creating images from scratch with a clean, separate MCP tool.
+
+     Args:
+         prompt: The scene or edit you want to create (e.g., "cozy cabin at dusk, cinematic lighting").
+         seed: Random seed for reproducibility (0..2^31-1).
+         randomize_seed: If True, ignore `seed` and pick a random one.
+         guidance_scale: How strongly to follow the prompt (higher = more literal, can reduce quality).
+         steps: Number of diffusion steps (1..30). 20 is a good speed/quality balance.
+         width: Output image width in pixels.
+         height: Output image height in pixels.
+         progress: (Gradio) Used to stream progress updates.
+
+     Returns:
+         (image, seed): The generated image and the seed actually used.
+     """
+     # (layman's) pick a new seed if requested
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     # (layman's) run the model in pure text-to-image mode
+     image = pipe(
+         prompt=prompt,
+         guidance_scale=guidance_scale,
+         width=width,
+         height=height,
+         num_inference_steps=steps,
+         generator=torch.Generator().manual_seed(seed),
+     ).images[0]
+
+     return image, seed
+
+ # -------------------------------------
+ # Lightweight helper for the Examples
+ # -------------------------------------
+
  @spaces.GPU(duration=25)
+ def infer_example(input_image: Image.Image | None, prompt: str) -> tuple[Image.Image, int]:
+     # (layman's) small wrapper used by the clickable examples
      image, seed, _ = infer(input_image, prompt)
      return image, seed

+ # -------------
+ # Minimal CSS
+ # -------------
+ css = """
  #col-container {
      margin: 0 auto;
      max-width: 960px;
  }
  """

+ # --------------------------
+ # UI (Gradio Blocks layout)
+ # --------------------------
  with gr.Blocks(css=css) as demo:
+     # (layman's) top caption & links
+     gr.Markdown(
+         """# FLUX.1 Kontext [dev]
+         Image editing and manipulation model guidance-distilled from FLUX.1 Kontext [pro],
+         [[blog]](https://bfl.ai/announcements/flux-1-kontext-dev) [[model]](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
+         """
+     )
+
+     with gr.Row():
+         # -------------------------------
+         # Left column: inputs & settings
+         # -------------------------------
+         with gr.Column():
+             # (layman's) you can upload an image to edit or leave it blank to generate from text
+             input_image = gr.Image(label="Upload the image for editing (optional)", type="pil")
+             with gr.Row():
+                 prompt = gr.Text(
+                     label="Prompt",
+                     show_label=False,
+                     max_lines=1,
+                     placeholder="Describe what to create/edit (e.g., 'Neon skyline at night')",
+                     container=False,
+                 )
+                 run_button = gr.Button("Run", scale=0)
+
+             # (layman's) extra knobs if you want finer control
+             with gr.Accordion("Advanced Settings", open=False):
+                 seed = gr.Slider(
+                     label="Seed",
+                     minimum=0,
+                     maximum=MAX_SEED,
+                     step=1,
+                     value=42,
+                 )
+
+                 randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
+
+                 guidance_scale = gr.Slider(
+                     label="Guidance Scale",
+                     minimum=1.0,
+                     maximum=10.0,
+                     step=0.1,
+                     value=2.5,
+                 )
+
+                 steps = gr.Slider(
+                     label="Steps",
+                     minimum=1,
+                     maximum=30,
+                     value=20,
+                     step=1,
+                 )
+
+         # -------------------------
+         # Right column: the output
+         # -------------------------
+         with gr.Column():
+             result = gr.Image(label="Result", show_label=False, interactive=False)
+             reuse_button = gr.Button("Reuse this image", visible=False)
+
+     # (layman's) a few quick examples for testing
+     examples = gr.Examples(
+         examples=[
+             ["flowers.png", "turn the flowers into sunflowers"],
+             ["monster.png", "make this monster ride a skateboard on the beach"],
+             ["cat.png", "make this cat happy"],
+         ],
+         inputs=[input_image, prompt],
+         outputs=[result, seed],
+         fn=infer_example,
+         cache_examples="lazy",
+     )
+
+     # (layman's) wire the "Run" button and Enter key to call our main function
      gr.on(
          triggers=[run_button.click, prompt.submit],
+         fn=infer,
+         inputs=[input_image, prompt, seed, randomize_seed, guidance_scale, steps],
+         outputs=[result, seed, reuse_button],
+         api_name="edit_image",  # <-- MCP tool name for UI-based edit/generate
+         api_description="Edit an uploaded image with a prompt (or generate from text if no image is provided) using FLUX.1 Kontext-dev.",
      )
+
+     # (Optional) If you want a 1-click "reuse image" flow in the UI later:
+     # reuse_button.click(fn=lambda image: image, inputs=[result], outputs=[input_image])
+
+     # ------------------------------------------------------------------
+     # NEW: Register a dedicated MCP tool that does text-to-image only.
+     # This does not create any extra UI — it's a clean API endpoint.
+     # ------------------------------------------------------------------
+     gr.api(
+         text_to_image,
+         api_name="text_to_image",  # <-- MCP tool route
+         api_description=(
+             "Generate a brand-new image from text (no input image required) "
+             "using FLUX.1 Kontext-dev. Returns the image and the seed used."
+         ),
+     )
+
+ # (layman's) start the app with MCP enabled so tools show up to agents (e.g., Claude/Cursor)
+ demo.launch(mcp_server=True)
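
For reference, once the Space runs with `mcp_server=True`, the two endpoints this commit names ("edit_image" and "text_to_image") can also be exercised through the regular Gradio client API; MCP-capable agents connect to the same tools over the Space's MCP endpoint. Below is a minimal client-side sketch, not part of the commit: the Space id `Nymbo/FLUX.1-Kontext-Dev`, the positional argument order, and the shape of the returned values are assumptions inferred from the function signatures above.

    # Hypothetical usage sketch (assumed Space id, argument order, and output shapes).
    # Requires a recent gradio_client (Client and handle_file are part of its public API).
    from gradio_client import Client, handle_file

    client = Client("Nymbo/FLUX.1-Kontext-Dev")  # assumed Space id

    # Dedicated text-to-image tool registered via gr.api(...); two outputs expected (image, seed).
    t2i_result = client.predict(
        "cozy cabin at dusk, cinematic lighting",  # prompt
        42,                                        # seed
        True,                                      # randomize_seed
        2.5,                                       # guidance_scale
        20,                                        # steps
        1024,                                      # width
        1024,                                      # height
        api_name="/text_to_image",
    )

    # UI-backed edit endpoint named via gr.on(..., api_name="edit_image");
    # returns the edited image, the seed used, and an update for the reuse button.
    edit_result = client.predict(
        handle_file("flowers.png"),                # input_image
        "turn the flowers into sunflowers",        # prompt
        42,                                        # seed
        False,                                     # randomize_seed
        2.5,                                       # guidance_scale
        20,                                        # steps
        api_name="/edit_image",
    )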