LiuZichen committed on
Commit 47a0ec7 · verified · 1 Parent(s): 4129e4e

Upload 3 files

Files changed (3)
  1. app.py +313 -0
  2. requirements.txt +10 -0
  3. util.py +207 -0
app.py ADDED
@@ -0,0 +1,313 @@
+ import subprocess
+ import shlex
+ # Install the custom component if needed
+ subprocess.run(
+     shlex.split(
+         "pip install ./gradio_magicquillv2-0.0.1-py3-none-any.whl"
+     )
+ )
+ import sys
+ import os
+ import gradio as gr
+ import tempfile
+ import numpy as np
+ import io
+ import base64
+ import json
+ import uvicorn
+ import torch
+ from fastapi import FastAPI, Request
+ from fastapi.middleware.cors import CORSMiddleware
+ from gradio_client import Client, handle_file
+ from gradio_magicquillv2 import MagicQuillV2
+ from PIL import Image
+
+
+ from util import (
+     read_base64_image as read_base64_image_utils,
+     tensor_to_base64,
+     get_mask_bbox
+ )
+
+ # --- Configuration ---
+ # Set this to your backend Space (running app_backend.py), as a repo id or full URL
+ # Example: "https://huggingface.co/spaces/username/backend-space"
+ hf_token = os.environ.get("HF_TOKEN")
+ BACKEND_URL = "LiuZichen/MagicQuillV2"
+ SAM_URL = "LiuZichen/MagicQuillHelper"
+
+ print(f"Connecting to backend at: {BACKEND_URL}")
+
+ backend_client = Client(BACKEND_URL, token=hf_token)
+
+ print(f"Connecting to SAM client at: {SAM_URL}")
+ sam_client = Client(SAM_URL, token=hf_token)
+
+ # --- Helper Functions ---
+
+ def generate_image_handler(x, negative_prompt, fine_edge, fix_perspective, grow_size, edge_strength, color_strength, local_strength, seed, steps, cfg):
+     merged_image = x['from_frontend']['img']
+     total_mask = x['from_frontend']['total_mask']
+     original_image = x['from_frontend']['original_image']
+     add_color_image = x['from_frontend']['add_color_image']
+     add_edge_mask = x['from_frontend']['add_edge_mask']
+     remove_edge_mask = x['from_frontend']['remove_edge_mask']
+     fill_mask = x['from_frontend']['fill_mask']
+     add_prop_image = x['from_frontend']['add_prop_image']
+     positive_prompt = x['from_backend']['prompt']
+
+     if backend_client is None:
+         print("Backend client not initialized")
+         x["from_backend"]["generated_image"] = None
+         return x
+
+     try:
+         # Call the backend API
+         # The order of arguments must match the app_backend.py input list
+         res_base64 = backend_client.predict(
+             merged_image,       # merged_image
+             total_mask,         # total_mask
+             original_image,     # original_image
+             add_color_image,    # add_color_image
+             add_edge_mask,      # add_edge_mask
+             remove_edge_mask,   # remove_edge_mask
+             fill_mask,          # fill_mask
+             add_prop_image,     # add_prop_image
+             positive_prompt,    # positive_prompt
+             negative_prompt,    # negative_prompt
+             fine_edge,          # fine_edge
+             fix_perspective,    # fix_perspective
+             grow_size,          # grow_size
+             edge_strength,      # edge_strength
+             color_strength,     # color_strength
+             local_strength,     # local_strength
+             seed,               # seed
+             steps,              # steps
+             cfg,                # cfg
+             api_name="/generate"
+         )
+         x["from_backend"]["generated_image"] = res_base64
+     except Exception as e:
+         print(f"Error in generation: {e}")
+         x["from_backend"]["generated_image"] = None
+
+     return x
+
+ # --- Gradio UI ---
+
+ with gr.Blocks(title="MagicQuill V2") as demo:
+     with gr.Row():
+         ms = MagicQuillV2()
+
+     with gr.Row():
+         with gr.Column():
+             btn = gr.Button("Run", variant="primary")
+         with gr.Column():
+             with gr.Accordion("parameters", open=False):
+                 negative_prompt = gr.Textbox(label="Negative Prompt", value="", interactive=True)
+                 fine_edge = gr.Radio(label="Fine Edge", choices=['enable', 'disable'], value='disable', interactive=True)
+                 fix_perspective = gr.Radio(label="Fix Perspective", choices=['enable', 'disable'], value='disable', interactive=True)
+                 grow_size = gr.Slider(label="Grow Size", minimum=10, maximum=100, value=50, step=1, interactive=True)
+                 edge_strength = gr.Slider(label="Edge Strength", minimum=0.0, maximum=5.0, value=0.6, step=0.01, interactive=True)
+                 color_strength = gr.Slider(label="Color Strength", minimum=0.0, maximum=5.0, value=1.5, step=0.01, interactive=True)
+                 local_strength = gr.Slider(label="Local Strength", minimum=0.0, maximum=5.0, value=1.0, step=0.01, interactive=True)
+                 seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
+                 steps = gr.Slider(label="Steps", minimum=0, maximum=50, value=20, interactive=True)
+                 cfg = gr.Slider(label="CFG", minimum=0.0, maximum=20.0, value=3.5, step=0.1, interactive=True)
+
+     btn.click(
+         generate_image_handler,
+         inputs=[ms, negative_prompt, fine_edge, fix_perspective, grow_size, edge_strength, color_strength, local_strength, seed, steps, cfg],
+         outputs=ms
+     )
+
+ # --- FastAPI App ---
+
+ app = FastAPI()
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=['*'],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Helper to fix root path if running behind proxy (Spaces)
+ def get_root_url(request: Request, route_path: str, root_path: str | None):
+     return root_path
+
+ import gradio.route_utils
+ gr.route_utils.get_root_url = get_root_url
+
+ # Mount the Gradio app
+ gr.mount_gradio_app(app, demo, path="/demo", root_path="/demo")
+
+ @app.post("/magic_quill/generate_image")
+ async def generate_image(request: Request):
+     data = await request.json()
+
+     if backend_client is None:
+         return {'error': 'Backend client not connected'}
+
+     try:
+         res = backend_client.predict(
+             data["merged_image"],
+             data["total_mask"],
+             data["original_image"],
+             data["add_color_image"],
+             data["add_edge_mask"],
+             data["remove_edge_mask"],
+             data["fill_mask"],
+             data["add_prop_image"],
+             data["positive_prompt"],
+             data["negative_prompt"],
+             data["fine_edge"],
+             data["fix_perspective"],
+             data["grow_size"],
+             data["edge_strength"],
+             data["color_strength"],
+             data["local_strength"],
+             data["seed"],
+             data["steps"],
+             data["cfg"],
+             api_name="/generate"
+         )
+         return {'res': res}
+     except Exception as e:
+         print(f"Error in backend generation: {e}")
+         return {'error': str(e)}
+
+ @app.post("/magic_quill/process_background_img")
+ async def process_background_img(request: Request):
+     img = await request.json()
+     from util import process_background
+     # process_background returns tensor [1, H, W, 3] in uint8 or float
+     resized_img_tensor = process_background(img)
+
+     # tensor_to_base64 from util expects a tensor
+     resized_img_base64 = "data:image/webp;base64," + tensor_to_base64(
+         resized_img_tensor,
+         quality=80,
+         method=6
+     )
+     return resized_img_base64
+
+ @app.post("/magic_quill/segmentation")
+ async def segmentation(request: Request):
+     json_data = await request.json()
+     image_base64 = json_data.get("image", None)
+     coordinates_positive = json_data.get("coordinates_positive", None)
+     coordinates_negative = json_data.get("coordinates_negative", None)
+     bboxes = json_data.get("bboxes", None)
+
+     if sam_client is None:
+         return {"error": "sam client not initialized"}
+
+     # Process coordinates and bboxes (copied from original app.py)
+     pos_coordinates = None
+     if coordinates_positive and len(coordinates_positive) > 0:
+         pos_coordinates = []
+         for coord in coordinates_positive:
+             coord['x'] = int(round(coord['x']))
+             coord['y'] = int(round(coord['y']))
+             pos_coordinates.append({'x': coord['x'], 'y': coord['y']})
+         pos_coordinates = json.dumps(pos_coordinates)
+
+     neg_coordinates = None
+     if coordinates_negative and len(coordinates_negative) > 0:
+         neg_coordinates = []
+         for coord in coordinates_negative:
+             coord['x'] = int(round(coord['x']))
+             coord['y'] = int(round(coord['y']))
+             neg_coordinates.append({'x': coord['x'], 'y': coord['y']})
+         neg_coordinates = json.dumps(neg_coordinates)
+
+     bboxes_xyxy = None
+     if bboxes and len(bboxes) > 0:
+         bboxes_xyxy = []
+         for bbox in bboxes:
+             # Skip incomplete boxes
+             if (bbox.get("startX") is None or
+                 bbox.get("startY") is None or
+                 bbox.get("endX") is None or
+                 bbox.get("endY") is None):
+                 continue
+             x_min = max(min(int(bbox["startX"]), int(bbox["endX"])), 0)
+             y_min = max(min(int(bbox["startY"]), int(bbox["endY"])), 0)
+             x_max = max(int(bbox["startX"]), int(bbox["endX"]))
+             y_max = max(int(bbox["startY"]), int(bbox["endY"]))
+             bboxes_xyxy.append((x_min, y_min, x_max, y_max))
+
+         if bboxes_xyxy:
+             bboxes_xyxy = json.dumps(bboxes_xyxy)
+
+     print(f"Segmentation request: pos={pos_coordinates}, neg={neg_coordinates}, bboxes={bboxes_xyxy}")
+
+     try:
+         # Save base64 image to temp file
+         image_bytes = read_base64_image_utils(image_base64)
+         pil_image = Image.open(image_bytes)
+         with tempfile.NamedTemporaryFile(suffix=".webp", delete=False) as temp_in:
+             pil_image.save(temp_in.name, format="WEBP", quality=80)
+             temp_in_path = temp_in.name
+
+         # Execute segmentation via Client
+         result_path = sam_client.predict(
+             handle_file(temp_in_path),
+             pos_coordinates,
+             neg_coordinates,
+             bboxes_xyxy,
+             api_name="/segment"
+         )
+
+         os.unlink(temp_in_path)
+
+         if isinstance(result_path, (list, tuple)):
+             result_path = result_path[0]
+
+         if not result_path or not os.path.exists(result_path):
+             raise RuntimeError("Client returned invalid result path")
+
+         mask_pil = Image.open(result_path)
+         if mask_pil.mode != 'L':
+             mask_pil = mask_pil.convert('L')
+
+         pil_image = pil_image.convert("RGB")
+         if pil_image.size != mask_pil.size:
+             mask_pil = mask_pil.resize(pil_image.size, Image.NEAREST)
+
+         r, g, b = pil_image.split()
+         res_pil = Image.merge("RGBA", (r, g, b, mask_pil))
+
+         mask_tensor = torch.from_numpy(np.array(mask_pil) / 255.0).float().unsqueeze(0)
+         mask_bbox = get_mask_bbox(mask_tensor)
+         if mask_bbox:
+             x_min, y_min, x_max, y_max = mask_bbox
+             seg_bbox = {'startX': x_min, 'startY': y_min, 'endX': x_max, 'endY': y_max}
+         else:
+             seg_bbox = {'startX': 0, 'startY': 0, 'endX': 0, 'endY': 0}
+
+         print(seg_bbox)
+
+         buffered = io.BytesIO()
+         res_pil.save(buffered, format="PNG")
+         image_base64_res = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+         return {
+             "error": False,
+             "segmentation_image": "data:image/png;base64," + image_base64_res,
+             "segmentation_bbox": seg_bbox
+         }
+
+     except Exception as e:
+         print(f"Error in segmentation: {e}")
+         return {"error": str(e)}
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
+
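For reference, once app.py is running (locally it serves on port 7860, per the `__main__` block above), the FastAPI routes can be exercised with plain `requests` from requirements.txt. The sketch below is a minimal example under stated assumptions: the host/port and the sample file "input.png" are placeholders, not part of the repository.

# Minimal sketch: POST a base64 data URL to /magic_quill/process_background_img.
# Assumes app.py is running locally on port 7860 and "input.png" exists (both assumptions).
import base64
import requests

with open("input.png", "rb") as f:
    data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode("utf-8")

# The endpoint reads its JSON body as the data-URL string itself.
resp = requests.post(
    "http://localhost:7860/magic_quill/process_background_img",
    json=data_url,
)
resp.raise_for_status()
resized_data_url = resp.json()  # "data:image/webp;base64,..." at a preferred resolution
print(resized_data_url[:64])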
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ fastapi
+ uvicorn
+ gradio==5.4.0
+ gradio_client
+ numpy
+ opencv-python
+ pillow
+ requests
+ torch
+ torchvision
util.py ADDED
@@ -0,0 +1,207 @@
+ import random
+ from collections import Counter
+ import numpy as np
+ from torchvision import transforms
+ import cv2  # OpenCV
+ import torch
+ import re
+ import io
+ import base64
+ from PIL import Image, ImageOps
+
+ PREFERRED_KONTEXT_RESOLUTIONS = [
+     (672, 1568),
+     (688, 1504),
+     (720, 1456),
+     (752, 1392),
+     (800, 1328),
+     (832, 1248),
+     (880, 1184),
+     (944, 1104),
+     (1024, 1024),
+     (1104, 944),
+     (1184, 880),
+     (1248, 832),
+     (1328, 800),
+     (1392, 752),
+     (1456, 720),
+     (1504, 688),
+     (1568, 672),
+ ]
+
+ def get_bounding_box_from_mask(mask, padded=False):
+     mask = mask.squeeze()
+     rows, cols = torch.where(mask > 0.5)
+     if len(rows) == 0 or len(cols) == 0:
+         return (0, 0, 0, 0)
+     height, width = mask.shape
+     if padded:
+         padded_size = max(width, height)
+         if width < height:
+             offset_x = (padded_size - width) / 2
+             offset_y = 0
+         else:
+             offset_y = (padded_size - height) / 2
+             offset_x = 0
+         top_left_x = round(float((torch.min(cols).item() + offset_x) / padded_size), 3)
+         bottom_right_x = round(float((torch.max(cols).item() + offset_x) / padded_size), 3)
+         top_left_y = round(float((torch.min(rows).item() + offset_y) / padded_size), 3)
+         bottom_right_y = round(float((torch.max(rows).item() + offset_y) / padded_size), 3)
+     else:
+         offset_x = 0
+         offset_y = 0
+
+         top_left_x = round(float(torch.min(cols).item() / width), 3)
+         bottom_right_x = round(float(torch.max(cols).item() / width), 3)
+         top_left_y = round(float(torch.min(rows).item() / height), 3)
+         bottom_right_y = round(float(torch.max(rows).item() / height), 3)
+
+     return (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
+
+ def extract_bbox(text):
+     pattern = r"\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]"
+     match = re.search(pattern, text)
+     if match is None:
+         return None
+     return (int(match.group(1)), int(match.group(2)), int(match.group(3)), int(match.group(4)))
+
+ def resize_bbox(bbox, width_ratio, height_ratio):
+     x1, y1, x2, y2 = bbox
+     new_x1 = int(x1 * width_ratio)
+     new_y1 = int(y1 * height_ratio)
+     new_x2 = int(x2 * width_ratio)
+     new_y2 = int(y2 * height_ratio)
+
+     return (new_x1, new_y1, new_x2, new_y2)
+
+
+ def tensor_to_base64(tensor, quality=80, method=6):
+     tensor = tensor.squeeze(0).clone().detach().cpu()
+
+     if tensor.dtype == torch.float32 or tensor.dtype == torch.float64 or tensor.dtype == torch.float16:
+         tensor *= 255
+         tensor = tensor.to(torch.uint8)
+
+     if tensor.ndim == 2:  # grayscale image
+         pil_image = Image.fromarray(tensor.numpy(), 'L')
+         pil_image = pil_image.convert('RGB')
+     elif tensor.ndim == 3:
+         if tensor.shape[2] == 1:  # single channel
+             pil_image = Image.fromarray(tensor.numpy().squeeze(2), 'L')
+             pil_image = pil_image.convert('RGB')
+         elif tensor.shape[2] == 3:  # RGB
+             pil_image = Image.fromarray(tensor.numpy(), 'RGB')
+         elif tensor.shape[2] == 4:  # RGBA
+             pil_image = Image.fromarray(tensor.numpy(), 'RGBA')
+         else:
+             raise ValueError(f"Unsupported number of channels: {tensor.shape[2]}")
+     else:
+         raise ValueError(f"Unsupported tensor dimensions: {tensor.ndim}")
+
+     buffered = io.BytesIO()
+     pil_image.save(buffered, format="WEBP", quality=quality, method=method, lossless=False)
+     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+     return img_str
+
+ def load_and_preprocess_image(image_path, convert_to='RGB', has_alpha=False):
+     image = Image.open(image_path)
+     image = ImageOps.exif_transpose(image)
+
+     if image.mode == 'RGBA':
+         background = Image.new('RGBA', image.size, (255, 255, 255, 255))
+         image = Image.alpha_composite(background, image)
+     image = image.convert(convert_to)
+     image_array = np.array(image).astype(np.float32) / 255.0
+
+     if has_alpha and convert_to == 'RGBA':
+         image_tensor = torch.from_numpy(image_array)[None,]
+     else:
+         if len(image_array.shape) == 3 and image_array.shape[2] > 3:
+             image_array = image_array[:, :, :3]
+         image_tensor = torch.from_numpy(image_array)[None,]
+
+     return image_tensor
+
+ def process_background(base64_image, convert_to='RGB', size=None):
+     image_data = read_base64_image(base64_image)
+     image = Image.open(image_data)
+     image = ImageOps.exif_transpose(image)
+     image = image.convert(convert_to)
+
+     # Select preferred size by closest aspect ratio, then snap to multiple_of
+     w0, h0 = image.size
+     aspect_ratio = (w0 / h0) if h0 != 0 else 1.0
+     # Choose the (w, h) whose aspect ratio is closest to the input
+     _, tw, th = min((abs(aspect_ratio - w / h), w, h) for (w, h) in PREFERRED_KONTEXT_RESOLUTIONS)
+     multiple_of = 16  # default: vae_scale_factor (8) * 2
+     tw = (tw // multiple_of) * multiple_of
+     th = (th // multiple_of) * multiple_of
+
+     if (w0, h0) != (tw, th):
+         image = image.resize((tw, th), resample=Image.BICUBIC)
+
+     image_array = np.array(image).astype(np.uint8)
+     image_tensor = torch.from_numpy(image_array)[None,]
+     return image_tensor
+
+ def read_base64_image(base64_image):
+     if base64_image.startswith("data:image/png;base64,"):
+         base64_image = base64_image.split(",")[1]
+     elif base64_image.startswith("data:image/jpeg;base64,"):
+         base64_image = base64_image.split(",")[1]
+     elif base64_image.startswith("data:image/webp;base64,"):
+         base64_image = base64_image.split(",")[1]
+     else:
+         raise ValueError("Unsupported image format.")
+     image_data = base64.b64decode(base64_image)
+     return io.BytesIO(image_data)
+
+ def create_alpha_mask(image_path):
+     """Create an alpha mask from the alpha channel of an image."""
+     image = Image.open(image_path)
+     image = ImageOps.exif_transpose(image)
+     mask = torch.zeros((1, image.height, image.width), dtype=torch.float32)
+     if 'A' in image.getbands():
+         alpha_channel = np.array(image.getchannel('A')).astype(np.float32) / 255.0
+         mask[0] = 1.0 - torch.from_numpy(alpha_channel)
+     return mask
+
+ def get_mask_bbox(mask_tensor, padding=10):
+     assert len(mask_tensor.shape) == 3 and mask_tensor.shape[0] == 1
+     _, H, W = mask_tensor.shape
+     mask_2d = mask_tensor.squeeze(0)
+
+     y_coords, x_coords = torch.where(mask_2d > 0)
+
+     if len(y_coords) == 0:
+         return None
+
+     x_min = int(torch.min(x_coords))
+     y_min = int(torch.min(y_coords))
+     x_max = int(torch.max(x_coords))
+     y_max = int(torch.max(y_coords))
+
+     x_min = max(0, x_min - padding)
+     y_min = max(0, y_min - padding)
+     x_max = min(W - 1, x_max + padding)
+     y_max = min(H - 1, y_max + padding)
+
+     return x_min, y_min, x_max, y_max
+
+ def tensor_to_pil(tensor):
+     tensor = tensor.squeeze(0).clone().detach().cpu()
+     if tensor.dtype in [torch.float32, torch.float64, torch.float16]:
+         if tensor.max() <= 1.0:
+             tensor *= 255
+         tensor = tensor.to(torch.uint8)
+
+     if tensor.ndim == 2:  # grayscale image [H, W]
+         return Image.fromarray(tensor.numpy(), 'L')
+     elif tensor.ndim == 3:
+         if tensor.shape[2] == 1:  # single channel [H, W, 1]
+             return Image.fromarray(tensor.numpy().squeeze(2), 'L')
+         elif tensor.shape[2] >= 3:  # RGB [H, W, 3]; drop any extra channels such as alpha
+             return Image.fromarray(tensor.numpy()[:, :, :3], 'RGB')
+         else:
+             raise ValueError(f"Unsupported number of channels: {tensor.shape[2]}")
+     else:
+         raise ValueError(f"Unsupported tensor dimensions: {tensor.ndim}")
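A small usage sketch for the helpers in util.py. Assumptions not in the repository: util.py is importable from the working directory and "photo.png" is a placeholder test image.

# Usage sketch for util.py (assumes "photo.png" exists; file name is a placeholder).
import base64
import torch
from util import process_background, tensor_to_base64, get_mask_bbox

# Encode a local file as the data-URL format read_base64_image() expects.
with open("photo.png", "rb") as f:
    data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode("utf-8")

# Resize to the closest preferred Kontext resolution; returns a [1, H, W, 3] uint8 tensor.
img_tensor = process_background(data_url)
print(img_tensor.shape, img_tensor.dtype)

# Re-encode the tensor as a base64 WebP string (no data-URL prefix).
webp_b64 = tensor_to_base64(img_tensor, quality=80, method=6)

# get_mask_bbox expects a [1, H, W] float mask; here a dummy rectangular mask.
mask = torch.zeros((1, 512, 512))
mask[0, 100:200, 150:300] = 1.0
print(get_mask_bbox(mask, padding=10))  # (140, 90, 309, 209)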