Spaces:
Running
Running
use HunyuanImage-2.1 for text to image
Browse files
app.py
CHANGED
|
@@ -2833,7 +2833,7 @@ def create_temp_media_url(media_bytes: bytes, filename: str, media_type: str = "
|
|
| 2833 |
"""
|
| 2834 |
try:
|
| 2835 |
# Create unique filename with timestamp and UUID
|
| 2836 |
-
timestamp = datetime.
|
| 2837 |
unique_id = str(uuid.uuid4())[:8]
|
| 2838 |
base_name, ext = os.path.splitext(filename)
|
| 2839 |
unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
|
|
@@ -2925,7 +2925,7 @@ def upload_media_to_hf(media_bytes: bytes, filename: str, media_type: str = "ima
|
|
| 2925 |
# Continue anyway, repo might already exist
|
| 2926 |
|
| 2927 |
# Create unique filename with timestamp and UUID
|
| 2928 |
-
timestamp = datetime.
|
| 2929 |
unique_id = str(uuid.uuid4())[:8]
|
| 2930 |
base_name, ext = os.path.splitext(filename)
|
| 2931 |
unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
|
|
@@ -3029,93 +3029,54 @@ def cleanup_temp_media_files():
|
|
| 3029 |
except Exception as e:
|
| 3030 |
print(f"[TempCleanup] Error during cleanup: {str(e)}")
|
| 3031 |
|
| 3032 |
-
def
|
| 3033 |
-
"""Generate image using
|
| 3034 |
|
| 3035 |
-
Uses
|
| 3036 |
|
| 3037 |
Returns an HTML <img> tag whose src is an uploaded temporary URL.
|
| 3038 |
"""
|
| 3039 |
try:
|
| 3040 |
-
print(f"[Text2Image] Starting generation with prompt: {prompt[:100]}...")
|
| 3041 |
-
|
| 3042 |
-
|
| 3043 |
-
|
| 3044 |
-
|
| 3045 |
-
|
|
|
|
| 3046 |
|
| 3047 |
-
import
|
| 3048 |
-
import json as _json
|
| 3049 |
-
import base64
|
| 3050 |
-
import io as _io
|
| 3051 |
from PIL import Image
|
|
|
|
| 3052 |
|
| 3053 |
-
# Create
|
| 3054 |
-
|
| 3055 |
-
"
|
| 3056 |
-
|
| 3057 |
-
|
|
|
|
| 3058 |
|
| 3059 |
-
|
| 3060 |
-
|
| 3061 |
-
|
| 3062 |
-
|
| 3063 |
-
|
| 3064 |
-
|
| 3065 |
-
|
| 3066 |
-
|
| 3067 |
-
|
| 3068 |
-
|
| 3069 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3070 |
|
| 3071 |
-
try:
|
| 3072 |
-
print("[Text2Image] Making API request to OpenRouter...")
|
| 3073 |
-
response = requests.post(
|
| 3074 |
-
"https://openrouter.ai/api/v1/chat/completions",
|
| 3075 |
-
headers=headers,
|
| 3076 |
-
json=data,
|
| 3077 |
-
timeout=60
|
| 3078 |
-
)
|
| 3079 |
-
response.raise_for_status()
|
| 3080 |
-
result_data = response.json()
|
| 3081 |
-
print(f"[Text2Image] Received API response: {_json.dumps(result_data, indent=2)}")
|
| 3082 |
-
|
| 3083 |
-
# Extract the generated image from the response (using same pattern as image-to-image)
|
| 3084 |
-
message = result_data.get('choices', [{}])[0].get('message', {})
|
| 3085 |
-
|
| 3086 |
-
if message and 'images' in message and message['images']:
|
| 3087 |
-
# Get the first image from the 'images' list
|
| 3088 |
-
image_data = message['images'][0]
|
| 3089 |
-
base64_string = image_data.get('image_url', {}).get('url', '')
|
| 3090 |
-
|
| 3091 |
-
if base64_string and ',' in base64_string:
|
| 3092 |
-
# Remove the "data:image/png;base64," prefix
|
| 3093 |
-
base64_content = base64_string.split(',')[1]
|
| 3094 |
-
|
| 3095 |
-
# Decode the base64 string and create a PIL image
|
| 3096 |
-
img_bytes = base64.b64decode(base64_content)
|
| 3097 |
-
generated_image = Image.open(_io.BytesIO(img_bytes))
|
| 3098 |
-
|
| 3099 |
-
# Convert PIL image to JPEG bytes for upload
|
| 3100 |
-
out_buf = _io.BytesIO()
|
| 3101 |
-
generated_image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
|
| 3102 |
-
image_bytes = out_buf.getvalue()
|
| 3103 |
-
else:
|
| 3104 |
-
raise RuntimeError(f"API returned an invalid image format. Response: {_json.dumps(result_data, indent=2)}")
|
| 3105 |
-
else:
|
| 3106 |
-
raise RuntimeError(f"API did not return an image. Full Response: {_json.dumps(result_data, indent=2)}")
|
| 3107 |
-
|
| 3108 |
-
except requests.exceptions.HTTPError as err:
|
| 3109 |
-
error_body = err.response.text
|
| 3110 |
-
if err.response.status_code == 401:
|
| 3111 |
-
return "Error: Authentication failed. Check your OpenRouter API key."
|
| 3112 |
-
elif err.response.status_code == 429:
|
| 3113 |
-
return "Error: Rate limit exceeded or insufficient credits. Check your OpenRouter account."
|
| 3114 |
-
else:
|
| 3115 |
-
return f"Error: An API error occurred: {error_body}"
|
| 3116 |
-
except Exception as e:
|
| 3117 |
-
return f"Error: An unexpected error occurred: {str(e)}"
|
| 3118 |
-
|
| 3119 |
# Upload and return HTML tag
|
| 3120 |
print("[Text2Image] Uploading image to HF...")
|
| 3121 |
filename = f"generated_image_{image_index}.jpg"
|
|
@@ -3125,8 +3086,9 @@ def generate_image_with_gemini(prompt: str, image_index: int = 0, token: gr.OAut
|
|
| 3125 |
return temp_url
|
| 3126 |
print(f"[Text2Image] Successfully generated image: {temp_url}")
|
| 3127 |
return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
|
|
|
|
| 3128 |
except Exception as e:
|
| 3129 |
-
print(f"
|
| 3130 |
return f"Error generating image (text-to-image): {str(e)}"
|
| 3131 |
|
| 3132 |
def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
|
|
@@ -3651,7 +3613,7 @@ def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
|
|
| 3651 |
# Generate images for each prompt
|
| 3652 |
generated_images = []
|
| 3653 |
for i, prompt in enumerate(image_prompts):
|
| 3654 |
-
image_html =
|
| 3655 |
if not image_html.startswith("Error"):
|
| 3656 |
generated_images.append((i, image_html))
|
| 3657 |
|
|
@@ -3741,7 +3703,7 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
|
|
| 3741 |
placeholder_images = re.findall(img_pattern, html_content)
|
| 3742 |
|
| 3743 |
# Generate a single image
|
| 3744 |
-
image_html =
|
| 3745 |
if image_html.startswith("Error"):
|
| 3746 |
return ""
|
| 3747 |
|
|
@@ -4272,8 +4234,8 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
|
|
| 4272 |
print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
|
| 4273 |
# Single-image flow for text-to-image (LLM placement first, fallback deterministic)
|
| 4274 |
try:
|
| 4275 |
-
print(f"[MediaApply] Calling
|
| 4276 |
-
image_html_tag =
|
| 4277 |
print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
|
| 4278 |
if not (image_html_tag or "").startswith("Error"):
|
| 4279 |
print("[MediaApply] Attempting LLM placement of image...")
|
|
@@ -7233,7 +7195,7 @@ with gr.Blocks(
|
|
| 7233 |
label="🎨 Generate Images (text → image)",
|
| 7234 |
value=False,
|
| 7235 |
visible=True,
|
| 7236 |
-
info="Include generated images in your outputs using
|
| 7237 |
)
|
| 7238 |
text_to_image_prompt = gr.Textbox(
|
| 7239 |
label="Text-to-Image Prompt",
|
|
|
|
| 2833 |
"""
|
| 2834 |
try:
|
| 2835 |
# Create unique filename with timestamp and UUID
|
| 2836 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 2837 |
unique_id = str(uuid.uuid4())[:8]
|
| 2838 |
base_name, ext = os.path.splitext(filename)
|
| 2839 |
unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
|
|
|
|
| 2925 |
# Continue anyway, repo might already exist
|
| 2926 |
|
| 2927 |
# Create unique filename with timestamp and UUID
|
| 2928 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 2929 |
unique_id = str(uuid.uuid4())[:8]
|
| 2930 |
base_name, ext = os.path.splitext(filename)
|
| 2931 |
unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
|
|
|
|
| 3029 |
except Exception as e:
|
| 3030 |
print(f"[TempCleanup] Error during cleanup: {str(e)}")
|
| 3031 |
|
| 3032 |
+
def generate_image_with_hunyuan(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
|
| 3033 |
+
"""Generate image using Tencent HunyuanImage-2.1 via Hugging Face InferenceClient.
|
| 3034 |
|
| 3035 |
+
Uses tencent/HunyuanImage-2.1 via HuggingFace InferenceClient with fal-ai provider.
|
| 3036 |
|
| 3037 |
Returns an HTML <img> tag whose src is an uploaded temporary URL.
|
| 3038 |
"""
|
| 3039 |
try:
|
| 3040 |
+
print(f"[Text2Image] Starting HunyuanImage generation with prompt: {prompt[:100]}...")
|
| 3041 |
+
|
| 3042 |
+
# Check for HF_TOKEN
|
| 3043 |
+
hf_token = os.getenv('HF_TOKEN')
|
| 3044 |
+
if not hf_token:
|
| 3045 |
+
print("[Text2Image] Missing HF_TOKEN")
|
| 3046 |
+
return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
|
| 3047 |
|
| 3048 |
+
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
| 3049 |
from PIL import Image
|
| 3050 |
+
import io as _io
|
| 3051 |
|
| 3052 |
+
# Create InferenceClient with fal-ai provider
|
| 3053 |
+
client = InferenceClient(
|
| 3054 |
+
provider="fal-ai",
|
| 3055 |
+
api_key=hf_token,
|
| 3056 |
+
bill_to="huggingface",
|
| 3057 |
+
)
|
| 3058 |
|
| 3059 |
+
print("[Text2Image] Making API request to HuggingFace InferenceClient...")
|
| 3060 |
+
|
| 3061 |
+
# Generate image using HunyuanImage-2.1 model
|
| 3062 |
+
image = client.text_to_image(
|
| 3063 |
+
prompt,
|
| 3064 |
+
model="tencent/HunyuanImage-2.1",
|
| 3065 |
+
)
|
| 3066 |
+
|
| 3067 |
+
print(f"[Text2Image] Successfully generated image with size: {image.size}")
|
| 3068 |
+
|
| 3069 |
+
# Resize image to reduce size while maintaining quality
|
| 3070 |
+
max_size = 1024
|
| 3071 |
+
if image.width > max_size or image.height > max_size:
|
| 3072 |
+
image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
|
| 3073 |
+
|
| 3074 |
+
# Convert PIL Image to bytes for upload
|
| 3075 |
+
buffer = _io.BytesIO()
|
| 3076 |
+
# Save as JPEG with good quality
|
| 3077 |
+
image.convert('RGB').save(buffer, format='JPEG', quality=90, optimize=True)
|
| 3078 |
+
image_bytes = buffer.getvalue()
|
| 3079 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3080 |
# Upload and return HTML tag
|
| 3081 |
print("[Text2Image] Uploading image to HF...")
|
| 3082 |
filename = f"generated_image_{image_index}.jpg"
|
|
|
|
| 3086 |
return temp_url
|
| 3087 |
print(f"[Text2Image] Successfully generated image: {temp_url}")
|
| 3088 |
return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
|
| 3089 |
+
|
| 3090 |
except Exception as e:
|
| 3091 |
+
print(f"[Text2Image] Error generating image with HunyuanImage: {str(e)}")
|
| 3092 |
return f"Error generating image (text-to-image): {str(e)}"
|
| 3093 |
|
| 3094 |
def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
|
|
|
|
| 3613 |
# Generate images for each prompt
|
| 3614 |
generated_images = []
|
| 3615 |
for i, prompt in enumerate(image_prompts):
|
| 3616 |
+
image_html = generate_image_with_hunyuan(prompt, i, token=None) # TODO: Pass token from parent context
|
| 3617 |
if not image_html.startswith("Error"):
|
| 3618 |
generated_images.append((i, image_html))
|
| 3619 |
|
|
|
|
| 3703 |
placeholder_images = re.findall(img_pattern, html_content)
|
| 3704 |
|
| 3705 |
# Generate a single image
|
| 3706 |
+
image_html = generate_image_with_hunyuan(prompt, 0, token=None) # TODO: Pass token from parent context
|
| 3707 |
if image_html.startswith("Error"):
|
| 3708 |
return ""
|
| 3709 |
|
|
|
|
| 4234 |
print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
|
| 4235 |
# Single-image flow for text-to-image (LLM placement first, fallback deterministic)
|
| 4236 |
try:
|
| 4237 |
+
print(f"[MediaApply] Calling generate_image_with_hunyuan with prompt: {t2i_prompt[:50]}...")
|
| 4238 |
+
image_html_tag = generate_image_with_hunyuan(t2i_prompt, 0, token=token)
|
| 4239 |
print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
|
| 4240 |
if not (image_html_tag or "").startswith("Error"):
|
| 4241 |
print("[MediaApply] Attempting LLM placement of image...")
|
|
|
|
| 7195 |
label="🎨 Generate Images (text → image)",
|
| 7196 |
value=False,
|
| 7197 |
visible=True,
|
| 7198 |
+
info="Include generated images in your outputs using HunyuanImage-2.1"
|
| 7199 |
)
|
| 7200 |
text_to_image_prompt = gr.Textbox(
|
| 7201 |
label="Text-to-Image Prompt",
|