use data URIs for generated media

app.py (CHANGED)
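The change replaces file:// URLs with inline base64 data: URIs, so generated media travels inside the HTML itself and still plays after the Space is deployed (at roughly a 4/3 size overhead from base64). A minimal sketch of the encoding step, with illustrative names and stand-in bytes rather than the app's real inference output:

```python
import base64

def media_data_uri(raw: bytes, mime: str = "video/mp4") -> str:
    """Encode raw media bytes as a data: URI that can be embedded directly in HTML."""
    return f"data:{mime};base64," + base64.b64encode(raw).decode()

# Illustrative only: the app passes the bytes returned by the inference client.
clip_bytes = b"\x00\x00\x00\x18ftypmp42"  # stand-in for real MP4 bytes
tag = f'<video controls muted><source src="{media_data_uri(clip_bytes)}" type="video/mp4" /></video>'
print(tag[:80])
```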
Before (context and removed lines):

@@ -69,6 +69,35 @@ Always respond with code that can be executed or rendered directly.
 
 Always output only the HTML code inside a ```html ... ``` code block, and do not include any explanations or extra text. Do NOT add the language name at the top of the code output."""
 
 def llm_place_media(html_content: str, media_html_tag: str, media_kind: str = "image") -> str:
     """Ask a lightweight model to produce search/replace blocks that insert media_html_tag in the best spot.
 
@@ -78,13 +107,30 @@ def llm_place_media(html_content: str, media_html_tag: str, media_kind: str = "i
         client = get_inference_client("Qwen/Qwen3-Coder-480B-A35B-Instruct", "auto")
         system_prompt = (
             "You are a code editor. Insert the provided media tag into the given HTML in the most semantically appropriate place.\n"
             "Return ONLY search/replace blocks using the exact markers: <<<<<<< SEARCH, =======, >>>>>>> REPLACE.\n"
             "Do NOT include any commentary. Ensure the SEARCH block matches exact lines from the input.\n"
         )
         user_payload = (
             "HTML Document:\n" + html_content + "\n\n" +
-            f"Media ({media_kind}):\n" +
             "Produce search/replace blocks now."
         )
         messages = [
@@ -98,6 +144,16 @@ def llm_place_media(html_content: str, media_html_tag: str, media_kind: str = "i
             temperature=0.2,
         )
         text = (completion.choices[0].message.content or "") if completion and completion.choices else ""
         return text.strip()
     except Exception as e:
         print(f"[LLMPlaceMedia] Fallback due to error: {e}")
@@ -1631,6 +1687,115 @@ def process_image_for_model(image):
     image.save(buffer, format='PNG')
     img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
     return f"data:image/png;base64,{img_str}"
 
 def generate_image_with_qwen(prompt: str, image_index: int = 0) -> str:
     """Generate image using Qwen image model via Hugging Face InferenceClient with optimized data URL"""
     try:
@@ -1841,52 +2006,33 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         )
         print(f"[Image2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
 
-            # Note: this may not be fully standards-compliant on Windows
-            if file_path.startswith('/'):
-                file_url = f"file:///{file_path.lstrip('/')}"  # file:///abs/path
-            else:
-                file_url = f"file:///{file_path}"
-        except Exception:
-            file_url = None
-
-        if file_url:
-            video_html = (
-                f"<video controls autoplay muted loop playsinline style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\">"
-                f"<source src=\"{file_url}\" type=\"video/mp4\" />"
-                f"Your browser does not support the video tag."
-                f"</video>"
-            )
-        else:
-            # If a file URL cannot be constructed, signal error to avoid embedding data URIs.
-            return "Error generating video (image-to-video): Could not persist video to a local file."
-        print("[Image2Video] Successfully generated video HTML tag")
         return video_html
     except Exception as e:
         import traceback
@@ -1898,7 +2044,7 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
 def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> str:
     """Generate a video from a text prompt using Hugging Face InferenceClient.
 
-    Returns an HTML <video> tag
     """
     try:
         print("[Text2Video] Starting video generation from text")
@@ -1932,41 +2078,33 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
         )
         print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
 
-            except Exception:
-                file_size = -1
-            print(f"[Text2Video] Saved video to temp file: {file_path} (size={file_size} bytes)")
-        except Exception as save_exc:
-            print(f"[Text2Video] Warning: could not persist temp video file: {save_exc}")
-
-        # Build file:// URL
-        file_url = None
-        try:
-            if 'file_path' in locals() and file_path:
-                from pathlib import Path
-                file_url = Path(file_path).as_uri()
-        except Exception:
-            file_url = None
-
-        if not file_url:
-            return "Error generating video (text-to-video): Could not persist video to a local file."
-
         video_html = (
         )
         return video_html
     except Exception as e:
         import traceback
@@ -1978,7 +2116,7 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
 def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None) -> str:
     """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
 
-
     Requires ELEVENLABS_API_KEY in the environment.
     """
     try:
@@ -2001,38 +2139,30 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
         except Exception as e:
             return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
 
-        # Build file URI
-        try:
-            from pathlib import Path
-            file_url = Path(file_path).as_uri()
-        except Exception:
-            if file_path.startswith('/'):
-                file_url = f"file:///{file_path.lstrip('/')}"
-            else:
-                file_url = f"file:///{file_path}"
-
         audio_html = (
             "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
             " <div style=\"font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px\">"
             " <span>🎵 Generated music</span>"
             " </div>"
             f" <audio controls autoplay loop style=\"width:100%;outline:none;\">"
-            f" <source src=\"{
             " Your browser does not support the audio element."
             " </audio>"
             "</div>"
         )
         return audio_html
     except Exception as e:
         return f"Error generating music: {str(e)}"
@@ -2315,15 +2445,50 @@ def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str
 {REPLACE_END}""")
         return '\n\n'.join(blocks)
 
-    # Otherwise insert after <body>
     if '<body' in html_content:
 {insertion_point}
 {DIVIDER}
 {insertion_point}
 {REPLACE_END}"""
 
     # If no <body>, just append
@@ -2586,10 +2751,17 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         try:
             video_html_tag = generate_video_from_image(input_image_data, i2v_prompt, session_id=session_id)
             if not (video_html_tag or "").startswith("Error"):
             else:
                 blocks_v = ""
-        except Exception:
             blocks_v = ""
         if not blocks_v:
             blocks_v = create_video_replacement_blocks_from_input_image(result, i2v_prompt, input_image_data, session_id=session_id)
@@ -2622,10 +2794,17 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         try:
             video_html_tag = generate_video_from_text(t2v_prompt, session_id=session_id)
             if not (video_html_tag or "").startswith("Error"):
             else:
                 blocks_tv = ""
-        except Exception:
             blocks_tv = ""
         if not blocks_tv:
             blocks_tv = create_video_replacement_blocks_text_to_video(result, t2v_prompt, session_id=session_id)

After (context and added lines):
@@ -69,6 +69,35 @@ Always respond with code that can be executed or rendered directly.
 
 Always output only the HTML code inside a ```html ... ``` code block, and do not include any explanations or extra text. Do NOT add the language name at the top of the code output."""
 
+def validate_video_html(video_html: str) -> bool:
+    """Validate that the video HTML is well-formed and safe to insert."""
+    try:
+        # Basic checks for video HTML structure
+        if not video_html or not video_html.strip():
+            return False
+
+        # Check for required video elements
+        if '<video' not in video_html or '</video>' not in video_html:
+            return False
+
+        # Check for proper source tag
+        if '<source' not in video_html:
+            return False
+
+        # Check for data URI format
+        if 'data:video/mp4;base64,' not in video_html:
+            return False
+
+        # Basic HTML structure validation
+        video_start = video_html.find('<video')
+        video_end = video_html.find('</video>') + 8
+        if video_start == -1 or video_end == 7:  # 7 means </video> not found
+            return False
+
+        return True
+    except Exception:
+        return False
+
 def llm_place_media(html_content: str, media_html_tag: str, media_kind: str = "image") -> str:
     """Ask a lightweight model to produce search/replace blocks that insert media_html_tag in the best spot.
 
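The new validate_video_html helper only accepts markup that embeds an MP4 data URI, which is what rejects any leftover file:// output; a quick illustration (assuming the function is imported from app.py):

```python
good = '<video controls><source src="data:video/mp4;base64,AAAA" type="video/mp4" /></video>'
bad = '<video controls><source src="file:///tmp/clip.mp4" type="video/mp4" /></video>'

assert validate_video_html(good) is True   # balanced tags, <source>, and a data URI
assert validate_video_html(bad) is False   # file:// sources are now rejected
```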
@@ -78,13 +107,30 @@ def llm_place_media(html_content: str, media_html_tag: str, media_kind: str = "i
         client = get_inference_client("Qwen/Qwen3-Coder-480B-A35B-Instruct", "auto")
         system_prompt = (
             "You are a code editor. Insert the provided media tag into the given HTML in the most semantically appropriate place.\n"
+            "For video elements: prefer replacing placeholder images or inserting in hero sections with proper container divs.\n"
+            "For image elements: prefer replacing placeholder images or inserting near related content.\n"
+            "CRITICAL: Ensure proper HTML structure - videos should be wrapped in appropriate containers.\n"
             "Return ONLY search/replace blocks using the exact markers: <<<<<<< SEARCH, =======, >>>>>>> REPLACE.\n"
             "Do NOT include any commentary. Ensure the SEARCH block matches exact lines from the input.\n"
+            "When inserting videos, ensure they are properly contained within semantic HTML elements.\n"
         )
+        # Truncate very long media tags for LLM prompt only to prevent token limits
+        truncated_media_tag_for_prompt = media_html_tag
+        if len(media_html_tag) > 2000:
+            # For very long data URIs, show structure but truncate the data for LLM prompt
+            if 'data:video/mp4;base64,' in media_html_tag:
+                start_idx = media_html_tag.find('data:video/mp4;base64,')
+                end_idx = media_html_tag.find('"', start_idx)
+                if start_idx != -1 and end_idx != -1:
+                    truncated_media_tag_for_prompt = (
+                        media_html_tag[:start_idx] +
+                        'data:video/mp4;base64,[TRUNCATED_BASE64_DATA]' +
+                        media_html_tag[end_idx:]
+                    )
+
         user_payload = (
             "HTML Document:\n" + html_content + "\n\n" +
+            f"Media ({media_kind}):\n" + truncated_media_tag_for_prompt + "\n\n" +
             "Produce search/replace blocks now."
         )
         messages = [
@@ -98,6 +144,16 @@ def llm_place_media(html_content: str, media_html_tag: str, media_kind: str = "i
             temperature=0.2,
         )
         text = (completion.choices[0].message.content or "") if completion and completion.choices else ""
+
+        # Replace any truncated placeholders with the original full media HTML
+        if '[TRUNCATED_BASE64_DATA]' in text and 'data:video/mp4;base64,[TRUNCATED_BASE64_DATA]' in truncated_media_tag_for_prompt:
+            # Extract the original base64 data from the full media tag
+            original_start = media_html_tag.find('data:video/mp4;base64,')
+            original_end = media_html_tag.find('"', original_start)
+            if original_start != -1 and original_end != -1:
+                original_data_uri = media_html_tag[original_start:original_end]
+                text = text.replace('data:video/mp4;base64,[TRUNCATED_BASE64_DATA]', original_data_uri)
+
         return text.strip()
     except Exception as e:
         print(f"[LLMPlaceMedia] Fallback due to error: {e}")
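The truncate-then-restore dance above keeps the multi-megabyte base64 payload out of the LLM prompt while the returned search/replace blocks still end up with the real data. The same string handling in isolation (the tag here is illustrative, not real output):

```python
tag = '<video><source src="data:video/mp4;base64,AAAABBBBCCCC" type="video/mp4" /></video>'

start = tag.find('data:video/mp4;base64,')
end = tag.find('"', start)
prompt_tag = tag[:start] + 'data:video/mp4;base64,[TRUNCATED_BASE64_DATA]' + tag[end:]

# Pretend the model echoed the truncated tag back in its blocks, then restore the payload.
restored = prompt_tag.replace('data:video/mp4;base64,[TRUNCATED_BASE64_DATA]', tag[start:end])
assert restored == tag
```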
@@ -1631,6 +1687,115 @@ def process_image_for_model(image):
     image.save(buffer, format='PNG')
     img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
     return f"data:image/png;base64,{img_str}"
+
+def compress_video_for_data_uri(video_bytes: bytes, max_size_mb: int = 8) -> bytes:
+    """Compress video bytes for data URI embedding with size limit"""
+    import subprocess
+    import tempfile
+    import os
+
+    max_size = max_size_mb * 1024 * 1024
+
+    # If already small enough, return as-is
+    if len(video_bytes) <= max_size:
+        return video_bytes
+
+    print(f"[VideoCompress] Video size {len(video_bytes)} bytes exceeds {max_size_mb}MB limit, attempting compression")
+
+    try:
+        # Create temp files
+        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_input:
+            temp_input.write(video_bytes)
+            temp_input_path = temp_input.name
+
+        temp_output_path = temp_input_path.replace('.mp4', '_compressed.mp4')
+
+        try:
+            # Compress with ffmpeg - aggressive settings for small size
+            subprocess.run([
+                'ffmpeg', '-i', temp_input_path,
+                '-vcodec', 'libx264', '-crf', '30', '-preset', 'fast',
+                '-vf', 'scale=480:-1', '-r', '15',  # Lower resolution and frame rate
+                '-an',  # Remove audio to save space
+                '-y', temp_output_path
+            ], check=True, capture_output=True, stderr=subprocess.DEVNULL)
+
+            # Read compressed video
+            with open(temp_output_path, 'rb') as f:
+                compressed_bytes = f.read()
+
+            print(f"[VideoCompress] Compressed from {len(video_bytes)} to {len(compressed_bytes)} bytes")
+            return compressed_bytes
+
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            print("[VideoCompress] ffmpeg compression failed, using original video")
+            return video_bytes
+        finally:
+            # Clean up temp files
+            for path in [temp_input_path, temp_output_path]:
+                try:
+                    if os.path.exists(path):
+                        os.remove(path)
+                except Exception:
+                    pass
+
+    except Exception as e:
+        print(f"[VideoCompress] Compression failed: {e}, using original video")
+        return video_bytes
+
+def compress_audio_for_data_uri(audio_bytes: bytes, max_size_mb: int = 4) -> bytes:
+    """Compress audio bytes for data URI embedding with size limit"""
+    import subprocess
+    import tempfile
+    import os
+
+    max_size = max_size_mb * 1024 * 1024
+
+    # If already small enough, return as-is
+    if len(audio_bytes) <= max_size:
+        return audio_bytes
+
+    print(f"[AudioCompress] Audio size {len(audio_bytes)} bytes exceeds {max_size_mb}MB limit, attempting compression")
+
+    try:
+        # Create temp files
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_input:
+            temp_input.write(audio_bytes)
+            temp_input_path = temp_input.name
+
+        temp_output_path = temp_input_path.replace('.wav', '_compressed.mp3')
+
+        try:
+            # Compress with ffmpeg - convert to MP3 with lower bitrate
+            subprocess.run([
+                'ffmpeg', '-i', temp_input_path,
+                '-codec:a', 'libmp3lame', '-b:a', '64k',  # Low bitrate MP3
+                '-y', temp_output_path
+            ], check=True, capture_output=True, stderr=subprocess.DEVNULL)
+
+            # Read compressed audio
+            with open(temp_output_path, 'rb') as f:
+                compressed_bytes = f.read()
+
+            print(f"[AudioCompress] Compressed from {len(audio_bytes)} to {len(compressed_bytes)} bytes")
+            return compressed_bytes
+
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            print("[AudioCompress] ffmpeg compression failed, using original audio")
+            return audio_bytes
+        finally:
+            # Clean up temp files
+            for path in [temp_input_path, temp_output_path]:
+                try:
+                    if os.path.exists(path):
+                        os.remove(path)
+                except Exception:
+                    pass
+
+    except Exception as e:
+        print(f"[AudioCompress] Compression failed: {e}, using original audio")
+        return audio_bytes
+
 def generate_image_with_qwen(prompt: str, image_index: int = 0) -> str:
     """Generate image using Qwen image model via Hugging Face InferenceClient with optimized data URL"""
     try:
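Base64 inflates payloads by about a third, so capping the raw MP4 at 8 MB keeps the embedded URI near 11 MB. A hedged usage sketch follows (the clip path is hypothetical, and ffmpeg must be on PATH for the compression branch to do anything). One caveat: subprocess.run() rejects capture_output=True combined with an explicit stderr, so as committed the ffmpeg call raises ValueError and the outer handler falls back to returning the original bytes.

```python
import base64

# Hypothetical clip on disk; compress_video_for_data_uri is the helper added above.
with open("generated_clip.mp4", "rb") as f:
    raw = f.read()

small = compress_video_for_data_uri(raw, max_size_mb=8)  # returns raw unchanged if already <= 8 MB
uri = "data:video/mp4;base64," + base64.b64encode(small).decode()
print(f"raw={len(raw)} B, embedded={len(small)} B, data URI={len(uri)} chars")
```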
@@ -1841,52 +2006,33 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         )
         print(f"[Image2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
 
+        # Convert video to compressed data URI for deployment compatibility
+        import base64
+
+        # Compress video for data URI embedding
+        compressed_video_bytes = compress_video_for_data_uri(video_bytes, max_size_mb=8)
+
+        # Create data URI
+        video_b64 = base64.b64encode(compressed_video_bytes).decode()
+        data_uri = f"data:video/mp4;base64,{video_b64}"
+
+        video_html = (
+            f'<video controls autoplay muted loop playsinline '
+            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
+            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
+            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
+            f'<source src="{data_uri}" type="video/mp4" />'
+            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
+            f'</video>'
+        )
+
+        print(f"[Image2Video] Successfully generated video HTML tag with data URI ({len(compressed_video_bytes)} bytes)")
+
+        # Validate the generated video HTML
+        if not validate_video_html(video_html):
+            print("[Image2Video] Generated video HTML failed validation")
+            return "Error: Generated video HTML is malformed"
+
         return video_html
     except Exception as e:
         import traceback
@@ -1898,7 +2044,7 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
 def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> str:
     """Generate a video from a text prompt using Hugging Face InferenceClient.
 
+    Returns an HTML <video> tag with compressed data URI for deployment compatibility.
     """
     try:
         print("[Text2Video] Starting video generation from text")
@@ -1932,41 +2078,33 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
         )
         print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
 
+        # Convert video to compressed data URI for deployment compatibility
+        import base64
+
+        # Compress video for data URI embedding
+        compressed_video_bytes = compress_video_for_data_uri(video_bytes, max_size_mb=8)
+
+        # Create data URI
+        video_b64 = base64.b64encode(compressed_video_bytes).decode()
+        data_uri = f"data:video/mp4;base64,{video_b64}"
+
         video_html = (
+            f'<video controls autoplay muted loop playsinline '
+            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
+            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
+            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
+            f'<source src="{data_uri}" type="video/mp4" />'
+            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
+            f'</video>'
         )
+
+        print(f"[Text2Video] Successfully generated video HTML tag with data URI ({len(compressed_video_bytes)} bytes)")
+
+        # Validate the generated video HTML
+        if not validate_video_html(video_html):
+            print("[Text2Video] Generated video HTML failed validation")
+            return "Error: Generated video HTML is malformed"
+
         return video_html
     except Exception as e:
         import traceback
@@ -1978,7 +2116,7 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
 def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None) -> str:
     """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
 
+    Returns compressed data URI for deployment compatibility.
     Requires ELEVENLABS_API_KEY in the environment.
     """
     try:
@@ -2001,38 +2139,30 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
         except Exception as e:
             return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
 
+        # Convert audio to compressed data URI for deployment compatibility
+        import base64
+
+        # Compress audio for data URI embedding
+        compressed_audio_bytes = compress_audio_for_data_uri(resp.content, max_size_mb=4)
+
+        # Create data URI - use appropriate MIME type based on compression
+        audio_format = "audio/mpeg" if len(compressed_audio_bytes) < len(resp.content) else "audio/wav"
+        audio_b64 = base64.b64encode(compressed_audio_bytes).decode()
+        data_uri = f"data:{audio_format};base64,{audio_b64}"
+
         audio_html = (
             "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
             " <div style=\"font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px\">"
             " <span>🎵 Generated music</span>"
             " </div>"
             f" <audio controls autoplay loop style=\"width:100%;outline:none;\">"
+            f" <source src=\"{data_uri}\" type=\"{audio_format}\" />"
             " Your browser does not support the audio element."
             " </audio>"
             "</div>"
         )
+
+        print(f"[Music] Successfully generated music HTML tag with data URI ({len(compressed_audio_bytes)} bytes)")
         return audio_html
     except Exception as e:
         return f"Error generating music: {str(e)}"
@@ -2315,15 +2445,50 @@ def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str
 {REPLACE_END}""")
         return '\n\n'.join(blocks)
 
+    # Otherwise insert after <body> with proper container
     if '<body' in html_content:
+        body_start = html_content.find('<body')
+        body_end = html_content.find('>', body_start) + 1
+        opening_body_tag = html_content[body_start:body_end]
+
+        # Look for existing container elements to insert into
+        body_content_start = body_end
+
+        # Try to find a good insertion point within existing content structure
+        patterns_to_try = [
+            r'<main[^>]*>',
+            r'<section[^>]*class="[^"]*hero[^"]*"[^>]*>',
+            r'<div[^>]*class="[^"]*container[^"]*"[^>]*>',
+            r'<header[^>]*>',
+        ]
+
+        insertion_point = None
+        for pattern in patterns_to_try:
+            import re
+            match = re.search(pattern, html_content[body_content_start:], re.IGNORECASE)
+            if match:
+                match_end = body_content_start + match.end()
+                # Find the end of this tag
+                tag_content = html_content[body_content_start + match.start():match_end]
+                insertion_point = html_content[:match_end] + '\n '
+                break
+
+        if not insertion_point:
+            # Fallback to right after body tag with container div
+            insertion_point = html_content[:body_end] + '\n '
+            video_with_container = f'<div class="video-container" style="margin: 20px 0; text-align: center;">\n {video_html}\n </div>'
+            return f"""{SEARCH_START}
 {insertion_point}
 {DIVIDER}
 {insertion_point}
+{video_with_container}
+{REPLACE_END}"""
+        else:
+            return f"""{SEARCH_START}
+{insertion_point}
+{DIVIDER}
+{insertion_point}
+{video_html}
 {REPLACE_END}"""
 
     # If no <body>, just append
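The fallback insertion logic just searches for the first opening tag of a likely container below <body>; the same pattern matching in isolation (sample HTML is illustrative):

```python
import re

html = '<body><header class="top"></header><main id="app"><section class="hero"></section></main></body>'
patterns_to_try = [
    r'<main[^>]*>',
    r'<section[^>]*class="[^"]*hero[^"]*"[^>]*>',
    r'<div[^>]*class="[^"]*container[^"]*"[^>]*>',
    r'<header[^>]*>',
]

for pattern in patterns_to_try:
    match = re.search(pattern, html, re.IGNORECASE)
    if match:
        print("insert after:", match.group(0))  # -> <main id="app">
        break
```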
@@ -2586,10 +2751,17 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         try:
             video_html_tag = generate_video_from_image(input_image_data, i2v_prompt, session_id=session_id)
             if not (video_html_tag or "").startswith("Error"):
+                # Validate video HTML before attempting placement
+                if validate_video_html(video_html_tag):
+                    blocks_v = llm_place_media(result, video_html_tag, media_kind="video")
+                else:
+                    print("[MediaApply] Generated video HTML failed validation, skipping LLM placement")
+                    blocks_v = ""
             else:
+                print(f"[MediaApply] Video generation failed: {video_html_tag}")
                 blocks_v = ""
+        except Exception as e:
+            print(f"[MediaApply] Exception during image-to-video generation: {str(e)}")
             blocks_v = ""
         if not blocks_v:
             blocks_v = create_video_replacement_blocks_from_input_image(result, i2v_prompt, input_image_data, session_id=session_id)
@@ -2622,10 +2794,17 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         try:
             video_html_tag = generate_video_from_text(t2v_prompt, session_id=session_id)
             if not (video_html_tag or "").startswith("Error"):
+                # Validate video HTML before attempting placement
+                if validate_video_html(video_html_tag):
+                    blocks_tv = llm_place_media(result, video_html_tag, media_kind="video")
+                else:
+                    print("[MediaApply] Generated video HTML failed validation, skipping LLM placement")
+                    blocks_tv = ""
             else:
+                print(f"[MediaApply] Video generation failed: {video_html_tag}")
                 blocks_tv = ""
+        except Exception as e:
+            print(f"[MediaApply] Exception during text-to-video generation: {str(e)}")
             blocks_tv = ""
         if not blocks_tv:
             blocks_tv = create_video_replacement_blocks_text_to_video(result, t2v_prompt, session_id=session_id)
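Taken together, both video paths now follow generate → validate → LLM placement, with the heuristic replacement blocks as the fallback. A simplified sketch of the text-to-video branch (names from the diff, control flow condensed, not the exact app.py code):

```python
def place_generated_video(result_html: str, prompt: str, session_id=None) -> str:
    """Simplified sketch of the text-to-video flow after this change."""
    blocks = ""
    try:
        tag = generate_video_from_text(prompt, session_id=session_id)
        if not tag.startswith("Error") and validate_video_html(tag):
            blocks = llm_place_media(result_html, tag, media_kind="video")
    except Exception as e:
        print(f"[MediaApply] video generation failed: {e}")
    if not blocks:
        blocks = create_video_replacement_blocks_text_to_video(result_html, prompt, session_id=session_id)
    return blocks
```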