Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ st.write("Upload an image and watch as it’s captioned, turned into a short sto
|
|
| 9 |
|
| 10 |
@st.cache_resource
|
| 11 |
def load_captioner():
|
| 12 |
-
return pipeline("image-
|
| 13 |
|
| 14 |
@st.cache_resource
|
| 15 |
def load_story_gen():
|
|
@@ -18,19 +18,19 @@ def load_story_gen():
|
|
| 18 |
captioner = load_captioner()
|
| 19 |
story_gen = load_story_gen()
|
| 20 |
|
| 21 |
-
# 1) Upload
|
| 22 |
uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
|
| 23 |
if uploaded:
|
| 24 |
img = Image.open(uploaded)
|
| 25 |
st.image(img, use_column_width=True)
|
| 26 |
|
| 27 |
-
# 2) Caption
|
| 28 |
if "caption" not in st.session_state:
|
| 29 |
with st.spinner("Generating caption…"):
|
| 30 |
st.session_state.caption = captioner(img)[0]["generated_text"]
|
| 31 |
st.write("**Caption:**", st.session_state.caption)
|
| 32 |
|
| 33 |
-
# 3) Story
|
| 34 |
if "story" not in st.session_state:
|
| 35 |
with st.spinner("Spinning up a story…"):
|
| 36 |
out = story_gen(
|
|
@@ -43,15 +43,15 @@ if uploaded:
|
|
| 43 |
st.session_state.story = out[0]["generated_text"]
|
| 44 |
st.write("**Story:**", st.session_state.story)
|
| 45 |
|
| 46 |
-
# 4) Pre-generate
|
| 47 |
-
if "
|
| 48 |
with st.spinner("Generating audio…"):
|
| 49 |
tts = gTTS(text=st.session_state.story, lang="en")
|
| 50 |
buf = io.BytesIO()
|
| 51 |
tts.write_to_fp(buf)
|
| 52 |
-
buf.
|
| 53 |
-
st.session_state.audio_buffer = buf.read()
|
| 54 |
|
| 55 |
# 5) Play on demand
|
| 56 |
if st.button("🔊 Play Story Audio"):
|
| 57 |
-
|
|
|
|
|
|
| 9 |
|
| 10 |
@st.cache_resource
|
| 11 |
def load_captioner():
|
| 12 |
+
return pipeline("image-captioning", model="nlpconnect/vit-gpt2-image-captioning")
|
| 13 |
|
| 14 |
@st.cache_resource
|
| 15 |
def load_story_gen():
|
|
|
|
| 18 |
captioner = load_captioner()
|
| 19 |
story_gen = load_story_gen()
|
| 20 |
|
| 21 |
+
# 1) Upload
|
| 22 |
uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
|
| 23 |
if uploaded:
|
| 24 |
img = Image.open(uploaded)
|
| 25 |
st.image(img, use_column_width=True)
|
| 26 |
|
| 27 |
+
# 2) Caption
|
| 28 |
if "caption" not in st.session_state:
|
| 29 |
with st.spinner("Generating caption…"):
|
| 30 |
st.session_state.caption = captioner(img)[0]["generated_text"]
|
| 31 |
st.write("**Caption:**", st.session_state.caption)
|
| 32 |
|
| 33 |
+
# 3) Story
|
| 34 |
if "story" not in st.session_state:
|
| 35 |
with st.spinner("Spinning up a story…"):
|
| 36 |
out = story_gen(
|
|
|
|
| 43 |
st.session_state.story = out[0]["generated_text"]
|
| 44 |
st.write("**Story:**", st.session_state.story)
|
| 45 |
|
| 46 |
+
# 4) Pre-generate raw MP3 bytes
|
| 47 |
+
if "audio_bytes" not in st.session_state:
|
| 48 |
with st.spinner("Generating audio…"):
|
| 49 |
tts = gTTS(text=st.session_state.story, lang="en")
|
| 50 |
buf = io.BytesIO()
|
| 51 |
tts.write_to_fp(buf)
|
| 52 |
+
st.session_state.audio_bytes = buf.getvalue()
|
|
|
|
| 53 |
|
| 54 |
# 5) Play on demand
|
| 55 |
if st.button("🔊 Play Story Audio"):
|
| 56 |
+
audio_buffer = io.BytesIO(st.session_state.audio_bytes)
|
| 57 |
+
st.audio(audio_buffer, format="audio/mp3")
|