Spaces:

Fanny1366
/

assignment

Sleeping

App Files Files Community

Fanny1366 committed on Mar 9, 2025

Commit

c8359bd

verified ·

1 Parent(s): 85a0c1a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -19

app.py CHANGED Viewed

@@ -1,45 +1,51 @@
 # import part
 import streamlit as st
 from transformers import pipeline
-import soundfile as sf
-import numpy as np
-import tempfile
 # function part
 # img2text
 def img2text(url):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
     text = image_to_text_model(url)[0]["generated_text"]
     return text
 # text2story
 def text2story(text):
-    story_text_model = pipeline("text-generation", model="google/gemma-2-9b-it")
-    story = story_text_model(text, max_length=150)[0]['generated_text']
-    return story
 # text2audio
 def text2audio(story_text):
-    tts_model = pipeline("text-to-speech", model="tts_models/en/ljspeech/tacotron2")
-    audio_data = tts_model(story_text)
-    # Save audio to a temporary file
-    audio_filename = tempfile.mktemp(suffix=".wav")
-    sf.write(audio_filename, audio_data['audio'], audio_data['sampling_rate'])
-    return audio_filename
 # main part
-st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 st.header("Turn Your Image to Audio Story")
 uploaded_file = st.file_uploader("Select an Image...")
 if uploaded_file is not None:
     bytes_data = uploaded_file.getvalue()
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
-    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
     # Stage 1: Image to Text
     st.text('Processing img2text...')
@@ -53,8 +59,8 @@ if uploaded_file is not None:
     # Stage 3: Story to Audio data
     st.text('Generating audio data...')
-    audio_filename = text2audio(story)
     # Play button
     if st.button("Play Audio"):
-        st.audio(audio_filename, format="audio/wav")

 # import part
 import streamlit as st
 from transformers import pipeline
+from gtts import gTTS
+import io
 # function part
 # img2text
 def img2text(url):
+    image_to_text_model = pipeline("image-to-text",
+                                   model="Salesforce/blip-image-captioning-base")
     text = image_to_text_model(url)[0]["generated_text"]
     return text
 # text2story
 def text2story(text):
+    story_pipeline = pipeline("text-generation", model="facebook/opt-125m")
+    # Call the pipeline to generate the story text
+    result = story_pipeline(text, max_length=200, num_return_sequences=1)
+    story_text = result[0]['generated_text']
+    return story_text
 # text2audio
 def text2audio(story_text):
+    # Use the gTTS library to convert the text to audio
+    tts = gTTS(text=story_text, lang='en')
+    # Create an in-memory byte stream object to hold the audio data
+    audio_file = io.BytesIO()
+    # Write the audio data into the byte stream
+    tts.write_to_fp(audio_file)
+    # Move the file pointer back to the start so the data can be read later
+    audio_file.seek(0)
+    return audio_file
 # main part
+st.set_page_config(page_title="Your Image to Audio Story",
+                   page_icon="🦜")
 st.header("Turn Your Image to Audio Story")
 uploaded_file = st.file_uploader("Select an Image...")
 if uploaded_file is not None:
+    print(uploaded_file)
     bytes_data = uploaded_file.getvalue()
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
+    st.image(uploaded_file, caption="Uploaded Image",
+             use_column_width=True)
     # Stage 1: Image to Text
     st.text('Processing img2text...')
     # Stage 3: Story to Audio data
     st.text('Generating audio data...')
+    audio_data = text2audio(story)
     # Play button
     if st.button("Play Audio"):
+        st.audio(audio_data, format="audio/mpeg")