Spaces:

Fanny1366
/

assignment

Sleeping

App Files Files Community

Fanny1366 committed on Mar 9

Commit

8d2615a

verified ·

1 Parent(s): cfac1ab

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -13

app.py CHANGED Viewed

@@ -1,20 +1,23 @@
 import streamlit as st
 from transformers import pipeline
 from gtts import gTTS
 import os
-# Function: Image to Text
 def img2text(url):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
     text = image_to_text_model(url)[0]["generated_text"]
     return text
-# Function: Text to Story (Placeholder)
 def text2story(text):
-    story_text = text  # Placeholder for now
     return story_text
-# Function: Text to Audio
 def text2audio(story_text):
     # Convert text to audio using gTTS
     tts = gTTS(story_text, lang="en")
@@ -22,32 +25,42 @@ def text2audio(story_text):
     tts.save(audio_file)
     return audio_file
-# Streamlit App
-st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 st.header("Turn Your Image to Audio Story")
 uploaded_file = st.file_uploader("Select an Image...")
 if uploaded_file is not None:
     print(uploaded_file)
     bytes_data = uploaded_file.getvalue()
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
-    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
-    # Stage 1: Image to Text
     st.text('Processing img2text...')
     scenario = img2text(uploaded_file.name)
     st.write(scenario)
-    # Stage 2: Text to Story
     st.text('Generating a story...')
     story = text2story(scenario)
     st.write(story)
-    # Stage 3: Story to Audio
     st.text('Generating audio data...')
-    audio_file = text2audio(story)
     # Play button
     if st.button("Play Audio"):
-        st.audio(audio_file, format="audio/wav")

+# import part
 import streamlit as st
 from transformers import pipeline
 from gtts import gTTS
 import os
+# function part
+# img2text
 def img2text(url):
     """Generate a caption for an image with the BLIP captioning model.

     Args:
         url: Image reference forwarded unchanged to the image-to-text
             pipeline (the script in this file passes a local file name).

     Returns:
         The ``generated_text`` field of the first pipeline result.
     """
     # The pipeline is built on each call, exactly as before.
     captioner = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
     )
     first_result = captioner(url)[0]
     return first_result["generated_text"]
+# text2story
 def text2story(text):
     """Expand a short caption into a story with a text-generation model.

     Args:
         text: Prompt text (the image caption) handed to the generator.

     Returns:
         The ``generated_text`` field of the first generation result.
     """
     # The previous version wrapped this call in double quotes: the nested
     # quotes were a syntax error, and the "story" would have been the code
     # text itself rather than model output.
     # NOTE(review): confirm this model can actually be loaded in the
     # deployment environment; swap the model id if it cannot.
     story_generator = pipeline("text-generation",
                                model="perplexity-ai/r1-1776",
                                trust_remote_code=True)
     story_text = story_generator(text)[0]["generated_text"]
     return story_text
+# text2audio
 def text2audio(story_text):
     # Convert text to audio using gTTS
     tts = gTTS(story_text, lang="en")
     tts.save(audio_file)
     return audio_file
+# main part
+st.set_page_config(page_title="Your Image to Audio Story",
+                   page_icon="🦜") # prepare configuration
 st.header("Turn Your Image to Audio Story")
+# Upload image
 uploaded_file = st.file_uploader("Select an Image...")
+# If it is none, skip all the following things
 if uploaded_file is not None:
     print(uploaded_file)
     bytes_data = uploaded_file.getvalue()
     with open(uploaded_file.name, "wb") as file:
         file.write(bytes_data)
+    st.image(uploaded_file, caption="Uploaded Image",
+             use_column_width=True)
+    #Stage 1: Image to Text
     st.text('Processing img2text...')
     scenario = img2text(uploaded_file.name)
     st.write(scenario)
+    #Stage 2: Text to Story
     st.text('Generating a story...')
     story = text2story(scenario)
     st.write(story)
+    #Stage 3: Story to Audio data
     st.text('Generating audio data...')
+    audio_data =text2audio(story)
     # Play button
     if st.button("Play Audio"):
+        st.audio(audio_data['audio'],
+                    format="audio/wav",
+                    start_time=0,
+                    sample_rate = audio_data['sampling_rate'])
+        st.audio("kids_playing_audio.wav")