Spaces:

Fer14
/

coffee_machine_captioning

Sleeping

vicgalle committed on Jun 11, 2024

Commit

05dc25c

1 Parent(s): be1d27c

update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,13 +5,15 @@ from tqdm import tqdm
 st.title("Coffe machine captioning app")
-uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
-model_id = "Fer14/paligemma_coffe_machine_caption"
-model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
-processor = PaliGemmaProcessor.from_pretrained(model_id)
 # Instructions for Tesseract OCR
@@ -19,11 +21,15 @@ st.sidebar.title("Instructions")
 st.sidebar.write(
     """
     1. Upload an image using the file uploader.
-    2. Wait for the app to process and extract text from the image.
-    3. The extracted text will be displayed in the text area.
     """
 )
 prompt  = (
             f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
@@ -51,15 +57,13 @@ if uploaded_image is not None:
         )
-    st.write("Generating caption for the image...")
-    with tqdm(total=100) as pbar:
         output = model.generate(**inputs, max_length=1000)
-        pbar.update(100)
     out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
     # Display the extracted text
-    st.text_area("Coffe machine description", out, height=300)

 st.title("Coffe machine captioning app")
+with st.spinner('Loading model and tokenizer...'):
+    model_id = "Fer14/paligemma_coffe_machine_caption"
+    model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
+    processor = PaliGemmaProcessor.from_pretrained(model_id)
+st.success('Model loaded!')
 # Instructions for Tesseract OCR
 st.sidebar.write(
     """
     1. Upload an image using the file uploader.
+    2. Wait for the app to process and generate the caption.
+    3. The caption will be displayed in the text area.
+    4. Enjoy your caption!
     """
 )
+uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
 prompt  = (
             f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
         )
+    with st.spinner('Generating caption...'):
         output = model.generate(**inputs, max_length=1000)
     out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
     # Display the extracted text
+    st.text_area("Coffe machine caption", out, height=300)