Spaces:
Sleeping
Sleeping
update app
Browse files
app.py
CHANGED
|
@@ -5,13 +5,15 @@ from tqdm import tqdm
|
|
| 5 |
|
| 6 |
st.title("Coffe machine captioning app")
|
| 7 |
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
|
| 16 |
|
| 17 |
# Sidebar instructions for using the captioning app
|
|
@@ -19,11 +21,15 @@ st.sidebar.title("Instructions")
|
|
| 19 |
st.sidebar.write(
|
| 20 |
"""
|
| 21 |
1. Upload an image using the file uploader.
|
| 22 |
-
2. Wait for the app to process and
|
| 23 |
-
3. The
|
|
|
|
| 24 |
"""
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
prompt = (
|
| 29 |
f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
|
|
@@ -51,15 +57,13 @@ if uploaded_image is not None:
|
|
| 51 |
)
|
| 52 |
|
| 53 |
|
| 54 |
-
st.
|
| 55 |
-
with tqdm(total=100) as pbar:
|
| 56 |
output = model.generate(**inputs, max_length=1000)
|
| 57 |
-
pbar.update(100)
|
| 58 |
|
| 59 |
out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
|
| 60 |
|
| 61 |
# Display the generated caption
|
| 62 |
-
st.text_area("Coffe machine
|
| 63 |
|
| 64 |
|
| 65 |
|
|
|
|
| 5 |
|
| 6 |
st.title("Coffe machine captioning app")
|
| 7 |
|
| 8 |
+
with st.spinner('Loading model and tokenizer...'):
|
| 9 |
|
| 10 |
+
model_id = "Fer14/paligemma_coffe_machine_caption"
|
| 11 |
|
| 12 |
+
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
|
| 13 |
+
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
| 14 |
|
| 15 |
+
st.success('Model loaded!')
|
| 16 |
+
|
| 17 |
|
| 18 |
|
| 19 |
# Sidebar instructions for using the captioning app
|
|
|
|
| 21 |
st.sidebar.write(
|
| 22 |
"""
|
| 23 |
1. Upload an image using the file uploader.
|
| 24 |
+
2. Wait for the app to process and generate the caption.
|
| 25 |
+
3. The caption will be displayed in the text area.
|
| 26 |
+
4. Enjoy your caption!
|
| 27 |
"""
|
| 28 |
)
|
| 29 |
|
| 30 |
+
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
|
| 31 |
+
|
| 32 |
+
|
| 33 |
|
| 34 |
prompt = (
|
| 35 |
f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
|
|
|
|
| 57 |
)
|
| 58 |
|
| 59 |
|
| 60 |
+
with st.spinner('Generating caption...'):
|
|
|
|
| 61 |
output = model.generate(**inputs, max_length=1000)
|
|
|
|
| 62 |
|
| 63 |
out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
|
| 64 |
|
| 65 |
# Display the generated caption
|
| 66 |
+
st.text_area("Coffe machine caption", out, height=300)
|
| 67 |
|
| 68 |
|
| 69 |
|