Spaces:
Sleeping
Sleeping
# Streamlit front end with two pages selected from the sidebar:
#   * "Charges Incurred"  - shows static pricing screenshots.
#   * "Invoices to Table" - uploads an invoice image, runs OCR on it, asks an
#     LLM for a table, and lets the user parse, edit and download that table.
#
# NOTE(review): this script was recovered from a copy with all indentation
# stripped; block nesting below is reconstructed from what each statement
# uses. Spots where the nesting is a judgement call are marked.
# NOTE(review): the emoji prefixes in the UI strings look mis-encoded in the
# recovered copy; they are kept as found - confirm against the deployed app.
import os

# Force Streamlit to use a writable config directory.
# Kept ahead of `import streamlit`, as in the original ordering (presumably so
# Streamlit sees these values when it is imported - confirm).
os.environ["XDG_CONFIG_HOME"] = "/tmp"
os.environ["STREAMLIT_HOME"] = "/tmp"
os.makedirs("/tmp/.streamlit", exist_ok=True)

import streamlit as st
import pandas as pd
from ocr_llm_utils import run_ocr_with_gcv, extract_table_from_text, extract_markdown_table
import tempfile
from PIL import Image
import io

# Set wide layout
st.set_page_config(page_title="Invoice Processor", layout="wide")


@st.cache_data(show_spinner=False)
def _ocr_image_bytes(image_bytes: bytes):
    """Run OCR on raw image bytes and return whatever ``run_ocr_with_gcv`` returns.

    ``run_ocr_with_gcv`` takes a file path, so the bytes are written to a
    temporary file first. The file is always removed afterwards; previously a
    new file was left behind in the temp directory on every script rerun.

    The result is cached per distinct image, so Streamlit reruns triggered by
    button clicks or table edits do not repeat the OCR call.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(image_bytes)
        image_path = temp_file.name
    try:
        return run_ocr_with_gcv(image_path)
    finally:
        os.unlink(image_path)


@st.cache_data(show_spinner=False)
def _extract_table_markdown(text, max_tokens: int, model: str):
    """Ask the LLM for a table built from ``text``; cached per argument set.

    Caching means the same text / token limit / model combination reuses the
    earlier answer instead of issuing a new LLM request on every rerun.
    """
    return extract_table_from_text(text, max_tokens=max_tokens, model=model)


# Sidebar navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Charges Incurred", "Invoices to Table"])

# Charges Incurred page (static pricing screenshots)
if page == "Charges Incurred":
    st.title("Charges Incurred")
    col1, col2 = st.columns([1, 1])  # two equal-width columns, one per provider

    with col1:
        st.subheader("1οΈβ£ Google Cloud Vision OCR Cost")
        st.image("assets/gcv_ocr_costs.png", caption="GCV OCR Pricing", use_container_width=True)

    with col2:
        st.subheader("2οΈβ£ Groq API Cost (LLaMA 4 Scout)")
        st.image("assets/groq_api_costs.png", caption="Groq LLM Pricing", use_container_width=True)

    # NOTE(review): placed below both columns (full width); it may originally
    # have been nested inside col2 - confirm the intended layout.
    st.subheader("3οΈβ£ Combined Cost Summary")
    st.image(
        "assets/cost_summary_from_chatgpt.png",
        caption="Total Estimated Cost for 1000 Invoices",
        use_container_width=True,
    )

# Invoices to Table
elif page == "Invoices to Table":
    st.title("Invoice Table Extractor")

    st.sidebar.markdown("### π’ Max Tokens for LLM")
    selected_token_limit = st.sidebar.radio(
        "Choose max tokens:",
        options=[512, 1024, 2048, 4096],
        index=3,
        key="token_selector_sidebar",
    )

    # Model selector
    st.sidebar.markdown("### π€ Choose LLM Model")
    selected_model = st.sidebar.radio(
        "Which model to use?",
        options=[
            "meta-llama/llama-4-maverick-17b-128e-instruct",
            "meta-llama/llama-4-scout-17b-16e-instruct",
            "deepseek-r1-distill-llama-70b",
            "llama-3.3-70b-versatile",
            "gemma2-9b-it",
        ],
        index=1,
        key="model_selector_sidebar",
    )

    uploaded_file = st.file_uploader("π€ Upload Invoice Image", type=["jpg", "jpeg", "png"])
    col1, col2 = st.columns([1.5, 1])  # col1 = processing, col2 = image

    if uploaded_file is not None:
        # getvalue() returns the full upload without moving the read position,
        # so the same object can still be handed to st.image below.
        image_bytes = uploaded_file.getvalue()

        with col2:
            st.subheader("πΌοΈ Invoice Preview")
            st.image(uploaded_file, use_container_width=True)

        with col1:
            with st.spinner("π Running OCR..."):
                text = _ocr_image_bytes(image_bytes)

            with st.expander("π Extracted Text"):
                st.text_area("OCR Text", text, height=300)

            with st.spinner("π Extracting Table..."):
                table_md = _extract_table_markdown(text, selected_token_limit, selected_model)

            if st.button("π§ Parse Table"):
                try:
                    df = extract_markdown_table(table_md)
                except Exception as e:
                    # UI boundary: report any parser failure instead of crashing the app.
                    st.error(f"β Parsing failed: {e}")
                else:
                    # Store the fresh table (overwriting any earlier one) and bump
                    # the editor version so the editor below starts without
                    # edits left over from a previous table.
                    st.session_state["parsed_table_df"] = df
                    st.session_state["editor_version"] = st.session_state.get("editor_version", 0) + 1
                    st.success("β Table parsed successfully. You can now edit it.")

            # NOTE(review): nesting of this section (inside col1, and the Reset
            # button inside this `if`) is reconstructed - confirm.
            if "parsed_table_df" in st.session_state:
                st.subheader("βοΈ Editable Parsed Table")
                editor_version = st.session_state.get("editor_version", 0)
                edited_df = st.data_editor(
                    st.session_state["parsed_table_df"],
                    num_rows="dynamic",
                    use_container_width=True,
                    key=f"invoice_editor_{editor_version}",
                )
                # The edited frame is deliberately NOT written back to
                # st.session_state["parsed_table_df"]: changing the editor's input
                # between reruns re-initialises the widget and drops edits. The
                # widget keeps the edits itself and returns them on every rerun.

                # Export options
                buffer = io.BytesIO()
                edited_df.to_excel(buffer, index=False)
                buffer.seek(0)
                st.download_button(
                    label="π₯ Download Edited Table as Excel",
                    data=buffer,
                    file_name="edited_invoice_table.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                )

                if st.button("π Reset Table"):
                    st.session_state.pop("parsed_table_df", None)
                    st.success("Table has been cleared. You can parse again.")