Spaces:

RameshJ
/

Invoices_to_Table

Sleeping

App Files Files Community

Invoices_to_Table / app.py

RameshJ

Update app.py

12e951c verified 6 months ago

raw

history blame contribute delete

4.73 kB

	import os

	# Redirect the config-related environment variables to /tmp and make sure
	# /tmp/.streamlit exists, so Streamlit has a writable config directory.
	# This sits ahead of the `import streamlit` line further down the file.
	os.environ.update({"XDG_CONFIG_HOME": "/tmp", "STREAMLIT_HOME": "/tmp"})
	os.makedirs("/tmp/.streamlit", exist_ok=True)

	import streamlit as st
	import pandas as pd
	from ocr_llm_utils import run_ocr_with_gcv, extract_table_from_text, extract_markdown_table
	import tempfile
	from PIL import Image
	import io

	# Page-level configuration: browser tab title and the wide layout.
	st.set_page_config(page_title="Invoice Processor", layout="wide")

	# The sidebar hosts the page switcher; `page` holds the label the user picked.
	with st.sidebar:
	    st.title("Navigation")
	    page = st.radio("Go to", ["Charges Incurred", "Invoices to Table"])

	# "Charges Incurred" page: static pricing screenshots, two side by side
	# followed by a full-width combined summary.
	if page == "Charges Incurred":
	    st.title("Charges Incurred")

	    # (heading, image file, caption) for each of the two equal-width columns.
	    pricing_panels = (
	        ("1️⃣ Google Cloud Vision OCR Cost", "assets/gcv_ocr_costs.png", "GCV OCR Pricing"),
	        ("2️⃣ Groq API Cost (LLaMA 4 Scout)", "assets/groq_api_costs.png", "Groq LLM Pricing"),
	    )
	    for column, (heading, image_file, caption) in zip(st.columns([1, 1]), pricing_panels):
	        with column:
	            st.subheader(heading)
	            st.image(image_file, caption=caption, use_container_width=True)

	    # Full-width summary below the two columns.
	    st.subheader("3️⃣ Combined Cost Summary")
	    st.image(
	        "assets/cost_summary_from_chatgpt.png",
	        caption="Total Estimated Cost for 1000 Invoices",
	        use_container_width=True,
	    )

	# Invoices to Table
	elif page == "Invoices to Table":
	st.title("Invoice Table Extractor")

	st.sidebar.markdown("### 🔢 Max Tokens for LLM")
	selected_token_limit = st.sidebar.radio(
	"Choose max tokens:",
	options=[512, 1024, 2048, 4096],
	index=3,
	key="token_selector_sidebar"
	)
	# Model selector
	st.sidebar.markdown("### 🤖 Choose LLM Model")
	selected_model = st.sidebar.radio(
	"Which model to use?",
	options=[
	"meta-llama/llama-4-maverick-17b-128e-instruct",
	"meta-llama/llama-4-scout-17b-16e-instruct",
	"deepseek-r1-distill-llama-70b",
	"llama-3.3-70b-versatile",
	"gemma2-9b-it"
	],
	index=1,
	key="model_selector_sidebar"
	)

	uploaded_file = st.file_uploader("📤 Upload Invoice Image", type=["jpg", "jpeg", "png"])
	col1, col2 = st.columns([1.5, 1]) # col1 = processing, col2 = image

	if uploaded_file is not None:
	# Save image temporarily
	with tempfile.NamedTemporaryFile(delete=False) as temp_file:
	temp_file.write(uploaded_file.read())
	image_path = temp_file.name

	with col2:
	st.subheader("🖼️ Invoice Preview")
	st.image(uploaded_file, use_container_width=True)


	with col1:
	with st.spinner("🔍 Running OCR..."):
	text = run_ocr_with_gcv(image_path)

	with st.expander("📝 Extracted Text"):
	st.text_area("OCR Text", text, height=300)

	with st.spinner("📊 Extracting Table..."):
	table_md = extract_table_from_text(text,max_tokens=selected_token_limit,model=selected_model)

	if st.button("🧠 Parse Table"):
	try:
	df = extract_markdown_table(table_md)

	# ✅ Store it freshly (overwrite old one if it exists)
	st.session_state["parsed_table_df"] = df
	st.success("✅ Table parsed successfully. You can now edit it.")
	except Exception as e:
	st.error(f"❌ Parsing failed: {e}")

	if "parsed_table_df" in st.session_state:
	st.subheader("✏️ Editable Parsed Table")

	edited_df = st.data_editor(
	st.session_state["parsed_table_df"],
	num_rows="dynamic",
	use_container_width=True,
	key="invoice_editor"
	)

	# Update session state only after editing
	st.session_state["parsed_table_df"] = edited_df

	# Export options
	import io
	buffer = io.BytesIO()
	edited_df.to_excel(buffer, index=False)
	buffer.seek(0)

	st.download_button(
	label="📥 Download Edited Table as Excel",
	data=buffer,
	file_name="edited_invoice_table.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	)

	if st.button("🔄 Reset Table"):
	st.session_state.pop("parsed_table_df", None)
	st.success("Table has been cleared. You can parse again.")