MSU576 committed on
Commit
0f7ab73
·
verified ·
1 Parent(s): 2950fbf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -252
app.py CHANGED
@@ -1418,253 +1418,53 @@ def locator_ui():
1418
  # - Final UI glue: Reports page and main app routing
1419
  # -------------------------------------------------------
1420
 
1421
- import os
1422
- import io
1423
- import zipfile
1424
- import json
1425
- import tempfile
1426
- import base64
1427
- import pickle
1428
- from datetime import datetime
1429
- import textwrap
1430
-
1431
- # LLM client (Groq)
1432
- try:
1433
- from groq import Groq
1434
- except Exception:
1435
- Groq = None
1436
-
1437
- # FAISS
1438
- try:
1439
- import faiss
1440
- except Exception:
1441
- faiss = None
1442
-
1443
- # PDF (ReportLab)
1444
- from reportlab.lib import colors
1445
- from reportlab.lib.pagesizes import A4, landscape
1446
- from reportlab.lib.units import mm
1447
- from reportlab.platypus import (
1448
- SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as RLImage, PageBreak
1449
- )
1450
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
1451
-
1452
- # Matplotlib for embedding GSD plot images into PDFs
1453
- import matplotlib.pyplot as plt
1454
-
1455
  # --------------------------
1456
- # Basic secret checks (run early)
1457
  # --------------------------
1458
- # Using st.secrets for HF Spaces; fallback to env
1459
- GROQ_KEY = os.getenv("GROQ_API_KEY") or (st.secrets.get("GROQ_API_KEY") if "GROQ_API_KEY" in st.secrets else None)
1460
- SERVICE_ACCOUNT = os.getenv("SERVICE_ACCOUNT") or (st.secrets.get("SERVICE_ACCOUNT") if "SERVICE_ACCOUNT" in st.secrets else None)
1461
- EARTH_ENGINE_KEY = os.getenv("EARTH_ENGINE_KEY") or (st.secrets.get("EARTH_ENGINE_KEY") if "EARTH_ENGINE_KEY" in st.secrets else None)
1462
-
1463
- missing = []
1464
- if not GROQ_KEY:
1465
- missing.append("GROQ_API_KEY")
1466
- if not SERVICE_ACCOUNT:
1467
- missing.append("SERVICE_ACCOUNT")
1468
- if not EARTH_ENGINE_KEY:
1469
- # Don't require EARTH_ENGINE_KEY strictly (locator will warn) but warn user
1470
- st.warning("EARTH_ENGINE_KEY not found in secrets. Locator functions will be limited or disabled.")
1471
-
1472
- if missing:
1473
- st.error(f"Missing required secrets: {', '.join(missing)}. Please add them to Hugging Face Secrets and reload the app.")
1474
- st.stop()
1475
-
1476
- # Initialize Groq client
1477
- if Groq is None:
1478
- st.warning("Groq client lib not installed (pip package 'groq'). RAG will be disabled.")
1479
- GROQ_CLIENT = None
1480
- else:
1481
- try:
1482
- GROQ_CLIENT = Groq(api_key=GROQ_KEY)
1483
- except Exception as e:
1484
- st.error(f"Failed to create Groq client: {e}")
1485
- GROQ_CLIENT = None
1486
-
1487
- # --------------------------
1488
- # Session-state helpers (ensure present)
1489
- # --------------------------
1490
- if "sites" not in ss:
1491
- ss["sites"] = [{"Site Name": "Home", "chat_history": [], "classifier_inputs": {}, "classifier_state": 0,
1492
- "GSD": None, "USCS": None, "AASHTO": None, "GI": None,
1493
- "map_snapshot": None, "report_convo_state": 0, "soil_profile": None}]
1494
-
1495
- if "active_site_idx" not in ss:
1496
- ss["active_site_idx"] = 0
1497
-
1498
- def get_active_site():
1499
- idx = ss.get("active_site_idx", 0)
1500
- sites = ss.get("sites", [])
1501
- if idx < 0 or idx >= len(sites):
1502
- ss["active_site_idx"] = 0
1503
- idx = 0
1504
- return sites[idx]
1505
-
1506
- def save_active_site(site):
1507
- idx = ss.get("active_site_idx", 0)
1508
- ss["sites"][idx] = site
1509
- # ensure persistence in session_state
1510
- ss.modified = True
1511
-
1512
- def add_site(name="New Site"):
1513
- if len(ss["sites"]) >= 4:
1514
- st.warning("Maximum 4 sites allowed.")
1515
- return
1516
- ss["sites"].append({"Site Name": name, "chat_history": [], "classifier_inputs": {}, "classifier_state": 0,
1517
- "GSD": None, "USCS": None, "AASHTO": None, "GI": None,
1518
- "map_snapshot": None, "report_convo_state": 0, "soil_profile": None})
1519
- ss["active_site_idx"] = len(ss["sites"]) - 1
1520
-
1521
- def remove_site(idx):
1522
- if 0 <= idx < len(ss["sites"]):
1523
- ss["sites"].pop(idx)
1524
- ss["active_site_idx"] = max(0, ss["active_site_idx"] - 1)
1525
-
1526
- # --------------------------
1527
- # FAISS DB loader (expects a zip containing index.faiss + meta.pkl)
1528
- # --------------------------
1529
- def load_faiss_db_from_zip(zip_path: str):
1530
- """
1531
- Accepts path to a zip file containing index.faiss and meta.pkl (or index.pkl).
1532
- Extracts to tempdir and loads faiss index + metadata list.
1533
- Returns (index, meta_list) or (None, None) on error.
1534
- """
1535
- if faiss is None:
1536
- st.error("faiss not installed. RAG unavailable.")
1537
- return None, None
1538
-
1539
- if not os.path.exists(zip_path):
1540
- st.error("FAISS DB zip not found at provided path.")
1541
- return None, None
1542
- tmpd = tempfile.mkdtemp()
1543
- try:
1544
- with zipfile.ZipFile(zip_path, "r") as z:
1545
- z.extractall(tmpd)
1546
- # look for index.faiss or index.pkl
1547
- idx_file = None
1548
- meta_file = None
1549
- for fname in os.listdir(tmpd):
1550
- if fname.endswith(".faiss") or fname == "index.faiss":
1551
- idx_file = os.path.join(tmpd, fname)
1552
- if fname.endswith(".pkl") or fname == "meta.pkl":
1553
- meta_file = os.path.join(tmpd, fname)
1554
- if idx_file is None or meta_file is None:
1555
- st.error("Zip did not contain index.faiss and meta.pkl.")
1556
- return None, None
1557
- index = faiss.read_index(idx_file)
1558
- with open(meta_file, "rb") as f:
1559
- meta = pickle.load(f)
1560
- return index, meta
1561
- except Exception as e:
1562
- st.error(f"Failed to load FAISS DB: {e}")
1563
- return None, None
1564
-
1565
- # --------------------------
1566
- # RAG helper: simple retriever + Groq caller
1567
- # --------------------------
1568
- def rag_retrieve_and_answer(query: str, topk: int = 5):
1569
- """
1570
- Retrieve from FAISS (if loaded) and call Groq with context.
1571
- """
1572
- # Load index from session if present
1573
- if "faiss_index" not in ss or ss.get("faiss_index") is None:
1574
- st.error("FAISS index not loaded. Please upload faiss_books_db.zip in GeoMate Ask page.")
1575
- return "FAISS DB missing."
1576
-
1577
- index = ss["faiss_index"]
1578
- meta = ss["faiss_meta"]
1579
- # prepare embedding for query β€” simplified: use Groq's embed endpoint if available.
1580
- # For demo, we'll simply return top-K metadata concatenated as context
1581
- try:
1582
- # nearest neighbor search using user-provided embeddings if available
1583
- # Here: assume index is IndexFlatL2 and meta is list
1584
- # For safety, we will not compute embeddings here (requires sentence-transformers).
1585
- # We'll do a cheap fallback: return top K metas (or first K) as context.
1586
- context_texts = [m.get("text","") for m in (meta[:topk] if isinstance(meta, list) else meta)]
1587
- context = "\n\n".join(context_texts)
1588
- except Exception:
1589
- context = ""
1590
-
1591
- # Build prompt for Groq
1592
- system = "You are GeoMate RAG assistant. Use the context to answer precisely and professionally."
1593
- user_prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer concisely and cite context sections if relevant."
1594
 
1595
- if GROQ_CLIENT is None:
1596
- # fallback: simple echo
1597
- return "Groq client not available. Cannot complete RAG call."
1598
  try:
1599
- completion = GROQ_CLIENT.chat.completions.create(
1600
- model="meta-llama/llama-4-maverick-17b-128e-instruct",
1601
- messages=[{"role":"system","content":system},{"role":"user","content":user_prompt}],
1602
- temperature=0.2,
1603
- max_tokens=800
1604
- )
1605
- text = completion.choices[0].message.content
1606
- return text
 
 
 
 
 
 
 
 
 
1607
  except Exception as e:
1608
- st.error(f"Groq call failed: {e}")
1609
- return "RAG call failed."
1610
-
1611
- # --------------------------
1612
- # Entity extraction placeholder
1613
- # --------------------------
1614
- def update_site_description_from_text(site: dict, text: str) -> dict:
1615
- """
1616
- Very simple regex-based extraction for core engineering parameters from free text.
1617
- Called after each RAG/chat answer: extracts numbers like 'bearing capacity 2000 psf' etc.
1618
- This is a placeholder; can be replaced by a proper NER model.
1619
- """
1620
- import re
1621
- # load-bearing capacity (psf or kPa)
1622
- m = re.search(r"bearing capacity\s*(?:of)?\s*([0-9,.]+)\s*(kpa|psf|pa|kn/m2)?", text, re.IGNORECASE)
1623
- if m:
1624
- val = m.group(1).replace(",", "")
1625
- unit = m.group(2) or ""
1626
- site["Load Bearing Capacity"] = f"{val} {unit}".strip()
1627
-
1628
- # skin shear strength
1629
- m2 = re.search(r"skin shear strength\s*(?:[:is]*)\s*([0-9,.]+)\s*(kpa|kn/m2|psf)?", text, re.IGNORECASE)
1630
- if m2:
1631
- site["Skin Shear Strength"] = f"{m2.group(1).replace(',','')} { (m2.group(2) or '')}".strip()
1632
-
1633
- # % compaction
1634
- m3 = re.search(r"compaction\s*(?:[:is]*)\s*([0-9]{1,3})\s*%", text, re.IGNORECASE)
1635
- if m3:
1636
- site["Relative Compaction"] = f"{m3.group(1)}%"
1637
-
1638
- # rate of consolidation
1639
- m4 = re.search(r"consolidation rate\s*(?:[:is]*)\s*([0-9,.]+)\s*(mm/year|mm/yr|mm per year|m/year)?", text, re.IGNORECASE)
1640
- if m4:
1641
- site["Rate of Consolidation"] = f"{m4.group(1)} {m4.group(2) or ''}".strip()
1642
-
1643
- # nature of construction - look for keywords
1644
- if "residential" in text.lower():
1645
- site["Nature of Construction"] = "Residential"
1646
- elif "commercial" in text.lower():
1647
- site["Nature of Construction"] = "Commercial"
1648
- elif "pavement" in text.lower() or "road" in text.lower():
1649
- site["Nature of Construction"] = "Pavement / Road"
1650
-
1651
- return site
1652
 
1653
- # --------------------------
1654
- # GeoMate Ask (RAG Chat) UI
1655
- # --------------------------
1656
  def rag_ui():
1657
- st.header("πŸ€– GeoMate Ask β€” RAG + Groq (per-site memory)")
1658
  site = get_active_site()
1659
 
1660
  if "chat_history" not in site:
1661
  site["chat_history"] = []
1662
 
1663
- st.markdown("**Context:** The RAG uses your FAISS knowledge base (upload .zip in this page) and Groq LLM for answers. Chat history is saved for this site during the session.")
 
 
 
 
1664
 
1665
  # FAISS DB upload (one-time)
1666
- with st.expander("FAISS DB (index.faiss + meta.pkl inside a zip)"):
1667
- uploaded = st.file_uploader("Upload faiss_books_db.zip", type=["zip"])
1668
  if uploaded:
1669
  tmpf = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
1670
  tmpf.write(uploaded.getvalue())
@@ -1673,46 +1473,63 @@ def rag_ui():
1673
  if ix is not None:
1674
  ss["faiss_index"] = ix
1675
  ss["faiss_meta"] = meta
1676
- st.success("FAISS DB loaded.")
1677
 
1678
  # Render chat history
1679
  for turn in site.get("chat_history", []):
1680
- role = turn.get("role")
1681
- text = turn.get("text")
1682
  if role == "bot":
1683
- st.markdown(f"<div style='background:{THEME['bubble_bg']};padding:8px;border-radius:12px;border:2px solid {THEME['accent']};'><b>πŸ€– GeoMate:</b> {text}</div>", unsafe_allow_html=True)
 
 
 
1684
  else:
1685
- st.markdown(f"<div style='background:#1a2436;color:#fff;padding:8px;border-radius:12px;margin-left:40px;'><b>πŸ‘€ You:</b> {text}</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
1686
 
1687
- # Input box
1688
- user_q = st.text_input("Ask GeoMate (RAG + site memory):", key="geomate_rag_input")
1689
  if st.button("Ask", key="geomate_rag_button"):
1690
- if not user_q.strip():
1691
- st.warning("Type a question first.")
1692
  else:
1693
- # Append to history
1694
- site["chat_history"].append({"role":"user","text":user_q, "time":datetime.utcnow().isoformat()})
 
 
 
 
 
 
 
1695
  save_active_site(site)
 
1696
  # Retrieve + call LLM
1697
  with st.spinner("Retrieving context and calling LLM..."):
1698
- answer = rag_retrieve_and_answer(user_q, topk=5)
 
1699
  # Append bot answer
1700
  site["chat_history"].append({"role":"bot","text":answer, "time":datetime.utcnow().isoformat()})
1701
- # Try to extract any engineering parameters from answer or the user question
1702
- site = update_site_description_from_text(site, user_q + "\n" + answer)
 
1703
  save_active_site(site)
1704
- st.experimental_rerun() # small rerun to display new messages
 
1705
 
1706
  # Quick buttons
1707
  colA, colB = st.columns(2)
1708
- if colA.button("Save Chat to Site JSON"):
1709
  save_active_site(site)
1710
- st.success("Saved chat into site JSON.")
1711
- if colB.button("Clear Site Chat"):
1712
  site["chat_history"] = []
1713
  save_active_site(site)
1714
  st.success("Cleared history for this site.")
1715
-
1716
  # --------------------------
1717
  # REPORTS: PDF builders
1718
  # --------------------------
 
1418
  # - Final UI glue: Reports page and main app routing
1419
  # -------------------------------------------------------
1420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1421
  # --------------------------
1422
+ # GeoMate Ask (RAG Chat with OCR) UI
1423
  # --------------------------
1424
+ import pytesseract
1425
+ from PIL import Image
1426
+ import fitz # PyMuPDF for PDF OCR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1427
 
1428
def extract_text_from_file(uploaded_file):
    """Extract text from an uploaded image or PDF, falling back to OCR.

    Parameters
    ----------
    uploaded_file :
        A Streamlit UploadedFile-like object exposing ``.type`` (MIME
        string) and ``.read()`` (raw bytes).

    Returns
    -------
    str
        The extracted text, stripped of surrounding whitespace.
        Returns "" for unsupported MIME types or on any error
        (the error is also surfaced via ``st.error``).
    """
    try:
        if uploaded_file.type in ("image/png", "image/jpeg", "image/jpg"):
            img = Image.open(uploaded_file)
            return pytesseract.image_to_string(img).strip()
        if uploaded_file.type == "application/pdf":
            text_pages = []
            pdf = fitz.open(stream=uploaded_file.read(), filetype="pdf")
            try:
                for page in pdf:
                    page_text = page.get_text("text")
                    if not page_text.strip():
                        # Page has no embedded text layer (likely a scanned
                        # page): rasterize it and OCR the rendered image.
                        pix = page.get_pixmap()
                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        page_text = pytesseract.image_to_string(img)
                    text_pages.append(page_text)
            finally:
                # Fix: the original never closed the PyMuPDF document,
                # leaking the underlying file/stream handle.
                pdf.close()
            return "\n".join(text_pages).strip()
        # Unsupported MIME type: silently return empty, as before.
        return ""
    except Exception as e:
        st.error(f"OCR failed: {e}")
        return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1451
 
 
 
 
1452
  def rag_ui():
1453
+ st.header("πŸ€– GeoMate Ask β€” RAG + Groq (per-site memory + OCR)")
1454
  site = get_active_site()
1455
 
1456
  if "chat_history" not in site:
1457
  site["chat_history"] = []
1458
 
1459
+ st.markdown(
1460
+ "**Context:** The RAG uses your FAISS knowledge base (upload .zip in this page), "
1461
+ "Groq LLM for answers, and optional OCR from uploaded images/PDFs. "
1462
+ "Chat history is saved per site."
1463
+ )
1464
 
1465
  # FAISS DB upload (one-time)
1466
+ with st.expander("πŸ“‚ Upload FAISS DB (zip with index.faiss + meta.pkl)"):
1467
+ uploaded = st.file_uploader("Upload faiss_books_db.zip", type=["zip"], key="faiss_db_uploader")
1468
  if uploaded:
1469
  tmpf = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
1470
  tmpf.write(uploaded.getvalue())
 
1473
  if ix is not None:
1474
  ss["faiss_index"] = ix
1475
  ss["faiss_meta"] = meta
1476
+ st.success("βœ… FAISS DB loaded.")
1477
 
1478
  # Render chat history
1479
  for turn in site.get("chat_history", []):
1480
+ role, text = turn.get("role"), turn.get("text")
 
1481
  if role == "bot":
1482
+ st.markdown(
1483
+ f"<div style='background:{THEME['bubble_bg']};padding:8px;border-radius:12px;border:2px solid {THEME['accent']};'>"
1484
+ f"<b>πŸ€– GeoMate:</b> {text}</div>", unsafe_allow_html=True
1485
+ )
1486
  else:
1487
+ st.markdown(
1488
+ f"<div style='background:#1a2436;color:#fff;padding:8px;border-radius:12px;margin-left:40px;'>"
1489
+ f"<b>πŸ‘€ You:</b> {text}</div>", unsafe_allow_html=True
1490
+ )
1491
+
1492
+ # Input + OCR
1493
+ user_q = st.text_input("Ask GeoMate:", key="geomate_rag_input")
1494
+ uploaded_ocr = st.file_uploader("Optional OCR input (image/pdf)", type=["png","jpg","jpeg","pdf"], key="rag_ocr_uploader")
1495
 
 
 
1496
  if st.button("Ask", key="geomate_rag_button"):
1497
+ if not user_q.strip() and not uploaded_ocr:
1498
+ st.warning("Please type a question or upload a file.")
1499
  else:
1500
+ query_text = user_q.strip()
1501
+ if uploaded_ocr:
1502
+ with st.spinner("Running OCR..."):
1503
+ ocr_text = extract_text_from_file(uploaded_ocr)
1504
+ if ocr_text:
1505
+ query_text += "\n\n[OCR Extracted Content]\n" + ocr_text
1506
+
1507
+ # Append user query
1508
+ site["chat_history"].append({"role":"user","text":query_text, "time":datetime.utcnow().isoformat()})
1509
  save_active_site(site)
1510
+
1511
  # Retrieve + call LLM
1512
  with st.spinner("Retrieving context and calling LLM..."):
1513
+ answer = rag_retrieve_and_answer(query_text, topk=5)
1514
+
1515
  # Append bot answer
1516
  site["chat_history"].append({"role":"bot","text":answer, "time":datetime.utcnow().isoformat()})
1517
+
1518
+ # Extract parameters
1519
+ site = update_site_description_from_text(site, query_text + "\n" + answer)
1520
  save_active_site(site)
1521
+
1522
+ st.rerun()
1523
 
1524
  # Quick buttons
1525
  colA, colB = st.columns(2)
1526
+ if colA.button("πŸ’Ύ Save Chat"):
1527
  save_active_site(site)
1528
+ st.success("Chat saved into site JSON.")
1529
+ if colB.button("πŸ—‘οΈ Clear Chat"):
1530
  site["chat_history"] = []
1531
  save_active_site(site)
1532
  st.success("Cleared history for this site.")
 
1533
  # --------------------------
1534
  # REPORTS: PDF builders
1535
  # --------------------------