import os
import io
import json
from google.cloud import vision
from dotenv import load_dotenv
from groq import Groq

load_dotenv()

# Load the Google Cloud Vision service-account credentials from the GCV_JSON
# environment variable. The JSON string is written to a file and that file's
# path is exported through GOOGLE_APPLICATION_CREDENTIALS.
gcv_json_str = os.environ.get("GCV_JSON")
if gcv_json_str:
    temp_path = "/tmp/gcv_temp.json"
    # The file holds a secret, so create it readable/writable by the owner
    # only instead of relying on the process umask.
    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        # The creation mode is ignored when the file already exists, so
        # tighten the permissions explicitly before writing the secret.
        os.fchmod(f.fileno(), 0o600)
        f.write(gcv_json_str)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_path

# Module-level Groq API client used by extract_table_from_text; the API key is
# read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

def run_ocr_with_gcv(image_path):
    """Run Google Cloud Vision document OCR on an image file and return its text.

    Args:
        image_path: Path of the image file whose bytes are sent for OCR.

    Returns:
        The ``full_text_annotation.text`` value of the Vision response.

    Raises:
        RuntimeError: If the Vision response carries an error message.
    """
    client_vision = vision.ImageAnnotatorClient()
    with open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client_vision.document_text_detection(image=image)
    # A failed annotation is reported in ``response.error`` instead of being
    # raised, so surface it rather than silently returning empty text.
    if response.error.message:
        raise RuntimeError(
            f"Google Cloud Vision OCR failed: {response.error.message}"
        )
    return response.full_text_annotation.text

def extract_table_from_text(text, max_tokens=4096, model="meta-llama/llama-4-scout-17b-16e-instruct"):
    """Ask the Groq chat model to extract an item table from invoice text.

    Args:
        text: Invoice text that is embedded into the user prompt.
        max_tokens: Passed to the API as ``max_completion_tokens``.
        model: Identifier of the chat model to call.

    Returns:
        The message content of the first completion choice. The prompt asks
        the model to reply with a Markdown table only.
    """
    prompt = f"""
    Extract a structured table of items from the invoice text below. 
    - First findout what are the table column names
    - The table should include all items under column names.
    -
    If some values are missing, fill as "N/A".

    Output the table in Markdown format. Only return the table.

    Invoice Text:
    \"\"\"
    {text}
    \"\"\"
    """
    system_message = {
        "role": "system",
        "content": "You are a professional invoice data extractor.",
    }
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        temperature=1,
        max_completion_tokens=max_tokens,
        top_p=1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


import pandas as pd
from io import StringIO


def _is_markdown_delimiter_row(line):
    """Return True if *line* is a Markdown header delimiter such as ``|:--|--:|``."""
    candidate = line.strip()
    # A delimiter row consists solely of pipes, colons, hyphens and whitespace
    # and contains at least one hyphen.
    return "-" in candidate and not candidate.strip("|:- \t")


def extract_markdown_table(output_text):
    """Parse the pipe-delimited Markdown table in *output_text* into a DataFrame.

    Every line containing at least two ``|`` characters is treated as a table
    row; all other lines (for example prose around the table) are ignored.
    The first such line is the header. If the second one is a Markdown
    delimiter row (``| --- | --- |``, ``|:--|--:|``, ...) it is discarded.

    Args:
        output_text: Text containing a Markdown table.

    Returns:
        A ``pandas.DataFrame`` with whitespace-stripped column names and
        string cells. Columns that are entirely empty (including the ones
        produced by the leading and trailing ``|``) are removed.

    Raises:
        ValueError: If fewer than two table rows are found.
    """
    table_lines = [
        line for line in output_text.strip().splitlines() if line.count('|') > 1
    ]
    if len(table_lines) < 2:
        raise ValueError("❌ No markdown table found in output.")

    # Drop the header delimiter row so it is not parsed as data. Matching on
    # the row's shape (rather than the substring '---') also accepts short or
    # aligned delimiters and keeps data rows that merely contain '---'.
    if _is_markdown_delimiter_row(table_lines[1]):
        del table_lines[1]

    cleaned_md = "\n".join(table_lines)
    df = pd.read_csv(StringIO(cleaned_md), sep='|', engine='python')
    df = df.dropna(axis=1, how='all')  # remove empty columns
    df.columns = [col.strip() for col in df.columns]

    def strip_text(value):
        return value.strip() if isinstance(value, str) else value

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever this pandas version provides.
    if hasattr(pd.DataFrame, "map"):
        df = df.map(strip_text)
    else:
        df = df.applymap(strip_text)

    return df