File size: 2,922 Bytes
ecd5411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
import pdfplumber
import openai
from dotenv import load_dotenv
import os

load_dotenv()  # Populate os.environ from a local .env file, if one exists

# Read the API key from the environment so it never has to be hard-coded.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Streamlit UI setup for the application
st.title("Advanced PDF-Based Application")
st.markdown("Select the functionality you want to use from the sidebar.")

if not openai.api_key:
    # Surface the misconfiguration up front; without a key the completion
    # requests made further down are expected to fail authentication.
    st.warning("OPENAI_API_KEY is not set. Add it to your environment or .env file.")

# Sidebar for mode selection and file uploading
with st.sidebar:
    mode = st.radio("Choose a mode:", ["PDF Summarizer", "Question Answering"])
    # No on_change callback: Streamlit already reruns the script whenever the
    # uploader's value changes, and requesting a rerun from inside a widget
    # callback is a no-op that only produces a warning.
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        accept_multiple_files=True,
        type=["pdf"],
    )

# Extracted text, one entry per PDF that was parsed and contained text
# (kept in upload order).
documents = []

# Upper bound on the number of pages read from each PDF.
MAX_PAGES_PER_PDF = 50

# Extract text from every uploaded PDF, reporting progress as we go
if uploaded_files:
    unreadable_files = []
    with st.spinner('Processing PDF files...'):
        progress_bar = st.progress(0)
        total_files = len(uploaded_files)
        for i, uploaded_file in enumerate(uploaded_files):
            try:
                with pdfplumber.open(uploaded_file) as pdf:
                    page_texts = []
                    for page in pdf.pages[:MAX_PAGES_PER_PDF]:
                        # extract_text() may yield None/empty for pages
                        # without a text layer; skip those.
                        page_text = page.extract_text()
                        if page_text:
                            page_texts.append(page_text)
            except Exception as exc:
                # Broad on purpose: this is the UI boundary for untrusted
                # uploads, and one malformed file should not abort the rest.
                unreadable_files.append(uploaded_file.name)
                st.error(f"Could not read '{uploaded_file.name}': {exc}")
            else:
                if page_texts:
                    # Join with newlines so the last word of one page is not
                    # fused to the first word of the next.
                    documents.append("\n".join(page_texts))
                else:
                    st.warning(
                        f"No extractable text found in '{uploaded_file.name}' "
                        "(it may be a scanned document)."
                    )
            progress_bar.progress((i + 1) / total_files)
        progress_bar.empty()
        # Only claim success when something usable was actually extracted.
        if documents and not unreadable_files:
            st.success("PDFs processed successfully. Proceed based on the selected mode.")
        elif documents:
            st.warning("Some PDFs could not be processed. Proceed with the remaining documents.")

# Using tabs to separate features
tab1, tab2 = st.tabs(["Question Answering", "PDF Summarizer"])

# Rough character budget for the document text placed in the prompt, so the
# request stays within the model's context window. This is a heuristic (token
# counts vary by language and content) -- tune it for the model in use.
QA_CONTEXT_CHAR_LIMIT = 16000

with tab1:
    # The tab only renders its controls when the matching sidebar mode is active.
    if mode == "Question Answering":
        question = st.text_input("Enter your question here:")
        if question and not documents:
            # Previously this case silently did nothing.
            st.info("Upload at least one PDF to ask questions about it.")
        elif question:
            # Only the first three documents are used as context.
            combined_text = "\n".join(documents[:3])
            if len(combined_text) > QA_CONTEXT_CHAR_LIMIT:
                combined_text = combined_text[:QA_CONTEXT_CHAR_LIMIT]
                st.caption("The document text was truncated to fit the model's context window.")
            # Supply the document context before the question so the user's
            # question is the final message the model responds to.
            messages = [
                {"role": "system", "content": "You are a helpful assistant."},
                {
                    "role": "system",
                    "content": "Use the following document text to answer the user's question:\n"
                    + combined_text,
                },
                {"role": "user", "content": question},
            ]
            try:
                response = openai.ChatCompletion.create(
                    model="gpt-4",
                    messages=messages,
                    max_tokens=500,
                )
            except openai.error.OpenAIError as exc:
                # Show API failures (auth, rate limit, oversized prompt, ...)
                # in the page instead of crashing with a traceback.
                st.error(f"The question could not be answered: {exc}")
            else:
                st.write("Answer:", response.choices[0].message['content'])

# Rough character budget for the text sent per summary request, so the prompt
# stays within the model's context window. This is a heuristic (token counts
# vary by language and content) -- tune it for the model in use.
SUMMARY_INPUT_CHAR_LIMIT = 16000

with tab2:
    # The tab only renders output when the matching sidebar mode is active
    # and at least one document has been extracted.
    if mode == "PDF Summarizer" and documents:
        # Only the first three documents are summarized.
        for idx, doc in enumerate(documents[:3], start=1):
            excerpt = doc[:SUMMARY_INPUT_CHAR_LIMIT]
            if len(doc) > SUMMARY_INPUT_CHAR_LIMIT:
                st.caption(f"Document {idx} was truncated to fit the model's context window.")
            messages = [
                {"role": "system", "content": "You are a helpful assistant tasked to summarize documents."},
                {"role": "user", "content": "Summarize the following text briefly:\n" + excerpt},
            ]
            try:
                response = openai.ChatCompletion.create(
                    model="gpt-4",
                    messages=messages,
                    max_tokens=1024,
                )
            except openai.error.OpenAIError as exc:
                # One failed request should not discard the other summaries.
                st.error(f"Summary {idx} could not be generated: {exc}")
                continue
            st.write(f"Summary {idx}:", response.choices[0].message['content'].strip())