Generative-Subodh committed on
Commit
718c825
·
verified ·
1 Parent(s): fbe0774

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py CHANGED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import fitz  # PyMuPDF = read the contents of the pdf file
import openai
import streamlit
import streamlit as st  # the app code refers to the module through the `st` alias
from fpdf import FPDF  # library for generating pdf files
7
+
8
+
9
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Binary file-like object whose ``read()`` returns the PDF
            bytes (the app passes the object produced by the file uploader).

    Returns:
        str: The text of all pages joined together; an empty string when the
        document contains no extractable text.
    """
    # The document is opened from a path, so persist the upload first.
    # delete=False keeps the file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(pdf_file.read())
        temp_path = temp_file.name

    try:
        # Closing the document before the file is removed matters on
        # platforms that refuse to delete a file that is still open.
        with fitz.open(temp_path) as doc:
            return "".join(page.get_text() for page in doc)
    finally:
        # Always clean up, even when the PDF cannot be parsed.
        os.remove(temp_path)
28
+
29
+
30
+
31
def ensure_full_stop(text):
    """Strip *text* and make sure it ends with sentence punctuation.

    Generated text can be cut off mid-sentence; appending a full stop makes
    the result read as a finished sentence.

    Args:
        text: The string to normalise.

    Returns:
        str: The stripped text, with ``'.'`` appended unless it already ends
        in ``'.'``, ``'!'`` or ``'?'``. Empty or whitespace-only input yields
        an empty string rather than a lone full stop.
    """
    text = text.strip()
    if not text:
        # Nothing to terminate: a bare "." would look like real content.
        return text
    if text.endswith(('.', '!', '?')):
        return text
    return text + '.'
38
+
39
+
40
def summarize_text(api_key, text):
    """Ask the OpenAI chat model for a summary of *text*.

    Args:
        api_key: OpenAI API key; assigned to the module-level
            ``openai.api_key`` before the request is made.
        text: The text to summarise.

    Returns:
        str: The model's reply, stripped and passed through
        ``ensure_full_stop`` so a truncated answer still ends in punctuation.
    """
    openai.api_key = api_key
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
        ],
        max_tokens=2048,
        # The sampling parameter is spelled `temperature` (as in
        # predict_topic); `temp` is not a recognised request argument.
        temperature=0.5,  # creativity, 0.5 is sweet spot
    )

    summary = response.choices[0].message['content'].strip()
    return ensure_full_stop(summary)
55
+
56
+
57
def predict_topic(api_key, text):
    """Ask the OpenAI chat model what the main topic of *text* is.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key``.
        text: The text whose main topic should be identified.

    Returns:
        str: The model's reply with surrounding whitespace removed.
    """
    openai.api_key = api_key

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"What is the main topic of the following text?\n\n{text}"},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # Use "gpt-4" if you have access
        messages=conversation,
        max_tokens=500,
        temperature=0.5,
    )

    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()
69
+
70
def _latin1_safe(value):
    """Return *value* with characters outside Latin-1 replaced by ``?``."""
    return value.encode("latin-1", "replace").decode("latin-1")


def create_pdf(summary, topic, original_file_name):
    """Write the summary and predicted topic to a PDF file on disk.

    Args:
        summary: Summary text placed under the "Summary" heading.
        topic: Topic text placed under the "Predicted Main Topic" heading.
        original_file_name: Name of the source document; its extension is
            dropped and ``_summary.pdf`` is appended to name the output.

    Returns:
        str: Path of the generated PDF inside the system temp directory.
    """
    # The name comes from the upload, so keep only its final path component
    # to stop it steering the output outside the temp directory.
    safe_name = os.path.basename(original_file_name.replace("\\", "/"))
    base_name = os.path.splitext(safe_name)[0] or "document"  # Remove the .pdf extension
    pdf_file_name = f"{base_name}_summary.pdf"  # Create the new filename

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # The built-in Arial font only covers Latin-1; model output often contains
    # characters outside it (curly quotes, dashes), which cannot be encoded.
    # Width 0 stretches the heading cell to the right margin so align='C'
    # centres it within the printable area.
    pdf.cell(0, 10, txt="Summary", ln=True, align='C')
    pdf.multi_cell(0, 10, txt=_latin1_safe(summary))

    pdf.cell(0, 10, txt="Predicted Main Topic", ln=True, align='C')
    pdf.multi_cell(0, 10, txt=_latin1_safe(topic))

    # Save the PDF to the platform's temp directory (portable, unlike "/tmp")
    pdf_file_path = os.path.join(tempfile.gettempdir(), pdf_file_name)
    pdf.output(pdf_file_path)

    return pdf_file_path
90
+
91
# ---- Streamlit page: upload a PDF, show its summary and topic, offer a PDF ----
st.title("Research paper summary")
api_key = st.text_input("Enter your OpenAI API Key: ", type="password")

# file uploading
uploaded_file = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])

if uploaded_file is None:
    st.info("Please upload a valid PDF file to proceed.")
elif not api_key:
    # Every OpenAI request fails without a key, so ask for it up front.
    st.warning("Please enter your OpenAI API key to proceed.")
else:
    # The script re-runs on every interaction; rewind so the upload can be
    # read again from the start.
    uploaded_file.seek(0)
    # `or ""` keeps the length check safe if extraction yields nothing.
    text = extract_text_from_pdf(uploaded_file) or ""

    if len(text) > 1000:
        # Cache the model output per document so that a rerun triggered by a
        # button click does not repeat (and re-bill) the two API calls.
        cache_key = (uploaded_file.name, hash(text))
        analysis = st.session_state.get("analysis")
        if analysis is None or analysis["key"] != cache_key:
            try:
                analysis = {
                    "key": cache_key,
                    "summary": summarize_text(api_key, text),
                    "topic": predict_topic(api_key, text),
                }
            except Exception as exc:  # UI boundary: show a message, not a traceback
                analysis = None
                st.error(f"Could not analyse the document: {exc}")
            st.session_state["analysis"] = analysis

        if analysis is not None:
            summary = analysis["summary"]
            topic = analysis["topic"]

            st.subheader("Summary")
            st.write(summary)

            st.subheader("Predicted Topic")
            st.write(topic)

            # Button to download results as a PDF
            if st.button("Get the Summary PDF"):
                pdf_path = create_pdf(summary, topic, uploaded_file.name)
                # Read through a context manager so the handle is closed.
                with open(pdf_path, "rb") as pdf_handle:
                    pdf_bytes = pdf_handle.read()
                st.download_button(
                    label="Download Summary PDF",
                    data=pdf_bytes,
                    file_name=os.path.basename(pdf_path),
                    mime="application/pdf",
                )
    else:
        st.warning("The document is too short for meaningful analysis.")
124
+
125
def get_text():
    """Unimplemented placeholder: takes no arguments and returns ``None``."""
    return None