Generative-Subodh committed on
Commit
8b58979
·
verified ·
1 Parent(s): 718c825

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -81
app.py CHANGED
@@ -1,76 +1,65 @@
1
- import streamlit
2
- import fitz # PyMuPDF = read the contents of the pdf file
3
  import openai
4
- from fpdf import FPDF # library for generating pdf files
5
  import os
6
  import tempfile
7
 
8
-
9
  # function to extract pdf file
10
  def extract_text_from_pdf(pdf_file):
 
 
 
11
 
12
- # save the uploaded file
13
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
14
- temp_file.write(pdf_file.read())
15
- temp_file.close() # close the file to ensure it is saved
16
-
17
- # open the saved pdf file
18
- doc = fitz.open(temp_file.name)
19
- text = "" # extracted info saved here
20
-
21
- # iterate 'doc' page wise iteration
22
- for page_num in range(len(doc)):
23
- page = doc.load_page(page_num)
24
- text += page.get_text() # saving the text of each page into the 'text'
25
 
26
- # delete the temp file after reading
27
- os.remove(temp_file.name)
 
28
 
 
 
29
 
30
-
31
- # function to ensure if it is ends properly
32
- # it might happen that LLM text generation might end in between, at this point we require this function
33
  def ensure_full_stop(text):
34
- text = text.strip()
35
- if not text.endswith(('.', '!', '?')):
36
- text += '.'
37
- return text
38
-
39
 
40
  # function to summarize
41
  def summarize_text(api_key, text):
42
- openai.api_key = api_key
43
- response = openai.ChatCompletion.create(
44
- model = "gpt-3.5-turbo",
45
- messages = [
46
- {"role": "system", "content": "You are a helpful assistant."},
47
- {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
48
- ],
49
- max_tokens = 2048,
50
- temp = 0.5 # creativity, 0.5 is sweet spot
51
- )
52
-
53
- summary = response.choices[0].message['content'].strip()
54
- return ensure_full_stop(summary)
55
-
56
-
57
- # understand the main gist of the pdf
58
  def predict_topic(api_key, text):
59
  openai.api_key = api_key
60
  response = openai.ChatCompletion.create(
61
- model="gpt-3.5-turbo", # Use "gpt-4" if you have access
62
  messages=[{"role": "system", "content": "You are a helpful assistant."},
63
  {"role": "user", "content": f"What is the main topic of the following text?\n\n{text}"}],
64
  max_tokens=500,
65
  temperature=0.5
66
  )
67
- topic = response.choices[0].message['content'].strip()
68
- return topic
69
 
70
- # Function to generate a PDF with summary and topic
71
  def create_pdf(summary, topic, original_file_name):
72
- base_name = os.path.splitext(original_file_name)[0] # Remove the .pdf extension
73
- pdf_file_name = f"{base_name}_summary.pdf" # Create the new filename
74
 
75
  pdf = FPDF()
76
  pdf.add_page()
@@ -82,45 +71,39 @@ def create_pdf(summary, topic, original_file_name):
82
  pdf.cell(200, 10, txt="Predicted Main Topic", ln=True, align='C')
83
  pdf.multi_cell(0, 10, txt=topic)
84
 
85
- # Save the PDF to a file in memory
86
  pdf_file_path = f"/tmp/{pdf_file_name}"
87
  pdf.output(pdf_file_path)
88
 
89
  return pdf_file_path
90
 
91
- st.title("Research paper summary")
92
- api_key = st.text_input("Enter your OpenAI API Key: ", type="password")
 
93
 
94
- # file uploading
95
  uploaded_file = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])
96
 
97
- if uploaded_file is not None:
98
- text = extract_text_from_pdf(uploaded_file)
99
-
100
- if len(text) > 1000:
101
- summary = summarize_text(api_key, text)
102
- topic = predict_topic(api_key, text)
103
-
104
- st.subheader("Summary")
105
- st.write(summary)
106
-
107
- st.subheader("Predicted Topic")
108
- st.write(topic)
109
-
110
- # Button to download results as a PDF
111
- if st.button("Get the Summary PDF"):
112
- pdf_path = create_pdf(summary, topic, uploaded_file.name)
113
- st.download_button (
114
- label="Download Summary PDF",
115
- data=open(pdf_path, "rb").read(),
116
- file_name=os.path.basename(pdf_path),
117
- mime="application/pdf"
118
- )
119
- else:
120
- st.warning("The document is too short for meaningful analysis.")
121
-
122
  else:
123
  st.info("Please upload a valid PDF file to proceed.")
124
-
125
- def get_text():
126
- pass
 
1
+ import streamlit as st # Correct import
2
+ import fitz # PyMuPDF = read the contents of the pdf file
3
  import openai
4
+ from fpdf import FPDF # Library for generating pdf files
5
  import os
6
  import tempfile
7
 
 
8
  # function to extract pdf file
9
  def extract_text_from_pdf(pdf_file):
10
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
11
+ temp_file.write(pdf_file.read())
12
+ temp_file.close()
13
 
14
+ doc = fitz.open(temp_file.name)
15
+ text = ""
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ for page_num in range(len(doc)):
18
+ page = doc.load_page(page_num)
19
+ text += page.get_text()
20
 
21
+ os.remove(temp_file.name)
22
+ return text # Ensure this function returns text
23
 
24
+ # function to ensure if it ends properly
 
 
25
  def ensure_full_stop(text):
26
+ text = text.strip()
27
+ if not text.endswith(('.', '!', '?')):
28
+ text += '.'
29
+ return text
 
30
 
31
  # function to summarize
32
  def summarize_text(api_key, text):
33
+ openai.api_key = api_key
34
+ response = openai.ChatCompletion.create(
35
+ model="gpt-3.5-turbo",
36
+ messages=[
37
+ {"role": "system", "content": "You are a helpful assistant."},
38
+ {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
39
+ ],
40
+ max_tokens=2048,
41
+ temperature=0.5
42
+ )
43
+
44
+ summary = response.choices[0].message['content'].strip()
45
+ return ensure_full_stop(summary)
46
+
47
+ # function to predict topic
 
48
  def predict_topic(api_key, text):
49
  openai.api_key = api_key
50
  response = openai.ChatCompletion.create(
51
+ model="gpt-3.5-turbo",
52
  messages=[{"role": "system", "content": "You are a helpful assistant."},
53
  {"role": "user", "content": f"What is the main topic of the following text?\n\n{text}"}],
54
  max_tokens=500,
55
  temperature=0.5
56
  )
57
+ return response.choices[0].message['content'].strip()
 
58
 
59
+ # Function to generate a PDF
60
  def create_pdf(summary, topic, original_file_name):
61
+ base_name = os.path.splitext(original_file_name)[0]
62
+ pdf_file_name = f"{base_name}_summary.pdf"
63
 
64
  pdf = FPDF()
65
  pdf.add_page()
 
71
  pdf.cell(200, 10, txt="Predicted Main Topic", ln=True, align='C')
72
  pdf.multi_cell(0, 10, txt=topic)
73
 
 
74
  pdf_file_path = f"/tmp/{pdf_file_name}"
75
  pdf.output(pdf_file_path)
76
 
77
  return pdf_file_path
78
 
79
+ # Streamlit UI
80
+ st.title("Research Paper Summary")
81
+ api_key = st.text_input("Enter your OpenAI API Key:", type="password")
82
 
 
83
  uploaded_file = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])
84
 
85
+ if uploaded_file is not None:
86
+ text = extract_text_from_pdf(uploaded_file)
87
+
88
+ if len(text) > 1000:
89
+ summary = summarize_text(api_key, text)
90
+ topic = predict_topic(api_key, text)
91
+
92
+ st.subheader("Summary")
93
+ st.write(summary)
94
+
95
+ st.subheader("Predicted Topic")
96
+ st.write(topic)
97
+
98
+ if st.button("Get the Summary PDF"):
99
+ pdf_path = create_pdf(summary, topic, uploaded_file.name)
100
+ st.download_button(
101
+ label="Download Summary PDF",
102
+ data=open(pdf_path, "rb").read(),
103
+ file_name=os.path.basename(pdf_path),
104
+ mime="application/pdf"
105
+ )
106
+ else:
107
+ st.warning("The document is too short for meaningful analysis.")
 
 
108
  else:
109
  st.info("Please upload a valid PDF file to proceed.")