Generative-Subodh committed on
Commit
22eeb17
·
verified ·
1 Parent(s): 8b58979

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -26
app.py CHANGED
@@ -1,65 +1,66 @@
1
- import streamlit as st # Correct import
2
- import fitz # PyMuPDF = read the contents of the pdf file
3
  import openai
4
- from fpdf import FPDF # Library for generating pdf files
5
  import os
6
  import tempfile
7
 
8
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of an uploaded PDF.

    The upload is copied to a temporary ``.pdf`` file, read with PyMuPDF
    (``fitz``), and the temporary file is always deleted afterwards.

    Args:
        pdf_file: Binary file-like object exposing ``read()``.

    Returns:
        The text of all pages, concatenated in page order.
    """
    # delete=False: the file has to outlive close() so it can be reopened
    # by name.
    handle = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with handle:
            handle.write(pdf_file.read())

        doc = fitz.open(handle.name)
        try:
            return "".join(
                doc.load_page(index).get_text() for index in range(len(doc))
            )
        finally:
            # Release the document's handle on the file before deleting it.
            doc.close()
    finally:
        # Runs on success and on failure, so a broken PDF no longer leaves
        # the temporary file behind.
        os.remove(handle.name)
23
 
24
def ensure_full_stop(text):
    """Trim *text* and guarantee that it ends with sentence punctuation.

    Args:
        text: The string to normalise.

    Returns:
        The stripped text, with "." appended unless it already ends with
        ".", "!" or "?". Empty or whitespace-only input returns "" instead
        of a lone ".".
    """
    text = text.strip()
    # Guard on `text` so an empty string is not turned into ".".
    if text and not text.endswith(('.', '!', '?')):
        text += '.'
    return text
30
 
31
def summarize_text(api_key, text):
    """Request a summary of *text* from the OpenAI chat completion API.

    Sets the module-level ``openai.api_key`` and sends one system message
    and one user message to ``gpt-3.5-turbo`` with ``max_tokens=2048`` and
    ``temperature=0.5``.

    Args:
        api_key: OpenAI API key to use for the request.
        text: The text to summarise.

    Returns:
        The stripped content of the first returned choice, passed through
        ``ensure_full_stop``.
    """
    openai.api_key = api_key

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=2048,
        temperature=0.5,
    )

    return ensure_full_stop(completion.choices[0].message['content'].strip())
46
 
47
def predict_topic(api_key, text):
    """Ask the OpenAI chat completion API for the main topic of *text*.

    Sets the module-level ``openai.api_key`` and sends one system message
    and one user message to ``gpt-3.5-turbo`` with ``max_tokens=500`` and
    ``temperature=0.5``.

    Args:
        api_key: OpenAI API key to use for the request.
        text: The text whose main topic is requested.

    Returns:
        The stripped content of the first returned choice.
    """
    openai.api_key = api_key

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"What is the main topic of the following text?\n\n{text}"},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=500,
        temperature=0.5,
    )
    return completion.choices[0].message['content'].strip()
 
58
 
59
- # Function to generate a PDF
60
  def create_pdf(summary, topic, original_file_name):
61
- base_name = os.path.splitext(original_file_name)[0]
62
- pdf_file_name = f"{base_name}_summary.pdf"
63
 
64
  pdf = FPDF()
65
  pdf.add_page()
@@ -71,30 +72,40 @@ def create_pdf(summary, topic, original_file_name):
71
  pdf.cell(200, 10, txt="Predicted Main Topic", ln=True, align='C')
72
  pdf.multi_cell(0, 10, txt=topic)
73
 
 
74
  pdf_file_path = f"/tmp/{pdf_file_name}"
75
  pdf.output(pdf_file_path)
76
 
77
  return pdf_file_path
78
 
79
  # Streamlit UI
80
- st.title("Research Paper Summary")
 
 
81
  api_key = st.text_input("Enter your OpenAI API Key:", type="password")
82
 
 
83
  uploaded_file = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])
84
 
85
  if uploaded_file is not None:
 
86
  text = extract_text_from_pdf(uploaded_file)
87
 
88
  if len(text) > 1000:
 
89
  summary = summarize_text(api_key, text)
 
 
90
  topic = predict_topic(api_key, text)
91
 
 
92
  st.subheader("Summary")
93
  st.write(summary)
94
-
95
- st.subheader("Predicted Topic")
96
  st.write(topic)
97
 
 
98
  if st.button("Get the Summary PDF"):
99
  pdf_path = create_pdf(summary, topic, uploaded_file.name)
100
  st.download_button(
@@ -106,4 +117,4 @@ if uploaded_file is not None:
106
  else:
107
  st.warning("The document is too short for meaningful analysis.")
108
  else:
109
- st.info("Please upload a valid PDF file to proceed.")
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF
3
  import openai
4
+ from fpdf import FPDF
5
  import os
6
  import tempfile
7
 
8
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in an uploaded PDF.

    The upload is saved to a temporary ``.pdf`` file on disk, opened with
    PyMuPDF (``fitz``), and the temporary file is removed afterwards
    whether or not extraction succeeds.

    Args:
        pdf_file: Binary file-like object exposing ``read()``.

    Returns:
        The text of all pages joined in page order.
    """
    # delete=False keeps the file on disk after it is closed, so it can be
    # reopened by path below.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with temp_file:
            temp_file.write(pdf_file.read())

        # The context manager closes the document before the file is
        # deleted, including when a page fails to parse.
        with fitz.open(temp_file.name) as doc:
            return "".join(page.get_text() for page in doc)
    finally:
        # Clean up on every exit path; previously an exception while
        # reading the PDF left the temporary file behind.
        os.remove(temp_file.name)
26
+
27
def ensure_full_stop(text):
    """Strip *text* and make sure it ends with sentence punctuation.

    Args:
        text: The string to normalise.

    Returns:
        The stripped text, with "." appended unless it already ends in
        ".", "!" or "?". Empty or whitespace-only input yields "" rather
        than a lone ".".
    """
    text = text.strip()
    # Only punctuate non-empty text: an empty string should stay empty.
    if text and not text.endswith(('.', '!', '?')):
        text += '.'
    return text
33
 
34
def summarize_text(api_key, text):
    """Summarise *text* with the OpenAI chat completion API.

    Sets the module-level ``openai.api_key`` and sends one system message
    and one user message to ``gpt-3.5-turbo`` with ``max_tokens=500`` and
    ``temperature=0.5``.

    Args:
        api_key: OpenAI API key to use for the request.
        text: The text to summarise.

    Returns:
        The stripped content of the first returned choice, passed through
        ``ensure_full_stop``.
    """
    openai.api_key = api_key

    prompt_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
    ]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # Use "gpt-4" if you have access
        messages=prompt_messages,
        max_tokens=500,
        temperature=0.5,
    )

    first_choice = reply.choices[0]
    return ensure_full_stop(first_choice.message['content'].strip())
46
 
47
def predict_topic(api_key, text):
    """Ask the OpenAI chat completion API for the main topic of *text*.

    Sets the module-level ``openai.api_key`` and sends one system message
    and one user message to ``gpt-3.5-turbo`` with ``max_tokens=500`` and
    ``temperature=0.5``.

    Args:
        api_key: OpenAI API key to use for the request.
        text: The text whose main topic is requested.

    Returns:
        The stripped content of the first returned choice.
    """
    openai.api_key = api_key

    prompt_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"What is the main topic of the following text?\n\n{text}"},
    ]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # Use "gpt-4" if you have access
        messages=prompt_messages,
        max_tokens=500,
        temperature=0.5,
    )
    return reply.choices[0].message['content'].strip()
59
 
60
+ # Function to generate a PDF with summary and topic
61
  def create_pdf(summary, topic, original_file_name):
62
+ base_name = os.path.splitext(original_file_name)[0] # Remove the .pdf extension
63
+ pdf_file_name = f"{base_name} summary.pdf" # Create the new filename
64
 
65
  pdf = FPDF()
66
  pdf.add_page()
 
72
  pdf.cell(200, 10, txt="Predicted Main Topic", ln=True, align='C')
73
  pdf.multi_cell(0, 10, txt=topic)
74
 
75
+ # Save the PDF to a file under /tmp
76
  pdf_file_path = f"/tmp/{pdf_file_name}"
77
  pdf.output(pdf_file_path)
78
 
79
  return pdf_file_path
80
 
81
  # Streamlit UI
82
+ st.title("Research Paper Summarizer")
83
+
84
+ # API Key input
85
  api_key = st.text_input("Enter your OpenAI API Key:", type="password")
86
 
87
+ # File upload
88
  uploaded_file = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])
89
 
90
  if uploaded_file is not None:
91
+ # Extract text from the uploaded PDF
92
  text = extract_text_from_pdf(uploaded_file)
93
 
94
  if len(text) > 1000:
95
+ # Summarize the text
96
  summary = summarize_text(api_key, text)
97
+
98
+ # Predict the main topic
99
  topic = predict_topic(api_key, text)
100
 
101
+ # Display the results
102
  st.subheader("Summary")
103
  st.write(summary)
104
+
105
+ st.subheader("Predicted Main Topic")
106
  st.write(topic)
107
 
108
+ # Button to download results as a PDF
109
  if st.button("Get the Summary PDF"):
110
  pdf_path = create_pdf(summary, topic, uploaded_file.name)
111
  st.download_button(
 
117
  else:
118
  st.warning("The document is too short for meaningful analysis.")
119
  else:
120
+ st.info("Please upload a valid PDF file to proceed.")