Spaces:
Build error
Build error
cache haystack setup
Browse files
app.py
CHANGED
|
@@ -4,17 +4,20 @@ from haystack.nodes import TransformersSummarizer, PreProcessor, PDFToTextConver
|
|
| 4 |
from haystack.schema import Document
|
| 5 |
import logging
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def pdf_to_document_store(pdf_files):
|
|
@@ -33,6 +36,7 @@ def summarize(files):
|
|
| 33 |
summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
| 34 |
st.write(summary)
|
| 35 |
|
|
|
|
| 36 |
|
| 37 |
uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
|
| 38 |
|
|
|
|
| 4 |
from haystack.schema import Document
|
| 5 |
import logging
|
| 6 |
|
| 7 |
+
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None}, allow_output_mutation=True)
def start_haystack():
    """Create the Haystack components the app works with.

    Builds, in this order, an in-memory document store, a ``PreProcessor``
    that cleans and splits text into word-based chunks, and a
    ``TransformersSummarizer`` backed by the ``google/pegasus-xsum`` model.

    The function is wrapped in ``st.cache`` so the setup can be reused
    instead of rebuilt on every call. The ``hash_funcs`` entry replaces the
    hash of ``builtins.SwigPyObject`` values with ``None``.
    NOTE(review): presumably this is needed because some object involved
    here wraps a SWIG type Streamlit cannot hash -- confirm.

    Returns:
        A ``(document_store, summarizer, preprocessor)`` tuple. Note that
        the summarizer comes before the preprocessor in the result even
        though it is constructed last.
    """
    # Cleaning and splitting options, gathered in one place for readability.
    preprocessing_options = dict(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=True,
        split_by="word",
        split_length=100,
        split_respect_sentence_boundary=True,
        split_overlap=3,
    )

    store = InMemoryDocumentStore()
    text_preprocessor = PreProcessor(**preprocessing_options)
    pegasus_summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")

    return store, pegasus_summarizer, text_preprocessor
|
| 21 |
|
| 22 |
|
| 23 |
def pdf_to_document_store(pdf_files):
|
|
|
|
| 36 |
summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
| 37 |
st.write(summary)
|
| 38 |
|
| 39 |
+
# Obtain the shared Haystack components at module level; start_haystack() is
# wrapped in st.cache, so the setup is meant to be reused rather than rebuilt.
document_store, summarizer, preprocessor = start_haystack()
|
| 40 |
|
| 41 |
uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
|
| 42 |
|