Spaces:
Build error
Build error
| from pathlib import Path | |
| import streamlit as st | |
| from haystack.nodes import PreProcessor, TextConverter, FARMReader, BM25Retriever | |
| from haystack.document_stores import InMemoryDocumentStore | |
| from haystack.pipelines import ExtractiveQAPipeline | |
| from haystack.pipelines.standard_pipelines import TextIndexingPipeline | |
| from haystack.pipelines.base import Pipeline | |
| import tokenizers | |
| # Hash hack, assume all outputs of ExtractiveQAPipeline type are equal | |
def get_pipe():
    """Build the extractive question-answering pipeline over the transcripts.

    Every ``*.txt`` file found directly inside ``making_sense_transcripts/``
    is run through an indexing pipeline (TextConverter -> PreProcessor ->
    DocumentStore) that fills an in-memory BM25 document store.  A BM25
    retriever over that store and a FARM reader are then combined into an
    ``ExtractiveQAPipeline``.

    Returns:
        ExtractiveQAPipeline: the reader/retriever pipeline, ready for ``run``.
    """
    transcript_path = Path("making_sense_transcripts/")
    document_store = InMemoryDocumentStore(use_bm25=True)

    indexing_pipeline = Pipeline()
    indexing_pipeline.add_node(
        component=TextConverter(), name="TextConverter", inputs=["File"]
    )
    indexing_pipeline.add_node(
        component=PreProcessor(), name="PreProcessor", inputs=["TextConverter"]
    )
    # The store must consume the PreProcessor's output.  Wiring it to
    # "TextConverter" left the PreProcessor as a dead-end branch, so the
    # raw converted files were indexed and the preprocessing was discarded.
    indexing_pipeline.add_node(
        component=document_store, name="DocumentStore", inputs=["PreProcessor"]
    )

    file_paths = list(transcript_path.glob("*.txt"))
    # One metadata dict per file, recording where each document came from.
    doc_paths = [{"file_path": str(path)} for path in file_paths]
    indexing_pipeline.run_batch(file_paths=file_paths, meta=doc_paths)

    retriever = BM25Retriever(document_store=document_store)
    reader = FARMReader(
        model_name_or_path="deepset/roberta-base-squad2",
        use_gpu=False,
        context_window_size=200,
    )
    return ExtractiveQAPipeline(reader, retriever)
def ask_pipe(question: str, pipe: ExtractiveQAPipeline) -> dict:
    """Run *question* through *pipe* and return the pipeline's prediction.

    The query is executed with ``top_k`` set to 10 for the ``Retriever``
    node and 5 for the ``Reader`` node.

    Args:
        question: the query string passed to the pipeline.
        pipe: the pipeline whose ``run`` method is invoked.

    Returns:
        Whatever ``pipe.run`` returns, unchanged.
    """
    node_params = {
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    }
    return pipe.run(query=question, params=node_params)