sense / haystack_utils.py
vanishing-grad's picture
Create the initial version
4e6bed1
raw
history blame
1.7 kB
from pathlib import Path
import streamlit as st
from haystack.nodes import PreProcessor, TextConverter, FARMReader, BM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import ExtractiveQAPipeline
from haystack.pipelines.standard_pipelines import TextIndexingPipeline
from haystack.pipelines.base import Pipeline
import tokenizers
# Hash hack, assume all outputs of ExtractiveQAPipeline type are equal
@st.cache(hash_funcs={ExtractiveQAPipeline: lambda _: "42"})
def get_pipe():
    """Build and cache the extractive QA pipeline over the transcripts.

    Indexes every ``*.txt`` file under ``making_sense_transcripts/`` into an
    in-memory BM25 document store (File -> TextConverter -> PreProcessor ->
    DocumentStore), then wires a BM25Retriever and a RoBERTa FARMReader into
    an ExtractiveQAPipeline.

    Returns:
        ExtractiveQAPipeline: ready-to-query QA pipeline.
    """
    transcript_path = Path("making_sense_transcripts/")
    document_store = InMemoryDocumentStore(use_bm25=True)

    indexing_pipeline = Pipeline()
    indexing_pipeline.add_node(
        component=TextConverter(), name="TextConverter", inputs=["File"]
    )
    indexing_pipeline.add_node(
        component=PreProcessor(), name="PreProcessor", inputs=["TextConverter"]
    )
    # BUG FIX: the document store must consume the PreProcessor's output.
    # It was wired to "TextConverter", which skipped cleaning/splitting and
    # indexed the raw converted documents.
    indexing_pipeline.add_node(
        component=document_store, name="DocumentStore", inputs=["PreProcessor"]
    )

    file_paths = list(transcript_path.glob("*.txt"))
    doc_paths = [{"file_path": str(path)} for path in file_paths]
    indexing_pipeline.run_batch(file_paths=file_paths, meta=doc_paths)

    retriever = BM25Retriever(document_store=document_store)
    # CPU-only reader; 200-char answer context window for display.
    reader = FARMReader(
        model_name_or_path="deepset/roberta-base-squad2",
        use_gpu=False,
        context_window_size=200,
    )
    pipe = ExtractiveQAPipeline(reader, retriever)
    return pipe
def ask_pipe(question: str, pipe: ExtractiveQAPipeline) -> dict:
    """Query the extractive QA pipeline and return its raw prediction dict.

    Retrieves the top 10 candidate documents, then lets the reader extract
    the top 5 answer spans.
    """
    search_params = {"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
    return pipe.run(query=question, params=search_params)