import os
from pathlib import Path

import gradio as gr
import torch
import whisper
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
def process_meeting(api_key, topic, audio_file):
    os.environ["OPENAI_API_KEY"] = api_key

    # Gradio's File component already saves the upload to a temp file on
    # disk, so its path can be handed to Whisper directly; ffmpeg resolves
    # the container format, so there is no need to re-save it as .m4a.
    file = audio_file.name

    # Use the GPU when available, otherwise fall back to CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("medium.en").to(DEVICE)
    def transcribe_file(model, file):
        file_path = Path(file)
        print(f"Transcribing file: {file_path}\n")
        result = model.transcribe(file, verbose=False, language="en")
        return result["text"]

    transcribed_text = transcribe_file(model, file)
    # TextLoader expects a file path, not raw text, so wrap the transcript
    # in a Document directly instead of round-tripping through a loader.
    docs = [Document(page_content=transcribed_text)]

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1024,
        chunk_overlap=50,
        length_function=len,
    )
    # Split the transcript and index the chunks in a local Chroma store.
    # (The original text-davinci-003 OpenAI LLM was created here but never
    # used, since it was immediately overwritten below, so it is removed.)
    texts = text_splitter.split_documents(docs)
    embeddings = OpenAIEmbeddings()
    persist_directory = "./chroma"
    store = Chroma.from_documents(
        texts,
        embeddings,
        collection_name="meeting",
        persist_directory=persist_directory,
    )
    # GPT-4 chat model; tokens are streamed to stdout as they arrive.
    llm = ChatOpenAI(
        model_name="gpt-4",
        temperature=0,
        streaming=True,
        verbose=False,
        callbacks=[StreamingStdOutCallbackHandler()],
    )

    # "stuff" chain over the single most relevant chunk (k=1).
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(search_kwargs={"k": 1}),
    )
    meeting_summary = qa_chain.run(
        "Please summarize the meeting and provide the minutes. "
        "Include actions, responsible persons, and due dates where possible. "
        "Provide a bullet list for actions. The topic is: " + topic
    )
    # Persist the vector store to disk and release it.
    try:
        store.persist()
        del store
    except Exception as e:
        print(e)

    return meeting_summary
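
# A quick local smoke test, kept as a comment (the file name is
# hypothetical; any object with a .name attribute pointing at an audio
# file on disk will do, since that is all process_meeting reads):
#
#     with open("sample_meeting.m4a", "rb") as audio:
#         print(process_meeting(os.environ["OPENAI_API_KEY"], "Weekly sync", audio))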
# Define the Gradio interface (gr.inputs/gr.outputs are deprecated
# namespaces; the components are used directly instead).
iface = gr.Interface(
    fn=process_meeting,
    inputs=[
        gr.Textbox(lines=1, label="OpenAI API Key", type="password"),
        gr.Textbox(lines=2, label="Meeting Topic"),
        gr.File(label="Upload Audio"),
    ],
    outputs=gr.Textbox(label="Meeting Minutes"),
    live=False,
)
# Launch the app
iface.launch()
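
# Dependency sketch (an assumption, versions unpinned): the imports above
# rely on gradio, torch, openai-whisper, langchain (classic 0.0.x import
# paths), openai, chromadb, and tiktoken; Whisper additionally needs the
# ffmpeg system package (on Hugging Face Spaces, via packages.txt).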