import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
# Unused while the OpenAI stack below is active; kept for the commented-out Hugging Face variant.
#from langchain_huggingface import HuggingFaceEmbeddings
#from langchain_huggingface.llms import HuggingFacePipeline
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import os
from dotenv import load_dotenv
import tiktoken
load_dotenv()
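# OPENAI_API_KEY is expected in .env / the environment; ChatOpenAI and OpenAIEmbeddings read it automatically.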
#HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
#embeddings_model_name = "cointegrated/rubert-tiny2"
embeddings_model_name = "text-embedding-3-small"  # must match the model used to build the store at store_save_path
llm_model_name = "gpt-4o-mini"
store_save_path = "stores/openai"
# Step 1: Document Loading and Splitting
def load_and_split_documents(pdf_path="docs/test_file.pdf"):
"""
Loads a PDF document and splits it into smaller chunks.
"""
loader = PyPDFLoader(pdf_path)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=800,
chunk_overlap=200
)
docs = text_splitter.split_documents(documents)
return docs
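# Illustrative call: load_and_split_documents("docs/test_file.pdf") yields chunks of up to ~800
# characters with a 200-character overlap between neighbouring chunks.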
# Step 2: Embeddings and Vector Store
def get_vector_store(docs, store_save_path=store_save_path):
"""
Loads an existing vector store or creates a new one if it doesn't exist.
"""
if os.path.exists(store_save_path):
print("Loading vector store from disk...")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = FAISS.load_local(store_save_path, embeddings, allow_dangerous_deserialization=True)
else:
print("Creating a new vector store...")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
db = FAISS.from_documents(docs, embeddings)
db.save_local(store_save_path)
return db
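# Note: the index is rebuilt only when store_save_path is missing. After changing the source PDF,
# the chunking settings, or the embedding model, delete the stores/openai directory so it is
# regenerated; loading also requires the same embedding model the index was built with.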
# Step 3: Initialize the LLM
def initialize_llm():
"""
Initializes a Russian-specific LLM locally using transformers
"""
#repo_id = "ai-forever/rugpt3large_based_on_gpt2"
#repo_id = "ai-forever/ruBert-base"
#repo_id = "ai-forever/ruGPT-3.5-13B"
'''
llm = HuggingFaceEndpoint(
repo_id=repo_id,
temperature=0.5,
#max_new_tokens=300,
task='text-generation'
)
'''
llm = ChatOpenAI(
model=llm_model_name,
temperature=0.7
)
return llm
# Step 4: Create the LCEL RAG Chain
def setup_rag_chain(pdf_path):
"""
Sets up the complete Retrieval-Augmented Generation chain using LCEL.
"""
docs = load_and_split_documents(pdf_path)
db = get_vector_store(docs)
retriever = db.as_retriever()
llm = initialize_llm()
# Checking the vector store
#print(f"Number of vectors in FAISS index: {db.index.ntotal}")
# Define the prompt template
template = """Используйте следующие фрагменты контекста, чтобы ответить на вопрос в конце. Если вы не знаете ответа, просто скажите, что не знаете, не пытайтесь что-то придумать. Всегда будьте вежливым.
{context}
Вопрос: {question}
Полезный ответ:"""
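    # English gloss of the prompt above: "Use the following pieces of context to answer the question
    # at the end. If you don't know the answer, just say you don't know; don't try to make something up.
    # Always be polite." Then the context, "Question: {question}", and "Helpful answer:".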
    prompt = PromptTemplate.from_template(template)
    # Build the LCEL RAG chain: the retriever fills {context}, the raw query passes through as {question}
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain
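# Data flow through the chain: the query string goes both to the retriever (filling {context} with the
# most similar chunks; the default retriever typically returns four) and, via RunnablePassthrough,
# into {question}; the prompt is rendered, sent to the chat model, and StrOutputParser returns plain text.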
# Initialize the chain
document_name = "docs/test_file.pdf"
qa_chain = setup_rag_chain(pdf_path=document_name)
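# setup_rag_chain runs at import time, so the PDF is loaded/split and the FAISS index built
# (or loaded from disk) once, when the app starts.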
# Gradio Interface
def chat_with_doc(query):
"""
Function to handle the user query and return a response.
"""
try:
# Pass the query directly, not as a dictionary
result = qa_chain.invoke(query)
return result
except Exception as e:
return f"Произошла ошибка: {type(e).__name__} - {e!r}"
def count_tokens(text, model_name):
    """Returns the number of tiktoken tokens in `text` for the given OpenAI model."""
    encoding = tiktoken.encoding_for_model(model_name)
    num_tokens = len(encoding.encode(text))
    return num_tokens
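# Assumption about the installed tiktoken version: encoding_for_model("gpt-4o-mini") only resolves on
# recent releases; older ones raise KeyError, and falling back to tiktoken.get_encoding("o200k_base")
# (the encoding used by the gpt-4o family) is a reasonable workaround.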
# Simple single-box interface (defined but not launched; the Blocks app below is used instead)
iface = gr.Interface(
    fn=chat_with_doc,
    inputs=gr.Textbox(lines=5, placeholder="Спросите что-нибудь о документе..."),
    outputs="text",
    title="RAG LLM модель для AIGINEER",
    description="Задайте вопрос о содержании документации",
)
css_code = """
#submit-button {
background-color: #4CAF50 !important;
color: white !important;
}
#centered-text {
text-align: center;
//justify-content: center;
}
#fixed-height-textarea textarea {
overflow-y: auto !important;
}
"""
heading_text = "# AIGINEER-ИИ Модель"
subheading_text = 'Узнайте любую информацию о нормативно-технической документации (НТД) со 100% точностью при помощи ИИ модели AIGINEER'
with gr.Blocks(css=css_code) as demo:
    gr.Markdown(heading_text, elem_id='centered-text')
    gr.Markdown(subheading_text, elem_id='centered-text')
    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(interactive=True, label='Вопрос', lines=5, placeholder="Спросите что-нибудь о документе...")
            with gr.Row():
                clear_button = gr.ClearButton(components=[query_input], variant='secondary', value='Очистить')
                submit_button = gr.Button(variant='primary', value='Отправить', elem_id='submit-button')
        #with gr.Column():
        #    count_tokens_output = gr.TextArea(interactive=False, label='Стоимость запроса в токенах')
        #    count_tokens_button = gr.Button(variant='secondary', value='Посчитать стоимость в токенах')
    response_output = gr.TextArea(interactive=True, label='Ответ', lines=8, placeholder='Тут будет отображаться ответ.', elem_id='fixed-height-textarea')
    submit_button.click(fn=chat_with_doc, inputs=query_input, outputs=response_output)
    #count_tokens_button.click(fn=lambda text_input: count_tokens(text_input, llm_model_name), inputs=[query_input], outputs=[count_tokens_output])
# Launch the Gradio app
if __name__ == "__main__":
    # Uncomment to run as CLI
    #query = input(f"Спросите что нибудь о документе {document_name}: ")
    #result = chat_with_doc(query)
    #print(result)
    # Run the Gradio app
    demo.launch()