from llama_index import LLMPredictor, PromptHelper, StorageContext, ServiceContext, load_index_from_storage, SimpleDirectoryReader, GPTVectorStoreIndex
from langchain.chat_models import ChatOpenAI
import gradio as gr
import sys
import os
import openai
from ratelimit import limits, sleep_and_retry
from langchain import HuggingFaceHub  # imported but not used in this script
# Bug fixes and deployment notes:
# 1. OpenAI key / tenacity RetryError: https://stackoverflow.com/questions/76425556/tenacity-retryerror-retryerrorfuture-at-0x7f89bc35eb90-state-finished-raised
# 2. Rate-limit error with the default LangChain version - pin langchain==0.0.188: https://github.com/jerryjliu/llama_index/issues/924
# 3. Added a True Config variable in LangChain: https://github.com/pydantic/pydantic/issues/3320
# 4. Deploying on Hugging Face Spaces: https://huggingface.co/welcome
#    - create a Hugging Face token: https://huggingface.co/settings/tokens
#    - log in: huggingface-cli login
#    - add a requirements.txt file: https://huggingface.co/docs/hub/spaces-dependencies
os.environ["OPENAI_API_KEY"] = os.environ.get("openai_key")
openai.api_key = os.environ["OPENAI_API_KEY"]
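# Optional startup guard (a minimal sketch; assumes the secret is named "openai_key" as above
# and that failing fast with a clear message is preferred over a later tenacity RetryError):
if not openai.api_key:
    sys.exit("OPENAI_API_KEY is not set - add an 'openai_key' secret or environment variable.")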
# Define the rate limit for API calls (requests per second)
RATE_LIMIT = 3
# Implement the rate limiting decorator
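# @limits allows at most RATE_LIMIT calls per 1-second period; @sleep_and_retry makes the
# wrapped call sleep until the period resets instead of raising RateLimitException.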
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def create_service_context():
    # Earlier prompt-helper configuration, kept for reference:
    # max_input_size = 4096
    # num_outputs = 512
    # max_chunk_overlap = 20
    # chunk_size_limit = 600
    # prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    # prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio=0.1, chunk_size_limit=chunk_size_limit)
    # llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=num_outputs))

    # Constraint parameters for other models, kept for reference:
    # max_input_size = 32768  # gpt-4-32k
    # num_outputs = 30000     # gpt-4-32k
    # num_outputs = 3500      # max 4096; gpt-4o
    # max_input_size = 8192   # gpt-4
    # num_outputs = 7500      # gpt-4
    # max_input_size = 16384  # gpt-3.5-turbo-0125
    # num_outputs = 15000     # gpt-3.5-turbo-0125

    # Active constraint parameters (gpt-3.5-turbo)
    max_input_size = 4096
    num_outputs = 3300
    max_chunk_overlap = 15  # unused; chunk_overlap_ratio is passed to PromptHelper instead
    chunk_size_limit = 600

    # Allows the user to explicitly set certain constraint parameters
    prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio=0.1, chunk_size_limit=chunk_size_limit)

    # LLMPredictor is a wrapper class around LangChain's LLMChain that allows easy integration into LlamaIndex
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=num_outputs))

    # Construct the service context from the predictor and prompt helper
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    return service_context
# Implement the rate limiting decorator
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def data_ingestion_indexing(directory_path):
    # Loads data from the specified directory path
    documents = SimpleDirectoryReader(directory_path).load_data()
    # When first building the index
    index = GPTVectorStoreIndex.from_documents(
        documents, service_context=create_service_context()
    )
    # Persist the index to disk (default "storage" folder)
    index.storage_context.persist()
    return index
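# The index persisted above is what data_querying() below reloads from ./storage on every request.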
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def data_querying(input_text):
    # Rebuild the storage context from the persisted folder
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    # Load the index from storage
    index = load_index_from_storage(storage_context, service_context=create_service_context())
    # Query the index with the input text
    response = index.as_query_engine().query(input_text)
    return response.response
iface = gr.Interface(
    fn=data_querying,
    inputs=gr.components.Textbox(lines=20, label="Enter your question"),
    # Note: Textbox does not take a CSS `style` string; a fixed, scrollable height would need
    # custom CSS (e.g. the Interface `css` argument) instead.
    outputs=gr.components.Textbox(lines=25, label="Response"),
    title="Legi GPT - Monitorul oficial 25.06 - 5.07.2024, 157 pdfs, 150mb",
)
# Passes in the data directory (runs the full ingestion and indexing on every startup)
index = data_ingestion_indexing("books-philosophy")
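# A possible startup variant (sketch only; assumes the index persisted in ./storage is still
# current for the "books-philosophy" documents) that reuses the stored index instead of
# re-embedding everything on each launch:
#
#     if os.path.exists("./storage"):
#         index = load_index_from_storage(
#             StorageContext.from_defaults(persist_dir="./storage"),
#             service_context=create_service_context(),
#         )
#     else:
#         index = data_ingestion_indexing("books-philosophy")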
iface.launch(inline=True)