Commit 55b4c5a · committed by Ilyas KHIAT
Parent(s): 6db239f

delete docs

Files changed:
- main.py +17 -8
- prompts.py +2 -0
- rag.py +15 -5

main.py
CHANGED

@@ -8,10 +8,13 @@ import os
 from dotenv import load_dotenv
 from rag import *
 from fastapi.responses import StreamingResponse
+import json
+from prompts import *
 
 load_dotenv()
 
 pinecone_api_key = os.environ.get("PINECONE_API_KEY")
+common_namespace = os.environ.get("COMMON_NAMESPACE")
 
 pc = Pinecone(api_key=pinecone_api_key)
 

@@ -84,14 +87,15 @@ async def upload_file(file: UploadFile, enterprise_data: Json[EnterpriseData]):
     text_chunks = get_text_chunks(text)
 
     # Create a vector store
-    vector_store = get_vectorstore(text_chunks, filename=file.filename, file_type="pdf", namespace=enterprise_data.id, index=index)
+    vector_store = get_vectorstore(text_chunks, filename=file.filename, file_type="pdf", namespace=enterprise_data.id, index=index,enterprise_name=enterprise_name)
 
     if vector_store:
         return {
             "file_name":file.filename,
             "enterprise_id": enterprise_data.id,
             "number_of_chunks": len(text_chunks),
-            "filename_id":vector_store["filename_id"]
+            "filename_id":vector_store["filename_id"],
+            "enterprise_name":enterprise_name
         }
     else:
         raise HTTPException(status_code=500, detail="Could not create vector store")

@@ -137,11 +141,11 @@ import asyncio
 
 GENERATION_TIMEOUT_SEC = 60
 
-async def stream_generator(response):
+async def stream_generator(response,prompt):
     async with async_timeout.timeout(GENERATION_TIMEOUT_SEC):
         try:
             async for chunk in response:
-                yield "
+                yield json.dumps({"prompt": prompt, "content": chunk})
         except asyncio.TimeoutError:
             raise HTTPException(status_code=504, detail="Stream timed out")
 

@@ -152,17 +156,22 @@ def generate_answer(user_input: UserInput):
     prompt = user_input.prompt
     enterprise_id = user_input.enterprise_id
 
-
+    template_prompt = base_template
+
+    context = get_retreive_answer(enterprise_id, prompt, index, common_namespace)
+
+    #final_prompt_simplified = prompt_formatting(prompt,template,context)
+
     if not context:
         context = "No context found"
 
     if user_input.style_tonality is None:
-        answer = generate_response_via_langchain(prompt, model="gpt-4o",stream=user_input.stream,context = context , messages=user_input.messages)
+        answer = generate_response_via_langchain(prompt, model="gpt-4o",stream=user_input.stream,context = context , messages=user_input.messages,template=template_prompt)
     else:
-        answer = generate_response_via_langchain(prompt, model="gpt-4o",stream=user_input.stream,context = context , messages=user_input.messages,style=user_input.style_tonality.style,tonality=user_input.style_tonality.tonality)
+        answer = generate_response_via_langchain(prompt, model="gpt-4o",stream=user_input.stream,context = context , messages=user_input.messages,style=user_input.style_tonality.style,tonality=user_input.style_tonality.tonality,template=template_prompt)
 
     if user_input.stream:
-        return StreamingResponse(answer, media_type="application/json")
+        return StreamingResponse(stream_generator(answer,prompt), media_type="application/json")
 
     return {
         "prompt": prompt,

prompts.py
ADDED

@@ -0,0 +1,2 @@
+base_template = "Vous êtes un spécialiste de la communication marketing responsable avec une expertise en stratégie de communication numérique, en gestion des médias sociaux, en création de contenu, en notoriété de la marque et en engagement communautaire, réponds avec un style {style} et une tonalité {tonality} dans ta communcation pour l'entreprise {entreprise}, sachant le context des documents suivants, {context}, et l'historique de la conversation, {messages}, {query}"
+
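
Note (not part of the diff): base_template is consumed in rag.py through PromptTemplate.from_template, so it has to be formatted with the six placeholders it declares. A minimal sketch, assuming the langchain_core import path and using hypothetical example values:

    from langchain_core.prompts import PromptTemplate  # import path assumed

    from prompts import base_template

    prompt = PromptTemplate.from_template(base_template)
    print(prompt.format(
        style="formel",                 # {style}
        tonality="neutre",              # {tonality}
        entreprise="ACME",              # {entreprise} -- French spelling used by the template
        context="No context found",     # {context}
        messages=[],                    # {messages}
        query="Présente notre offre.",  # {query}
    ))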

rag.py
CHANGED

@@ -37,7 +37,7 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
 
-def get_vectorstore(text_chunks,filename, file_type,namespace,index):
+def get_vectorstore(text_chunks,filename, file_type,namespace,index,enterprise_name):
     try:
         embedding = OpenAIEmbeddings(model="text-embedding-3-large")
         vector_store = PineconeVectorStore(index=index, embedding=embedding,namespace=namespace)

@@ -52,7 +52,7 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
 
             document = Document(
                 page_content=chunk,
-                metadata={"filename":filename,"file_type":file_type, "filename_id":clean_filename},
+                metadata={"filename":filename,"file_type":file_type, "filename_id":clean_filename, "entreprise_name":enterprise_name},
             )
 
             uuid = f"{clean_filename}_{i}"

@@ -83,7 +83,7 @@ def get_retreive_answer(enterprise_id,prompt,index,common_id):
 
         retriever_commun = vector_store_commun.as_retriever(
             search_type="similarity_score_threshold",
-            search_kwargs={"k": 3, "score_threshold": 0.
+            search_kwargs={"k": 3, "score_threshold": 0.5},
         )
 
         response = retriever.invoke(prompt) + retriever_commun.invoke(prompt)

@@ -97,9 +97,19 @@ def get_retreive_answer(enterprise_id,prompt,index,common_id):
         print(e)
         return False
 
-def
+def reformat_prompt(prompt,enterprise_name,context,messages,query):
+    docs_names = []
+    for chunk in context:
+        chunk_name = chunk["metadata"]["filename"]
+        if chunk_name not in docs_names:
+            docs_names.append(chunk_name)
+    context = ", ".join(docs_names)
+    return prompt.format(entreprise=enterprise_name,context=context,messages=messages,query=query)
+
+def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = [],style:str="formal",tonality:str="neutral",template:str = ""):
     # Define the prompt template
-    template
+    if template == "":
+        template = "En tant qu'IA experte en marketing, réponds avec un style {style} et une tonalité {tonality} dans ta communcation, sachant le context suivant: {context}, et l'historique de la conversation, {messages}, {query}"
 
 
     prompt = PromptTemplate.from_template(template)