Spaces:
Sleeping
Sleeping
import os

# Set before the remaining imports run, so any library that reads
# TOKENIZERS_PARALLELISM at import time sees the value.
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

from dotenv import load_dotenv

load_dotenv()  # load .env api keys
mistral_api_key = os.getenv("MISTRAL_API_KEY")
# Report only whether the key was found: printing the key itself would leak
# the secret into the console and any hosting logs.
print("mistral_api_key", "set" if mistral_api_key else "missing")
| import pandas as pd | |
| from langchain.output_parsers import PandasDataFrameOutputParser | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_mistralai import MistralAIEmbeddings | |
| from langchain import hub | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough | |
| from typing import Literal | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_mistralai import ChatMistralAI | |
| from pathlib import Path | |
| from langchain.retrievers import ( | |
| MergerRetriever, | |
| ) | |
| import pprint | |
| from typing import Any, Dict | |
from huggingface_hub import login

# Authenticate against the Hugging Face Hub with the token taken from the
# HUGGING_FACE_TOKEN environment variable (None when the variable is unset).
login(token=os.environ.get("HUGGING_FACE_TOKEN"))
def load_chunk_persist_pdf(task: str) -> Chroma:
    """Build a persisted Chroma vector store from the PDFs of one task.

    Every PDF found directly under ``<cwd>/data/pdf/<task>`` is loaded with
    ``PyPDFLoader``, split into chunks of at most 1000 characters (10
    characters of overlap), embedded with ``MistralAIEmbeddings`` and stored
    under ``<cwd>/data/chroma_store``.

    Args:
        task: Name of the sub-folder of ``data/pdf`` that holds the PDFs.

    Returns:
        The Chroma vector store created from the chunked documents.

    Raises:
        FileNotFoundError: If the task folder does not exist.
    """
    pdf_folder = Path.cwd() / f"data/pdf/{task}"
    # Single source of truth for the store location, used both to create the
    # directory and to tell Chroma where to persist.
    # NOTE(review): the directory does not depend on ``task`` and no
    # collection name is passed, so stores built for different tasks
    # presumably end up in the same collection -- confirm this is intended.
    store_dir = Path.cwd() / "data" / "chroma_store"

    documents = []
    # Sorted so ingestion order is deterministic across platforms; the
    # extension check is case-insensitive so "Plan.PDF" is not skipped.
    for entry in sorted(pdf_folder.iterdir()):
        if entry.name.lower().endswith(".pdf"):
            documents.extend(PyPDFLoader(str(entry)).load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)

    store_dir.mkdir(parents=True, exist_ok=True)
    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(),
        persist_directory=str(store_dir),
    )
    vectorstore.persist()
    return vectorstore
# Sample workout table, one row per exercise:
# (exercise, sets, repetitions, rest between sets).
df = pd.DataFrame(
    [
        ("Squat", 4, 10, "2:30"),
        ("Bench Press", 4, 8, "2:00"),
        ("Lunges", 3, 8, "1:30"),
        ("Pull ups", 3, 8, "2:00"),
    ],
    columns=["exercise", "sets", "repetitions", "rest"],
)
| # parser = PandasDataFrameOutputParser(dataframe=df) | |
| # personal_info_vectorstore = load_chunk_persist_pdf("personal_info") | |
| # zero2hero_vectorstore = load_chunk_persist_pdf("zero2hero") | |
| # bodyweight_vectorstore = load_chunk_persist_pdf("bodyweight") | |
| # nutrition_vectorstore = load_chunk_persist_pdf("nutrition") | |
| # workout_vectorstore = load_chunk_persist_pdf("workout") | |
| # zero2hero_retriever = zero2hero_vectorstore.as_retriever() | |
| # nutrition_retriever = nutrition_vectorstore.as_retriever() | |
| # bodyweight_retriever = bodyweight_vectorstore.as_retriever() | |
| # workout_retriever = workout_vectorstore.as_retriever() | |
| # personal_info_retriever = personal_info_vectorstore.as_retriever() | |
# Mistral chat model used by the workout chain defined further down.
llm = ChatMistralAI(
    temperature=0,
    mistral_api_key=mistral_api_key,
    model="mistral-large-latest",
)
| # prompt = PromptTemplate( | |
| # template=""" | |
| # You are a professional AI coach specialized in building fitness plans, full workout programs. | |
| # You must adapt to the user according to the personal information in the context. You are gentle and motivating. | |
| # Use the following pieces of retrieved context to answer the user's query. | |
| # Context: {context} | |
| # \n{format_instructions}\n{question}\n | |
| # """, | |
| # input_variables=["question","context"], | |
| # partial_variables={"format_instructions": parser.get_format_instructions()}, | |
| # ) | |
| # def format_docs(docs): | |
| # return "\n\n".join(doc.page_content for doc in docs) | |
| # def format_parser_output(parser_output: Dict[str, Any]) -> None: | |
| # for key in parser_output.keys(): | |
| # parser_output[key] = parser_output[key].to_dict() | |
| # return pprint.PrettyPrinter(width=4, compact=True).pprint(parser_output) | |
| # retriever = MergerRetriever(retrievers=[zero2hero_retriever, bodyweight_retriever, nutrition_retriever, workout_retriever, personal_info_retriever]) | |
| # chain = ( | |
| # {"context": zero2hero_retriever | format_docs, "question": RunnablePassthrough()} | |
| # | prompt | |
| # | llm | |
| # | parser | |
| # ) | |
| # # chain = prompt | llm | parser | |
| # format_parser_output(chain.invoke("Build me a full body workout plan for summer body.")) | |
| from pydantic import BaseModel, Field | |
| from typing import List | |
| from langchain_core.output_parsers import JsonOutputParser | |
class Exercise(BaseModel):
    # One exercise entry of a workout program; all four fields are required.
    # Field names and descriptions are part of the JSON schema handed to the
    # model through the parser's format instructions, so keep them unchanged.
    exercice: str = Field(..., description="Name of the exercise")
    nombre_series: int = Field(
        ..., description="Number of sets for the exercise"
    )
    nombre_repetitions: int = Field(
        ..., description="Number of repetitions for the exercise"
    )
    temps_repos: str = Field(..., description="Rest time between sets")
class MusculationProgram(BaseModel):
    # Top-level object the JSON parser is asked to produce: the required list
    # of exercises that make up the program.
    exercises: List[Exercise] = Field(...)
| from langchain.prompts import PromptTemplate | |
# Default request for a strength-training ("musculation") program.
musculation_query = "Provide a musculation program with exercises, number of sets, number of repetitions, and rest time between sets."

# The parser both produces the format instructions injected into the prompt
# and turns the model's reply into JSON shaped like MusculationProgram.
parser = JsonOutputParser(pydantic_object=MusculationProgram)

# "query" is supplied at invoke time; "format_instructions" is filled in once
# here from the parser.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={
        "format_instructions": parser.get_format_instructions(),
    },
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# prompt -> chat model -> JSON parser.
workout_chain = prompt | llm | parser