Update README.md
README.md

### Retrieval Augmented Generation (RAG)

Example based on LlamaIndex.

First, set up BioinspiredLLM as a custom LLM:

```
llm_custom = HuggingFaceLLM(context_window=2048,
                            # ... further arguments not shown in this diff ...
                            tokenizer=tokenizer)
llm_custom.model_name='BioinspiredLLM'
```
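
The diff omits the lines that load the model and tokenizer and build the `HuggingFaceLLM` wrapper. A minimal sketch of that setup, assuming the checkpoint is pulled from the Hugging Face Hub (the repo id and generation settings below are placeholders, not taken from this README):

```
from transformers import AutoModelForCausalLM, AutoTokenizer
from llama_index.llms import HuggingFaceLLM

# Placeholder repo id; replace with the actual BioinspiredLLM checkpoint.
model_name = "lamm-mit/BioinspiredLLM"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

# Wrap the Hugging Face model so LlamaIndex can use it as its LLM.
llm_custom = HuggingFaceLLM(context_window=2048,
                            max_new_tokens=256,   # assumed generation budget
                            model=model,
                            tokenizer=tokenizer)
llm_custom.model_name = 'BioinspiredLLM'
```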

Use the Chroma database collection (for this example it has already been created; load it here):

```
import chromadb
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from chromadb.config import Settings
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext

coll_name = "Bioinspired"
coll_path = './Bioinspired_Chroma'  ## PATH TO CHROMA DATABASE

# Open the persistent Chroma database and fetch the existing collection
client = chromadb.PersistentClient(path=coll_path)
collection = client.get_collection(name=coll_name)

# Wrap the collection as a LlamaIndex vector store
db2 = chromadb.PersistentClient(path=coll_path)
chroma_collection = db2.get_or_create_collection(coll_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Number of stored chunks
chroma_collection.count()
```
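
For reference, a collection like this can be built once from a folder of source documents; a minimal sketch using the imports above (the directory path is a placeholder, and the embeddings come from whatever service context is active):

```
# Hypothetical ingestion step: embed local documents into the persistent Chroma collection.
documents = SimpleDirectoryReader("./bioinspired_papers").load_data()   # placeholder path

db = chromadb.PersistentClient(path=coll_path)
chroma_collection = db.get_or_create_collection(coll_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Writes the document chunks and their embeddings into the Chroma collection.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
```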

Set up the custom LLM service context and vector store index:

```
from llama_index import ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

service_context = ServiceContext.from_defaults(
    llm=llm_custom,
    chunk_size=1024,
    embed_model="local:BAAI/bge-large-en"
)
index = VectorStoreIndex.from_vector_store(
    vector_store,
    service_context=service_context,
)
```
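
The query engine used in the next step is constructed from `index` in lines not shown in this diff. A minimal sketch, assuming the standard LlamaIndex query-engine interface (the retrieval settings are assumptions):

```
# Hypothetical construction of the query engine over the Chroma-backed index.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",   # hierarchically summarize retrieved chunks
    similarity_top_k=5,               # number of retrieved chunks (assumed)
)
```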

Query:

```
from IPython.display import display, Markdown

question = "Which horn does not have tubules? A) big horn sheep B) pronghorn C)"  # question string truncated in this diff
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))
```

Alternatively, load new documents, here with all-mpnet-base-v2 embeddings:

```
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)
documents_graph = SimpleDirectoryReader(
    input_files=[
        "./XXXXXXXXXX/XXXXX.pdf",
    ]
).load_data()
index_doc = VectorStoreIndex.from_documents(
    documents_graph,
    service_context=service_context,
    show_progress=True,
    embeddings=embeddings,
)
```
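
Note that in this version of the LlamaIndex API the embedding model is normally supplied through the `ServiceContext` rather than as a keyword argument to `from_documents`. A sketch of that variant, assuming the `LangchainEmbedding` wrapper around the same sentence-transformers model:

```
from llama_index.embeddings import LangchainEmbedding

# Pass the all-mpnet-base-v2 embeddings through a dedicated service context.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
service_context_mpnet = ServiceContext.from_defaults(
    llm=llm_custom,
    chunk_size=1024,
    embed_model=embed_model,
)
index_doc = VectorStoreIndex.from_documents(
    documents_graph,
    service_context=service_context_mpnet,
    show_progress=True,
)
```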

Query:

```
question = "Which rapid prototyping techniques would be useful for creating hierarchical, bio-inspired materials?"

response = index_doc.as_query_engine(
    service_context=service_context,
    response_mode="tree_summarize",
    similarity_top_k=5,
).query(question)
print(response)
```
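
To check which retrieved passages ground a given answer, the response object's source nodes can be inspected (a sketch; attribute names follow the legacy LlamaIndex response API):

```
# Print the similarity score and a snippet of each retrieved chunk behind the answer.
for node_with_score in response.source_nodes:
    print(node_with_score.score, node_with_score.node.get_text()[:200])
```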