jseims committed on
Commit c261abe · 1 Parent(s): d5f0119

Update app.py

Files changed (1)
  1. app.py +121 -43
app.py CHANGED
@@ -1,47 +1,129 @@
  import chainlit as cl
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.document_loaders.csv_loader import CSVLoader
- from langchain.embeddings import CacheBackedEmbeddings
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.vectorstores import FAISS
- from langchain.chains import RetrievalQA
- from langchain.chat_models import ChatOpenAI
- from langchain.storage import LocalFileStore
- from langchain.prompts.chat import (
-     ChatPromptTemplate,
-     SystemMessagePromptTemplate,
-     HumanMessagePromptTemplate,
+ from llama_index import ServiceContext
+ from llama_index.node_parser.simple import SimpleNodeParser
+ from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
+ from llama_index.llms import OpenAI
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from llama_index import VectorStoreIndex
+ from llama_index.vector_stores import ChromaVectorStore
+ from llama_index.storage.storage_context import StorageContext
+ import chromadb
+ from llama_index.readers.wikipedia import WikipediaReader
+ from llama_index.tools import FunctionTool
+ from llama_index.vector_stores.types import (
+     VectorStoreInfo,
+     MetadataInfo,
+     ExactMatchFilter,
+     MetadataFilters,
  )
- import chainlit as cl
+ from llama_index.retrievers import VectorIndexRetriever
+ from llama_index.query_engine import RetrieverQueryEngine
+
+ from typing import List, Tuple, Any
+ from pydantic import BaseModel, Field
+ from llama_index.agent import OpenAIAgent
+

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

- system_template = """
- Use the following pieces of context to answer the user's question.
- Please respond as if you were Ken from the movie Barbie. Ken is a well-meaning but naive character who loves to Beach. He talks like a typical Californian Beach Bro, but he doesn't use the word "Dude" so much.
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
- You can make inferences based on the context as long as it still faithfully represents the feedback.
-
- Example of your response should be:
-
- ```
- The answer is foo
- ```
-
- Begin!
- ----------------
- {context}"""
-
- messages = [
-     SystemMessagePromptTemplate.from_template(system_template),
-     HumanMessagePromptTemplate.from_template("{question}"),
- ]
- prompt = ChatPromptTemplate(messages=messages)
- chain_type_kwargs = {"prompt": prompt}
+ embed_model = OpenAIEmbedding()
+ chunk_size = 1000
+ llm = OpenAI(
+     temperature=0,
+     model="gpt-3.5-turbo",
+     streaming=True
+ )
+
+ service_context = ServiceContext.from_defaults(
+     llm=llm,
+     chunk_size=chunk_size,
+     embed_model=embed_model
+ )
+
+ text_splitter = TokenTextSplitter(
+     chunk_size=chunk_size
+ )
+
+ node_parser = SimpleNodeParser(
+     text_splitter=text_splitter
+ )
+
+ chroma_client = chromadb.Client()
+ chroma_collection = chroma_client.create_collection("wikipedia_barbie_opp")
+
+ vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
+ wiki_vector_index = VectorStoreIndex([], storage_context=storage_context, service_context=service_context)
+
+ movie_list = ["Barbie (film)", "Oppenheimer (film)"]
+
+ wiki_docs = WikipediaReader().load_data(pages=movie_list, auto_suggest=False)
+
+ top_k = 3
+ vector_store_info = VectorStoreInfo(
+     content_info="semantic information about movies",
+     metadata_info=[MetadataInfo(
+         name="title",
+         type="str",
+         description="title of the movie, one of [Barbie (film), Oppenheimer (film)]",
+     )]
+ )
+
+ class AutoRetrieveModel(BaseModel):
+     query: str = Field(..., description="natural language query string")
+     filter_key_list: List[str] = Field(
+         ..., description="List of metadata filter field names"
+     )
+     filter_value_list: List[str] = Field(
+         ...,
+         description=(
+             "List of metadata filter field values (corresponding to names specified in filter_key_list)"
+         )
+     )
+
+ def auto_retrieve_fn(
+     query: str, filter_key_list: List[str], filter_value_list: List[str]
+ ):
+     """Auto retrieval function.
+
+     Performs auto-retrieval from a vector database, and then applies a set of filters.
+
+     """
+     query = query or "Query"
+
+     exact_match_filters = [
+         ExactMatchFilter(key=k, value=v)
+         for k, v in zip(filter_key_list, filter_value_list)
+     ]
+     retriever = VectorIndexRetriever(
+         wiki_vector_index, filters=MetadataFilters(filters=exact_match_filters), top_k=top_k
+     )
+     query_engine = RetrieverQueryEngine.from_args(retriever)
+
+     response = query_engine.query(query)
+     return str(response)
+
+ description = f"""\
+ Use this tool to look up semantic information about films.
+ The vector database schema is given below:
+ {vector_store_info.json()}
+ """
+
+ auto_retrieve_tool = FunctionTool.from_defaults(
+     fn=auto_retrieve_fn,
+     name="auto_retrieve_tool",
+     description=description,
+     fn_schema=AutoRetrieveModel,
+ )
+
+
+ agent = OpenAIAgent.from_tools(
+     [auto_retrieve_tool], llm=llm, verbose=True
+ )

  @cl.author_rename
  def rename(orig_author: str):
-     rename_dict = {"RetrievalQA": "Consulting The Kens"}
+     rename_dict = {"RetrievalQA": "Consulting The Llamaindex Tools"}
      return rename_dict.get(orig_author, orig_author)

  @cl.on_chat_start
@@ -49,17 +131,13 @@ async def init():
      msg = cl.Message(content=f"Building Index...")
      await msg.send()

-     # build FAISS index from csv
-     loader = CSVLoader(file_path="./data/barbie.csv", source_column="Review_Url")
-     data = loader.load()
-     documents = text_splitter.transform_documents(data)
-     store = LocalFileStore("./cache/")
-     core_embeddings_model = OpenAIEmbeddings()
-     embedder = CacheBackedEmbeddings.from_bytes_store(
-         core_embeddings_model, store, namespace=core_embeddings_model.model
-     )
-     # make async docsearch
-     docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)
+     for movie, wiki_doc in zip(movie_list, wiki_docs):
+         nodes = node_parser.get_nodes_from_documents([wiki_doc])
+         for node in nodes:
+             node.metadata = {'title' : movie}
+         wiki_vector_index.insert_nodes(nodes)
+
+

      chain = RetrievalQA.from_chain_type(
          ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True),
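One thing the merged result leaves dangling: every `langchain` import is deleted, yet two surviving lines still use those names. The module-level context line `text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)` (immediately shadowed by the new `TokenTextSplitter` assignment) would raise `NameError` as soon as the file is imported, and the `chain = RetrievalQA.from_chain_type(ChatOpenAI(...))` block kept at the end of `init()` would fail the same way at chat start. A minimal sketch of the follow-up fix, assuming those lines are meant to survive for now (the alternative is simply deleting them):

```python
# Sketch only, not part of this commit: re-add the imports that the
# surviving LangChain lines still reference.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
```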
 
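For orientation, the added code wires up LlamaIndex's auto-retrieval pattern: `FunctionTool.from_defaults(..., fn_schema=AutoRetrieveModel)` exposes `auto_retrieve_fn` to the `OpenAIAgent` with a typed argument schema, so the model fills in both the natural-language query and the `title` metadata filter (e.g. `Barbie (film)`) whenever it decides to call the tool, and `auto_retrieve_fn` turns those arguments into an `ExactMatchFilter`-scoped `VectorIndexRetriever` over `wiki_vector_index`. A minimal usage sketch; the `agent.chat` call and the question are illustrative, not part of the commit:

```python
# Illustrative only: once init() has inserted the Wikipedia nodes,
# the agent can answer filtered questions by calling auto_retrieve_tool.
response = agent.chat("According to Wikipedia, who directed Oppenheimer?")
print(str(response))
```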