HarshaBattula committed f401ee6 (parent: a544212)

replaced gpt-3.5-turbo with LLaMA-2.0-chat

Files changed:
- README.md +4 -4
- chain.py +31 -29
- credentials.json +1 -0
- requirements.txt +1 -1
README.md CHANGED

@@ -10,17 +10,17 @@ pinned: false
 license: unknown
 ---
 
-# Document Retrieval Augmented Language Model with LangChain and GPT-3.5-turbo
+# Document Retrieval Augmented Language Model version 2.0 with LangChain and Meta's LLaMA-2.0 Chat
 
 ## Description
 
-This project involves the creation of a vector database using OpenAI embeddings and Chroma DB, followed by the retrieval of document snippets through a similarity search with LangChain's retrieval system. Upon retrieval of relevant snippets, the system uses GPT-3.5-turbo to generate responses to input questions using the retrieved snippets as context. The system also incorporates a ConversationBufferMemory to store the history of the chat, enhancing the quality of the conversational context and the relevance of generated responses.
+This project involves the creation of a vector database using OpenAI embeddings and Chroma DB, followed by the retrieval of document snippets through a similarity search with LangChain's retrieval system. Upon retrieval of relevant snippets, the system uses LLaMA-2.0 to generate responses to input questions using the retrieved snippets as context. The system also incorporates a ConversationBufferMemory to store the history of the chat, enhancing the quality of the conversational context and the relevance of generated responses.
 
 ## Contents
 
 1. **OpenAI Embeddings and Chroma DB**: Utilizes the rich semantic information in OpenAI embeddings and the efficient storage and retrieval capabilities of Chroma DB to create a performant and effective vector database.
 2. **Document Retrieval**: Uses LangChain's retrieval system to perform similarity search and retrieve relevant snippets from documents based on input queries.
-3. **Response Generation with GPT-3.5-turbo**: Leverages the advanced language understanding and generation capabilities of GPT-3.5-turbo to generate responses to input questions using LangChain's `RetrievalQA`.
+3. **Response Generation with LLaMA-2.0**: Leverages the advanced language understanding and generation capabilities of LLaMA-2.0 to generate responses to input questions using LangChain's `RetrievalQA`.
 4. **ConversationBufferMemory**: Stores the history of the conversation to ensure context continuity and enhance the relevance of the responses generated.
 
 ## Getting Started
@@ -28,7 +28,7 @@ This project involves the creation of a vector database using OpenAI embeddings
 ### Prerequisites
 Before you begin, ensure you have met the following requirements:
 - You have installed Python 3.x.
-- You have access to OpenAI's GPT-3.5-turbo and relevant API credentials.
+- You have access to Meta's LLaMA-2.0 and relevant API credentials.
 - You have set up Chroma DB on your server/machine, and the documents in the database.
 - You have access to LangChain's retrieval system.
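The README describes a pipeline whose first stage, building the vector database, is untouched by this commit, so none of its code appears in the diff. For reference, below is a minimal sketch of that stage using the LangChain APIs of this period; the document path and chunking parameters are illustrative assumptions, not values from this repo.

```python
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Hypothetical source document; the repo's actual corpus is not shown in this commit.
documents = PyPDFLoader("docs/juniper_manual.pdf").load()

# Split the documents into overlapping chunks so each embedding covers a focused span.
chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100
).split_documents(documents)

# Embed the chunks with OpenAI embeddings and persist them in a Chroma collection.
vectordb = Chroma.from_documents(chunks, OpenAIEmbeddings(), persist_directory="chroma_db")

# This retriever is the object that create_question_answering_chain() in chain.py expects.
retriever = vectordb.as_retriever(search_kwargs={"k": 4})
```

The similarity search in step 2 of the Contents list is what `as_retriever` exposes: each query is embedded with the same OpenAI model and matched against the stored chunk vectors.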
chain.py CHANGED

@@ -1,17 +1,23 @@
-
-
-from langchain.chat_models import ChatOpenAI
-from langchain.chains import RetrievalQA
 import openai
-from langchain import HuggingFacePipeline
-from transformers import AutoTokenizer
 import transformers
-import …
+import json
+import torch
+from transformers import AutoTokenizer
+from langchain.chains import RetrievalQA
 from huggingface_hub import login
-
+from langchain import HuggingFacePipeline
+from langchain.memory import ConversationBufferMemory
+from langchain import PromptTemplate
+
+
+with open("credentials.json", "r") as file:
+    credentials = json.load(file)
+
+access_token_read = credentials["access_token_read"]
+openai.api_key = credentials["openai_api_key"]
+
 login(token = access_token_read)
 
-openai.api_key = "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"
 
 def create_juniper_prompt_template():
     template = """You are a network engineer from Juniper Networks, not a Language Model; use your knowledge and the pieces of context (delimited by <ctx></ctx>) to answer the user's question. \n Try to answer as if you were a member of Juniper Networks. \nIf you don't know the answer, just say that you don't know; don't try to make up an answer.
@@ -41,7 +47,7 @@ def create_question_answering_chain(retriever):
     Create a retrieval question answering (QA) chain.
 
     This function initializes a QA chain that can be used to answer questions based on retrieved documents.
-    It uses the OpenAI 'gpt-3.5-turbo' model as the language model (LLM), and a document retriever for finding
+    It uses Meta's 'LLaMA-2-chat' model as the language model (LLM), and a document retriever for finding
     relevant documents.
 
     Args:
@@ -50,32 +56,29 @@ def create_question_answering_chain(retriever):
     Returns:
         qa_chain (obj): The initialized retrieval QA chain.
     """
-    # Initialize the …
-    …
-    …
-    access_token = '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'
-    tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)
+    # Initialize the tokenizer and the language model.
+    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=access_token_read)
 
     pipeline = transformers.pipeline(
-        "text-generation",
-        model=…,
-        tokenizer=tokenizer,
-        torch_dtype=torch.bfloat16,
-        trust_remote_code=True,
-        device_map="auto",
-        max_length=1000,
-        do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
+        "text-generation",
+        model="meta-llama/Llama-2-7b-chat-hf",
+        tokenizer=tokenizer,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+        device_map="auto",
+        max_length=1000,
+        do_sample=True,
+        top_k=10,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id,
     )
 
-    …
+    hf_llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": 0})
 
     # Initialize the retrieval QA chain with the language model, chain type, document retriever,
     # and a flag indicating whether to return source documents.
     qa_chain = RetrievalQA.from_chain_type(
-        llm=…,
+        llm=hf_llm,
         chain_type='stuff',
         retriever=retriever,
         verbose=False,
@@ -88,5 +91,4 @@ def create_question_answering_chain(retriever):
         }
     )
 
-
     return qa_chain
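The hunks above skip the middle of chain.py, so the body of `create_juniper_prompt_template` and the `chain_type_kwargs` passed to `RetrievalQA.from_chain_type` are not visible; only their closing `}` and `)` survive in the last hunk. The sketch below shows the conventional way the imported `PromptTemplate` and `ConversationBufferMemory` are wired into a `stuff` chain. It is a reconstruction under stated assumptions: the template text and variable names are guesses, and `build_qa_chain` is a hypothetical helper mirroring the tail of `create_question_answering_chain`, taking the `hf_llm` and `retriever` built in the diff as parameters.

```python
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory


def build_qa_chain(hf_llm, retriever):
    """Hypothetical helper: wire a prompt and conversation memory into RetrievalQA."""
    # Assumed shape of the prompt; the repo's actual template text is longer.
    template = """You are a network engineer from Juniper Networks, not a Language Model.
Use your knowledge and the context below (delimited by <ctx></ctx>) to answer the question.
If you don't know the answer, say that you don't know.
<ctx>{context}</ctx>
{history}
Question: {question}
Answer:"""

    prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template=template,
    )

    # With chain_type='stuff', a custom prompt and memory go through chain_type_kwargs;
    # memory_key and input_key must match the placeholders used in the template.
    return RetrievalQA.from_chain_type(
        llm=hf_llm,
        chain_type="stuff",
        retriever=retriever,
        verbose=False,
        chain_type_kwargs={
            "prompt": prompt,
            "memory": ConversationBufferMemory(memory_key="history", input_key="question"),
        },
    )


# Usage: RetrievalQA expects the question under the "query" key and returns the
# generated answer under "result".
qa_chain = build_qa_chain(hf_llm, retriever)
response = qa_chain({"query": "What is the Junos OS upgrade procedure?"})
print(response["result"])
```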
credentials.json ADDED

@@ -0,0 +1 @@
+{"access_token_read": "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", "openai_api_key": "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"}
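credentials.json checks real-looking tokens into the repository, where any viewer of the Space can read them. A common alternative, sketched below as a hypothetical `load_credentials` helper (not code from this commit), prefers environment variables and falls back to the JSON file:

```python
import json
import os


def load_credentials(path="credentials.json"):
    """Hypothetical helper: prefer environment variables over a checked-in file."""
    creds = {}
    if os.path.exists(path):
        with open(path, "r") as f:
            # Load the file once; a second json.load() on the same handle would
            # fail because the first read consumes the stream.
            creds = json.load(f)
    return {
        "access_token_read": os.environ.get("HF_TOKEN", creds.get("access_token_read")),
        "openai_api_key": os.environ.get("OPENAI_API_KEY", creds.get("openai_api_key")),
    }
```

On Hugging Face Spaces, such values would normally live in the Space's secrets rather than in a tracked file.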
requirements.txt CHANGED

@@ -10,4 +10,4 @@ langchain
 pypdf
 gradio
 einops
-bitsandbytes
\ No newline at end of file
+bitsandbytes