MostafaMSP commited on
Commit
a9ce8d6
·
verified ·
1 Parent(s): 3e625da

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf filter=lfs diff=lfs merge=lfs -text
37
+ vectorstore/db_faiss/index.faiss filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 AI Anytime
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: NewChatBot1
3
- emoji: 👀
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.9.1
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: NewChatBot1
12
- ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
+
2
+
3
+ # Llama2 Medical Bot
4
+
5
+ The Llama2 Medical Bot is a powerful tool designed to provide medical information by answering user queries using state-of-the-art language models and vector stores. This README will guide you through the setup and usage of the Llama2 Medical Bot.
6
+
7
+ ## Table of Contents
8
+
9
+ - [Introduction](#llama2-medical-bot)
10
+ - [Table of Contents](#table-of-contents)
11
+ - [Prerequisites](#prerequisites)
12
+ - [Installation](#installation)
13
+ - [Getting Started](#getting-started)
14
+ - [Usage](#usage)
15
+ - [Contributing](#contributing)
16
+ - [License](#license)
17
+
18
+ ## Prerequisites
19
+
20
+ Before you can start using the Llama2 Medical Bot, make sure you have the following prerequisites installed on your system:
21
+
22
+ - Python 3.8 or higher (required by langchain)
23
+ - Required Python packages (you can install them using pip):
24
+ - langchain
25
+ - chainlit
26
+ - sentence-transformers
27
+ - faiss
28
+ - pypdf (for PDF document loading)
29
+
30
+ ## Installation
31
+
32
+ 1. Clone this repository to your local machine.
33
+
34
+ ```bash
35
+ git clone https://github.com/your-username/langchain-medical-bot.git
36
+ cd langchain-medical-bot
37
+ ```
38
+
39
+ 2. Create a Python virtual environment (optional but recommended):
40
+
41
+ ```bash
42
+ python -m venv venv
43
+ source venv/bin/activate # On Windows, use: venv\Scripts\activate
44
+ ```
45
+
46
+ 3. Install the required Python packages:
47
+
48
+ ```bash
49
+ pip install -r requirements.txt
50
+ ```
51
+
52
+ 4. Download the required language models and data. Please refer to the Langchain documentation for specific instructions on how to download and set up the language model and vector store.
53
+
54
+ 5. Set up the necessary paths and configurations in your project, including the `DB_FAISS_PATH` variable and other configurations as per your needs.
55
+
56
+ ## Getting Started
57
+
58
+ To get started with the Llama2 Medical Bot, you need to:
59
+
60
+ 1. Set up your environment and install the required packages as described in the Installation section.
61
+
62
+ 2. Configure your project by updating the `DB_FAISS_PATH` variable and any other custom configurations in the code.
63
+
64
+ 3. Prepare the language model and data as per the Langchain documentation.
65
+
66
+ 4. Start the bot by running the provided Python script or integrating it into your application.
67
+
68
+ ## Usage
69
+
70
+ The Llama2 Medical Bot can be used for answering medical-related queries. To use the bot, you can follow these steps:
71
+
72
+ 1. Start the bot by running your application or using the provided Python script.
73
+
74
+ 2. Send a medical-related query to the bot.
75
+
76
+ 3. The bot will provide a response based on the information available in its database.
77
+
78
+ 4. If sources are found, they will be provided alongside the answer.
79
+
80
+ 5. The bot can be customized to return specific information based on the query and context provided.
81
+
82
+ ## Contributing
83
+
84
+ Contributions to the Llama2 Medical Bot are welcome! If you'd like to contribute to the project, please follow these steps:
85
+
86
+ 1. Fork the repository to your own GitHub account.
87
+
88
+ 2. Create a new branch for your feature or bug fix.
89
+
90
+ 3. Make your changes and ensure that the code passes all tests.
91
+
92
+ 4. Create a pull request to the main repository, explaining your changes and improvements.
93
+
94
+ 5. Your pull request will be reviewed, and if approved, it will be merged into the main codebase.
95
+
96
+ ## License
97
+
98
+ This project is licensed under the MIT License.
99
+
100
  ---
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ For more information on how to use, configure, and extend the Llama2 Medical Bot, please refer to the Langchain documentation or contact the project maintainers.
103
+
104
+ Happy coding with Llama2 Medical Bot! 🚀
chainlit.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Llama2 Med-Bot! 🚀🤖
2
+
3
+ Hi there, 👋 We're excited to have you on board. This is a powerful bot designed to help you ask queries related to your data/knowledge.
4
+
5
+ ## Useful Links 🔗
6
+
7
+ - **Data:** This is the data which has been used as a knowledge base. [Knowledge Base](https://docs.chainlit.io) 📚
8
+ - **Join AI Anytime Community:** Join our friendly [Discord Server](https://discord.gg/ZThrUxbAYw) to ask questions, share your projects, and connect with other developers! 💬
9
+
10
+ Happy chatting! 💻😊
11
+
data/71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:753cd53b7a3020bbd91f05629b0e3ddcfb6a114d7bbedb22c2298b66f5dd00cc
3
+ size 16127037
ingest.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.embeddings import HuggingFaceEmbeddings
2
+ from langchain_community.vectorstores import FAISS
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+
6
+ DATA_PATH = 'data/'
7
+ DB_FAISS_PATH = 'vectorstore/db_faiss'
8
+
9
# Build the FAISS vector store from the PDFs in DATA_PATH.
def create_vector_db():
    """Load every PDF under DATA_PATH, split it into overlapping chunks,
    embed the chunks with a MiniLM sentence-transformer, and persist the
    resulting FAISS index to DB_FAISS_PATH.
    """
    # Collect all *.pdf files and parse them page-by-page.
    pdf_loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    docs = pdf_loader.load()

    # 500-character chunks with 50 characters of overlap keep neighbouring
    # context available to the retriever.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    # CPU-only embeddings; no GPU is assumed on the host.
    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )

    vector_db = FAISS.from_documents(chunks, embedder)
    vector_db.save_local(DB_FAISS_PATH)


if __name__ == "__main__":
    create_vector_db()
28
+
model.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_community.llms import CTransformers
6
+ from langchain.chains import RetrievalQA
7
+ import chainlit as cl
8
+
9
+ DB_FAISS_PATH = 'vectorstore/db_faiss'
10
+
11
+ custom_prompt_template = """Use the following pieces of information to answer the user's question.
12
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
13
+
14
+ Context: {context}
15
+ Question: {question}
16
+
17
+ Only return the helpful answer below and nothing else.
18
+ Helpful answer:
19
+ """
20
+
21
def set_custom_prompt():
    """Build the PromptTemplate used by the retrieval QA chain.

    Returns:
        PromptTemplate expecting the 'context' and 'question' variables
        referenced by ``custom_prompt_template``.
    """
    return PromptTemplate(template=custom_prompt_template,
                          input_variables=['context', 'question'])
28
+
29
# Retrieval QA chain
def retrieval_qa_chain(llm, prompt, db):
    """Wire the LLM, prompt, and vector store into a RetrievalQA chain.

    Args:
        llm: language model used to generate the answer.
        prompt: PromptTemplate with 'context'/'question' variables.
        db: FAISS vector store used as the retriever.

    Returns:
        RetrievalQA chain that also returns its source documents.
    """
    # 'stuff' concatenates the top-k retrieved chunks directly into the prompt.
    retriever = db.as_retriever(search_kwargs={'k': 2})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )
38
+
39
# Loading the model
def load_llm():
    """Return a CTransformers wrapper around the quantized
    Llama-2-7B-Chat (GGML) model, downloaded/cached locally."""
    return CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        max_new_tokens=512,   # cap on generated tokens per answer
        temperature=0.5,      # moderate sampling randomness
    )
49
+
50
# QA model factory
def qa_bot():
    """Assemble the full QA pipeline: embeddings -> FAISS store -> LLM -> chain.

    Returns:
        A RetrievalQA chain ready to answer queries against DB_FAISS_PATH.
    """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cpu'})
    # Recent langchain_community releases refuse to unpickle a saved index
    # unless this flag is set; it is safe here because the index under
    # DB_FAISS_PATH is produced locally by ingest.py, not untrusted input.
    db = FAISS.load_local(DB_FAISS_PATH, embeddings,
                          allow_dangerous_deserialization=True)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    qa = retrieval_qa_chain(llm, qa_prompt, db)

    return qa
60
+
61
# Convenience one-shot query helper (builds a fresh chain per call).
def final_result(query):
    """Run a single query through a newly constructed QA chain and
    return the chain's raw response dict."""
    chain = qa_bot()
    return chain({'query': query})
66
+
67
# chainlit: session start hook
@cl.on_chat_start
async def start():
    """Build the QA chain once per session and greet the user."""
    chain = qa_bot()
    msg = cl.Message(content="Starting the bot...")
    await msg.send()
    msg.content = "Hi, Welcome to Medical Bot. What is your query?"
    await msg.update()

    # Stash the chain so on_message can reuse it for this session.
    cl.user_session.set("chain", chain)
77
+
78
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message using the session's QA chain.

    Streams the final answer via a langchain callback handler and appends
    the retrieved source documents (if any) to the reply.
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["result"]
    sources = res["source_documents"]

    # Append source documents when retrieval found any.
    if sources:
        # fixed: was an f-string with no placeholders
        answer += "\nSources:" + str(sources)
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer).send()
95
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pypdf
2
+ langchain
3
+ torch
4
+ accelerate
5
+ bitsandbytes
6
+ ctransformers
7
+ sentence_transformers
8
+ faiss_cpu
9
+ chainlit
10
+ huggingface_hub
11
+ langchain_community
vectorstore/db_faiss/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c219be0c422137d6354fdf0db6f2a2fe719ba536215b2dcba2366723f00b6e9
3
+ size 10983981
vectorstore/db_faiss/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d75f6e95d75f5bad95668fcd18f2daffb0d562d33784e6228e5c0f785605ee0c
3
+ size 3567746