shahabkahn committed
Commit 318f2bb · verified · Parent: 70340d3

Upload 11 files

.dockerignore ADDED
@@ -0,0 +1,34 @@
+ # Include any files or directories that you don't want to be copied to your
+ # container here (e.g., local build artifacts, temporary files, etc.).
+ #
+ # For more help, visit the .dockerignore file reference guide at
+ # https://docs.docker.com/go/build-context-dockerignore/
+
+ **/.DS_Store
+ **/__pycache__
+ **/.venv
+ **/.classpath
+ **/.dockerignore
+ **/.env
+ **/.git
+ **/.gitignore
+ **/.project
+ **/.settings
+ **/.toolstarget
+ **/.vs
+ **/.vscode
+ **/*.*proj.user
+ **/*.dbmdl
+ **/*.jfm
+ **/bin
+ **/charts
+ **/docker-compose*
+ **/compose.y*ml
+ **/Dockerfile*
+ **/node_modules
+ **/npm-debug.log
+ **/obj
+ **/secrets.dev.yaml
+ **/values.dev.yaml
+ LICENSE
+ README.md
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Data/Notes[[:space:]]for[[:space:]]MRCP.pdf filter=lfs diff=lfs merge=lfs -text
+ vectorstore/db_faiss/index.faiss filter=lfs diff=lfs merge=lfs -text
2.jpg ADDED
Data/Notes for MRCP.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12fc84682467687c558b308794171eeed335c7c8dee8c8da983256c18b7ce6c1
+ size 42930908
Dockerfile ADDED
@@ -0,0 +1,67 @@
+ # syntax=docker/dockerfile:1
+
+ # Comments are provided throughout this file to help you get started.
+ # If you need more help, visit the Dockerfile reference guide at
+ # https://docs.docker.com/go/dockerfile-reference/
+
+ # Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7
+
+ ARG PYTHON_VERSION=3.12.3
+ FROM python:${PYTHON_VERSION}-slim AS base
+
+ # Prevents Python from writing pyc files.
+ ENV PYTHONDONTWRITEBYTECODE=1
+
+ # Keeps Python from buffering stdout and stderr to avoid situations where
+ # the application crashes without emitting any logs due to buffering.
+ ENV PYTHONUNBUFFERED=1
+
+ WORKDIR /app
+
+ # Create a non-privileged user that the app will run under.
+ # See https://docs.docker.com/go/dockerfile-user-best-practices/
+ ARG UID=10001
+ RUN adduser \
+     --disabled-password \
+     --gecos "" \
+     --home "/nonexistent" \
+     --shell "/sbin/nologin" \
+     --no-create-home \
+     --uid "${UID}" \
+     appuser
+
+ # Download dependencies as a separate step to take advantage of Docker's caching.
+ # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+ # Leverage a bind mount to requirements.txt to avoid having to copy it
+ # into this layer.
+ RUN --mount=type=cache,target=/root/.cache/pip \
+     --mount=type=bind,source=requirements.txt,target=requirements.txt \
+     python -m pip install -r requirements.txt
+
+ # Create a /data directory and assign its ownership to appuser.
+ RUN mkdir -p /data && chown appuser /data
+
+ # Switch to the non-privileged user to run the application.
+ USER appuser
+
+ # Set the TRANSFORMERS_CACHE environment variable.
+ ENV TRANSFORMERS_CACHE=/tmp/.cache/huggingface
+
+ # Create the cache folder with appropriate permissions.
+ RUN mkdir -p $TRANSFORMERS_CACHE && chmod -R 777 $TRANSFORMERS_CACHE
+
+ # Copy the source code into the container.
+ COPY . .
+
+ # Expose the ports that the application listens on.
+ EXPOSE 7860
+ EXPOSE 8501
+
+ # Run the FastAPI backend in the background and the Streamlit frontend in the foreground.
+ CMD ["bash", "-c", "uvicorn main:app --host 0.0.0.0 --port 7860 & streamlit run BrainBot.py --server.port 8501 --server.enableXsrfProtection false"]
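The CMD above launches both servers from a single shell. Below is a rough local equivalent, as a sketch only: the CMD references main:app and BrainBot.py, while the modules uploaded in this commit are app.py and frontend.py, so the names used here follow the uploaded files and are an assumption.

# local_run.py — minimal sketch of the CMD's two-process launch, outside Docker.
# Module names (app:app, frontend.py) are assumptions based on the uploaded files.
import subprocess

# Start the FastAPI backend in the background on port 7860.
backend = subprocess.Popen(["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"])
try:
    # Run the Streamlit frontend in the foreground, mirroring the container CMD.
    subprocess.run(["streamlit", "run", "frontend.py", "--server.port", "8501"], check=True)
finally:
    backend.terminate()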
app.py ADDED
@@ -0,0 +1,142 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from langchain.chains import RetrievalQA
+ from langchain_community.llms import CTransformers
+ from langchain.prompts import PromptTemplate
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ import re
+ import uvicorn
+ import logging
+
+ app = FastAPI()
+
+ # CORS configuration
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Load embeddings and vector database
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
+ try:
+     db = FAISS.load_local("vectorstore/db_faiss", embeddings, allow_dangerous_deserialization=True)
+     logger.info("Vector database loaded successfully!")
+ except Exception as e:
+     logger.error(f"Failed to load vector database: {e}")
+     raise
+
+ # Load LLM
+ try:
+     llm = CTransformers(
+         model="llama-2-7b-chat.ggmlv3.q4_0.bin",
+         model_type="llama",
+         max_new_tokens=128,
+         temperature=0.5,
+     )
+     logger.info("LLM model loaded successfully!")
+ except Exception as e:
+     logger.error(f"Failed to load LLM model: {e}")
+     raise
+
+ # Define custom prompt template
+ custom_prompt_template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+ Context: {context}
+ Question: {question}
+
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """
+ qa_prompt = PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
+
+ # Set up RetrievalQA chain
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=llm,
+     chain_type="stuff",
+     retriever=db.as_retriever(search_kwargs={"k": 2}),
+     return_source_documents=True,
+     chain_type_kwargs={"prompt": qa_prompt},
+ )
+
+ class QuestionRequest(BaseModel):
+     question: str
+
+ class AnswerResponse(BaseModel):
+     answer: str
+
+ def clean_answer(answer):
+     # Remove unnecessary characters and symbols, keeping #, * and \ so the
+     # markdown-related steps below can still match.
+     cleaned_answer = re.sub(r'[^\w\s.,#*\\-]', '', answer)
+
+     # Collapse immediately repeated words ("the the" -> "the")
+     cleaned_answer = re.sub(r'\b(\w+)( \1\b)+', r'\1', cleaned_answer)
+
+     # Remove any leading or trailing whitespace
+     cleaned_answer = cleaned_answer.strip()
+
+     # Replace runs of whitespace with a single space
+     cleaned_answer = re.sub(r'\s+', ' ', cleaned_answer)
+
+     # Turn literal "\n" sequences into real newlines for markdown
+     cleaned_answer = re.sub(r'\\n', '\n', cleaned_answer)
+
+     # Normalize bullet points to markdown syntax
+     cleaned_answer = re.sub(r'^\s*-\s+(.*)$', r'* \1', cleaned_answer, flags=re.MULTILINE)
+
+     # Normalize numbered lists to markdown syntax
+     cleaned_answer = re.sub(r'^\s*\d+\.\s+(.*)$', r'1. \1', cleaned_answer, flags=re.MULTILINE)
+
+     # Normalize headings to markdown syntax
+     cleaned_answer = re.sub(r'^\s*(#+)\s+(.*)$', r'\1 \2', cleaned_answer, flags=re.MULTILINE)
+
+     return cleaned_answer
+
+ def format_sources(sources):
+     formatted_sources = []
+     for source in sources:
+         metadata = source.metadata
+         page = metadata.get('page', 'Unknown page')
+         source_str = f"{metadata.get('source', 'Unknown source')}, page {page}"
+         formatted_sources.append(source_str)
+     return "\n".join(formatted_sources)
+
+ @app.post("/query", response_model=AnswerResponse)
+ async def query(question_request: QuestionRequest):
+     try:
+         question = question_request.question
+         if not question:
+             raise HTTPException(status_code=400, detail="Question is required")
+
+         result = qa_chain({"query": question})
+         answer = result.get("result")
+         sources = result.get("source_documents")
+
+         if sources:
+             answer += "\nSources:\n" + format_sources(sources)
+         else:
+             answer += "\nNo sources found"
+
+         # Clean up the answer and return it wrapped in a dictionary
+         return {"answer": clean_answer(answer)}
+
+     except HTTPException:
+         # Re-raise client errors (e.g., the 400 above) instead of masking them as 500s.
+         raise
+     except Exception as e:
+         logger.error(f"Error processing query: {e}")
+         raise HTTPException(status_code=500, detail="Internal Server Error")
+
+ if __name__ == '__main__':
+     uvicorn.run(app, host='0.0.0.0', port=8000)
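For reference, a minimal client for the /query endpoint, assuming the API is running locally on port 8000 as in the __main__ block above; the sample question is illustrative only.

# query_client.py — sketch of calling POST /query.
import requests

payload = {"question": "What is the first-line management of hypertension?"}
response = requests.post("http://localhost:8000/query", json=payload, timeout=120)
response.raise_for_status()
# The endpoint returns {"answer": "..."} per the AnswerResponse model.
print(response.json()["answer"])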
compose.yaml ADDED
@@ -0,0 +1,49 @@
+ # Comments are provided throughout this file to help you get started.
+ # If you need more help, visit the Docker Compose reference guide at
+ # https://docs.docker.com/go/compose-spec-reference/
+
+ # Here the instructions define your application as a service called "server".
+ # This service is built from the Dockerfile in the current directory.
+ # You can add other services your application may depend on here, such as a
+ # database or a cache. For examples, see the Awesome Compose repository:
+ # https://github.com/docker/awesome-compose
+ services:
+   server:
+     build:
+       context: .
+     ports:
+       - 8000:8000
+
+ # The commented-out section below is an example of how to define a PostgreSQL
+ # database that your application can use. `depends_on` tells Docker Compose to
+ # start the database before your application. The `db-data` volume persists the
+ # database data between container restarts. The `db-password` secret is used
+ # to set the database password. You must create `db/password.txt` and add
+ # a password of your choosing to it before running `docker compose up`.
+ #     depends_on:
+ #       db:
+ #         condition: service_healthy
+ #   db:
+ #     image: postgres
+ #     restart: always
+ #     user: postgres
+ #     secrets:
+ #       - db-password
+ #     volumes:
+ #       - db-data:/var/lib/postgresql/data
+ #     environment:
+ #       - POSTGRES_DB=example
+ #       - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
+ #     expose:
+ #       - 5432
+ #     healthcheck:
+ #       test: [ "CMD", "pg_isready" ]
+ #       interval: 10s
+ #       timeout: 5s
+ #       retries: 5
+ # volumes:
+ #   db-data:
+ # secrets:
+ #   db-password:
+ #     file: db/password.txt
db.py ADDED
@@ -0,0 +1,27 @@
+ # db.py — build the FAISS vector store from the PDFs in Data/
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ DATA_PATH = "Data/"  # matches the uploaded Data/ directory (case-sensitive on Linux)
+ DB_FAISS_PATH = "vectorstore/db_faiss"
+
+ def create_vector_db():
+     loader = DirectoryLoader(
+         DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader
+     )
+
+     documents = loader.load()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+     texts = text_splitter.split_documents(documents)
+
+     embeddings = HuggingFaceEmbeddings(
+         model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
+     )
+
+     db = FAISS.from_documents(texts, embeddings)
+     db.save_local(DB_FAISS_PATH)
+
+ if __name__ == "__main__":
+     create_vector_db()
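After running db.py, the saved index can be sanity-checked with the same embedding model, mirroring how app.py loads the store; a sketch only, and the query string is a made-up example.

# check_index.py — load the saved FAISS index and run a quick similarity search.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
)
db = FAISS.load_local(
    "vectorstore/db_faiss", embeddings, allow_dangerous_deserialization=True
)

# Retrieve the two closest chunks, matching the k=2 retriever in app.py.
for doc in db.similarity_search("management of atrial fibrillation", k=2):
    print(doc.metadata.get("page", "?"), doc.page_content[:100])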
frontend.py ADDED
@@ -0,0 +1,70 @@
+ import requests
+ import streamlit as st
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Define the URL of your FastAPI endpoint
+ url = "http://localhost:8000/query"
+
+ # Initialize the session state
+ st.session_state.setdefault("chat_history", [])
+
+ # Function to handle the new chat button click
+ def new_chat():
+     st.session_state.chat_history = []  # Clear the chat history
+
+ # Streamlit app
+ def app():
+     st.title("Doctor's Medical Assistant")
+     st.sidebar.button("New Chat", on_click=new_chat)
+     st.image("2.jpg", width=300)
+
+     # Display welcome message
+     st.write("<span style='font-size:20px; font-weight:bold;'>Welcome! How can I help you?</span>",
+              unsafe_allow_html=True)
+
+     # Placeholder text for the input box
+     input_placeholder = st.empty()
+     input_text = input_placeholder.text_input("", key="user_input", help="Type your question here...")
+
+     # JavaScript to set the input placeholder text
+     placeholder_script = """
+     <script>
+     const inputElement = document.querySelector('input[data-baseweb="input"]');
+     inputElement.placeholder = "Enter your question";
+     </script>
+     """
+     st.markdown(placeholder_script, unsafe_allow_html=True)
+
+     # Handle form submission
+     submit_button = st.button("➡️")
+     if submit_button:
+         user_input = input_text.strip()
+         if user_input:
+             # Create the request payload
+             payload = {"question": user_input}
+             try:
+                 # Send the POST request to the FastAPI endpoint
+                 response = requests.post(url, json=payload)
+                 # Check if the request was successful
+                 if response.ok:
+                     # Get the answer from the FastAPI endpoint
+                     answer = response.json().get("answer")
+                     st.session_state.chat_history.append({"role": "user", "content": user_input})
+                     st.session_state.chat_history.append({"role": "assistant", "content": answer})
+                 else:
+                     st.error(f"Error: {response.status_code} {response.text}")
+             except requests.RequestException as e:
+                 st.error(f"Error: {e}")
+
+     # Display chat history
+     for chat in st.session_state.chat_history:
+         if chat["role"] == "user":
+             st.write(f"**You:** {chat['content']}")
+         else:
+             st.write(f"**Assistant:** {chat['content']}")
+
+ if __name__ == "__main__":
+     app()
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ langchain
+ torch
+ accelerate
+ transformers
+ sentence_transformers
+ streamlit
+ streamlit_chat
+ faiss-cpu
+ huggingface-hub
+ fastapi
+ python-dotenv
+ requests
+ validators
+ uvicorn
+ pypdf
+ ctransformers
vectorstore/db_faiss/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ac2e8b1d4030e8af6d0ed42d9e3d8c8d96961b24f046ff637cdfe51c3c0b282
+ size 14865453
vectorstore/db_faiss/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:945bc1c891119b0df8e58c4ae9c1e60a22fbf7baa9beeeb190ff3f9b519ad394
+ size 5250471