shahabkahn committed
Commit 318f2bb · verified · Parent: 70340d3

Upload 11 files

.dockerignore ADDED
@@ -0,0 +1,34 @@
+ # Include any files or directories that you don't want to be copied to your
+ # container here (e.g., local build artifacts, temporary files, etc.).
+ #
+ # For more help, visit the .dockerignore file reference guide at
+ # https://docs.docker.com/go/build-context-dockerignore/
+
+ **/.DS_Store
+ **/__pycache__
+ **/.venv
+ **/.classpath
+ **/.dockerignore
+ **/.env
+ **/.git
+ **/.gitignore
+ **/.project
+ **/.settings
+ **/.toolstarget
+ **/.vs
+ **/.vscode
+ **/*.*proj.user
+ **/*.dbmdl
+ **/*.jfm
+ **/bin
+ **/charts
+ **/docker-compose*
+ **/compose.y*ml
+ **/Dockerfile*
+ **/node_modules
+ **/npm-debug.log
+ **/obj
+ **/secrets.dev.yaml
+ **/values.dev.yaml
+ LICENSE
+ README.md
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Data/Notes[[:space:]]for[[:space:]]MRCP.pdf filter=lfs diff=lfs merge=lfs -text
+ vectorstore/db_faiss/index.faiss filter=lfs diff=lfs merge=lfs -text
2.jpg ADDED
Data/Notes for MRCP.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12fc84682467687c558b308794171eeed335c7c8dee8c8da983256c18b7ce6c1
+ size 42930908
Dockerfile ADDED
@@ -0,0 +1,67 @@
+ # syntax=docker/dockerfile:1
+
+ # Comments are provided throughout this file to help you get started.
+ # If you need more help, visit the Dockerfile reference guide at
+ # https://docs.docker.com/go/dockerfile-reference/
+
+ # Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7
+
+ ARG PYTHON_VERSION=3.12.3
+ FROM python:${PYTHON_VERSION}-slim AS base
+
+ # Prevents Python from writing pyc files.
+ ENV PYTHONDONTWRITEBYTECODE=1
+
+ # Keeps Python from buffering stdout and stderr to avoid situations where
+ # the application crashes without emitting any logs due to buffering.
+ ENV PYTHONUNBUFFERED=1
+
+ WORKDIR /app
+
+ # Create a non-privileged user that the app will run under.
+ # See https://docs.docker.com/go/dockerfile-user-best-practices/
+ ARG UID=10001
+ RUN adduser \
+     --disabled-password \
+     --gecos "" \
+     --home "/nonexistent" \
+     --shell "/sbin/nologin" \
+     --no-create-home \
+     --uid "${UID}" \
+     appuser
+
+ # Download dependencies as a separate step to take advantage of Docker's caching.
+ # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+ # Leverage a bind mount to requirements.txt to avoid having to copy it
+ # into this layer.
+ RUN --mount=type=cache,target=/root/.cache/pip \
+     --mount=type=bind,source=requirements.txt,target=requirements.txt \
+     python -m pip install -r requirements.txt
+
+ # Create a /data directory and assign its ownership to appuser.
+ RUN mkdir -p /data && chown appuser /data
+
+ # Switch to the non-privileged user to run the application.
+ USER appuser
+
+ # Set the TRANSFORMERS_CACHE environment variable.
+ ENV TRANSFORMERS_CACHE=/tmp/.cache/huggingface
+
+ # Create the cache folder with appropriate permissions.
+ RUN mkdir -p $TRANSFORMERS_CACHE && chmod -R 777 $TRANSFORMERS_CACHE
+
+ # Copy the source code into the container.
+ COPY . .
+
+ # Expose the ports that the application listens on.
+ EXPOSE 7860
+ EXPOSE 8501
+
+ # Run the FastAPI backend in the background and the Streamlit frontend in the foreground.
+ CMD ["bash", "-c", "uvicorn main:app --host 0.0.0.0 --port 7860 & streamlit run BrainBot.py --server.port 8501 --server.enableXsrfProtection false"]
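The CMD above launches both servers from a single shell. Below is a rough local equivalent, as a sketch only: the CMD references main:app and BrainBot.py, while the modules uploaded in this commit are app.py and frontend.py, so the names used here follow the uploaded files and are an assumption.

# local_run.py — minimal sketch of the CMD's two-process launch, outside Docker.
# Module names (app:app, frontend.py) are assumptions based on the uploaded files.
import subprocess

# Start the FastAPI backend in the background on port 7860.
backend = subprocess.Popen(["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"])
try:
    # Run the Streamlit frontend in the foreground, mirroring the container CMD.
    subprocess.run(["streamlit", "run", "frontend.py", "--server.port", "8501"], check=True)
finally:
    backend.terminate()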
app.py ADDED
@@ -0,0 +1,142 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from langchain.chains import RetrievalQA
+ from langchain_community.llms import CTransformers
+ from langchain.prompts import PromptTemplate
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ import re
+ import uvicorn
+ import logging
+
+ app = FastAPI()
+
+ # CORS configuration
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Load embeddings and vector database
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
+ try:
+     db = FAISS.load_local("vectorstore/db_faiss", embeddings, allow_dangerous_deserialization=True)
+     logger.info("Vector database loaded successfully!")
+ except Exception as e:
+     logger.error(f"Failed to load vector database: {e}")
+     raise
+
+ # Load LLM
+ try:
+     llm = CTransformers(
+         model="llama-2-7b-chat.ggmlv3.q4_0.bin",
+         model_type="llama",
+         max_new_tokens=128,
+         temperature=0.5,
+     )
+     logger.info("LLM model loaded successfully!")
+ except Exception as e:
+     logger.error(f"Failed to load LLM model: {e}")
+     raise
+
+ # Define custom prompt template
+ custom_prompt_template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+ Context: {context}
+ Question: {question}
+
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """
+ qa_prompt = PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
+
+ # Set up RetrievalQA chain
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=llm,
+     chain_type="stuff",
+     retriever=db.as_retriever(search_kwargs={"k": 2}),
+     return_source_documents=True,
+     chain_type_kwargs={"prompt": qa_prompt},
+ )
+
+ class QuestionRequest(BaseModel):
+     question: str
+
+ class AnswerResponse(BaseModel):
+     answer: str
+
+ def clean_answer(answer):
+     # Remove unnecessary characters and symbols, keeping #, * and \ so the
+     # markdown-related steps below can still match.
+     cleaned_answer = re.sub(r'[^\w\s.,#*\\-]', '', answer)
+
+     # Collapse immediately repeated words ("the the" -> "the")
+     cleaned_answer = re.sub(r'\b(\w+)( \1\b)+', r'\1', cleaned_answer)
+
+     # Remove any leading or trailing whitespace
+     cleaned_answer = cleaned_answer.strip()
+
+     # Replace runs of whitespace with a single space
+     cleaned_answer = re.sub(r'\s+', ' ', cleaned_answer)
+
+     # Turn literal "\n" sequences into real newlines for markdown
+     cleaned_answer = re.sub(r'\\n', '\n', cleaned_answer)
+
+     # Normalize bullet points to markdown syntax
+     cleaned_answer = re.sub(r'^\s*-\s+(.*)$', r'* \1', cleaned_answer, flags=re.MULTILINE)
+
+     # Normalize numbered lists to markdown syntax
+     cleaned_answer = re.sub(r'^\s*\d+\.\s+(.*)$', r'1. \1', cleaned_answer, flags=re.MULTILINE)
+
+     # Normalize headings to markdown syntax
+     cleaned_answer = re.sub(r'^\s*(#+)\s+(.*)$', r'\1 \2', cleaned_answer, flags=re.MULTILINE)
+
+     return cleaned_answer
+
+ def format_sources(sources):
+     formatted_sources = []
+     for source in sources:
+         metadata = source.metadata
+         page = metadata.get('page', 'Unknown page')
+         source_str = f"{metadata.get('source', 'Unknown source')}, page {page}"
+         formatted_sources.append(source_str)
+     return "\n".join(formatted_sources)
+
+ @app.post("/query", response_model=AnswerResponse)
+ async def query(question_request: QuestionRequest):
+     try:
+         question = question_request.question
+         if not question:
+             raise HTTPException(status_code=400, detail="Question is required")
+
+         result = qa_chain({"query": question})
+         answer = result.get("result")
+         sources = result.get("source_documents")
+
+         if sources:
+             answer += "\nSources:\n" + format_sources(sources)
+         else:
+             answer += "\nNo sources found"
+
+         # Clean up the answer and return it wrapped in a dictionary
+         return {"answer": clean_answer(answer)}
+
+     except HTTPException:
+         # Re-raise client errors (e.g., the 400 above) instead of masking them as 500s.
+         raise
+     except Exception as e:
+         logger.error(f"Error processing query: {e}")
+         raise HTTPException(status_code=500, detail="Internal Server Error")
+
+ if __name__ == '__main__':
+     uvicorn.run(app, host='0.0.0.0', port=8000)
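For reference, a minimal client for the /query endpoint, assuming the API is running locally on port 8000 as in the __main__ block above; the sample question is illustrative only.

# query_client.py — sketch of calling POST /query.
import requests

payload = {"question": "What is the first-line management of hypertension?"}
response = requests.post("http://localhost:8000/query", json=payload, timeout=120)
response.raise_for_status()
# The endpoint returns {"answer": "..."} per the AnswerResponse model.
print(response.json()["answer"])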
compose.yaml ADDED
@@ -0,0 +1,49 @@
+ # Comments are provided throughout this file to help you get started.
+ # If you need more help, visit the Docker Compose reference guide at
+ # https://docs.docker.com/go/compose-spec-reference/
+
+ # Here the instructions define your application as a service called "server".
+ # This service is built from the Dockerfile in the current directory.
+ # You can add other services your application may depend on here, such as a
+ # database or a cache. For examples, see the Awesome Compose repository:
+ # https://github.com/docker/awesome-compose
+ services:
+   server:
+     build:
+       context: .
+     ports:
+       - 8000:8000
+
+ # The commented-out section below is an example of how to define a PostgreSQL
+ # database that your application can use. `depends_on` tells Docker Compose to
+ # start the database before your application. The `db-data` volume persists the
+ # database data between container restarts. The `db-password` secret is used
+ # to set the database password. You must create `db/password.txt` and add
+ # a password of your choosing to it before running `docker compose up`.
+ #     depends_on:
+ #       db:
+ #         condition: service_healthy
+ #   db:
+ #     image: postgres
+ #     restart: always
+ #     user: postgres
+ #     secrets:
+ #       - db-password
+ #     volumes:
+ #       - db-data:/var/lib/postgresql/data
+ #     environment:
+ #       - POSTGRES_DB=example
+ #       - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
+ #     expose:
+ #       - 5432
+ #     healthcheck:
+ #       test: [ "CMD", "pg_isready" ]
+ #       interval: 10s
+ #       timeout: 5s
+ #       retries: 5
+ # volumes:
+ #   db-data:
+ # secrets:
+ #   db-password:
+ #     file: db/password.txt
db.py ADDED
@@ -0,0 +1,27 @@
+ # db.py — build the FAISS vector store from the PDFs in Data/
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ DATA_PATH = "Data/"  # matches the uploaded Data/ directory (case-sensitive on Linux)
+ DB_FAISS_PATH = "vectorstore/db_faiss"
+
+ def create_vector_db():
+     loader = DirectoryLoader(
+         DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader
+     )
+
+     documents = loader.load()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+     texts = text_splitter.split_documents(documents)
+
+     embeddings = HuggingFaceEmbeddings(
+         model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
+     )
+
+     db = FAISS.from_documents(texts, embeddings)
+     db.save_local(DB_FAISS_PATH)
+
+ if __name__ == "__main__":
+     create_vector_db()
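After running db.py, the saved index can be sanity-checked with the same embedding model, mirroring how app.py loads the store; a sketch only, and the query string is a made-up example.

# check_index.py — load the saved FAISS index and run a quick similarity search.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
)
db = FAISS.load_local(
    "vectorstore/db_faiss", embeddings, allow_dangerous_deserialization=True
)

# Retrieve the two closest chunks, matching the k=2 retriever in app.py.
for doc in db.similarity_search("management of atrial fibrillation", k=2):
    print(doc.metadata.get("page", "?"), doc.page_content[:100])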
frontend.py ADDED
@@ -0,0 +1,70 @@
+ import requests
+ import streamlit as st
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Define the URL of your FastAPI endpoint
+ url = "http://localhost:8000/query"
+
+ # Initialize the session state
+ st.session_state.setdefault("chat_history", [])
+
+ # Function to handle the new chat button click
+ def new_chat():
+     st.session_state.chat_history = []  # Clear the chat history
+
+ # Streamlit app
+ def app():
+     st.title("Doctor's Medical Assistant")
+     st.sidebar.button("New Chat", on_click=new_chat)
+     st.image("2.jpg", width=300)
+
+     # Display welcome message
+     st.write("<span style='font-size:20px; font-weight:bold;'>Welcome! How can I help you?</span>",
+              unsafe_allow_html=True)
+
+     # Placeholder text for the input box
+     input_placeholder = st.empty()
+     input_text = input_placeholder.text_input("", key="user_input", help="Type your question here...")
+
+     # JavaScript to set the input placeholder text
+     placeholder_script = """
+     <script>
+     const inputElement = document.querySelector('input[data-baseweb="input"]');
+     inputElement.placeholder = "Enter your question";
+     </script>
+     """
+     st.markdown(placeholder_script, unsafe_allow_html=True)
+
+     # Handle form submission
+     submit_button = st.button("➡️")
+     if submit_button:
+         user_input = input_text.strip()
+         if user_input:
+             # Create the request payload
+             payload = {"question": user_input}
+             try:
+                 # Send the POST request to the FastAPI endpoint
+                 response = requests.post(url, json=payload)
+                 # Check if the request was successful
+                 if response.ok:
+                     # Get the answer from the FastAPI endpoint
+                     answer = response.json().get("answer")
+                     st.session_state.chat_history.append({"role": "user", "content": user_input})
+                     st.session_state.chat_history.append({"role": "assistant", "content": answer})
+                 else:
+                     st.error(f"Error: {response.status_code} {response.text}")
+             except requests.RequestException as e:
+                 st.error(f"Error: {e}")
+
+     # Display chat history
+     for chat in st.session_state.chat_history:
+         if chat["role"] == "user":
+             st.write(f"**You:** {chat['content']}")
+         else:
+             st.write(f"**Assistant:** {chat['content']}")
+
+ if __name__ == "__main__":
+     app()
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ langchain
+ torch
+ accelerate
+ transformers
+ sentence_transformers
+ streamlit
+ streamlit_chat
+ faiss-cpu
+ huggingface-hub
+ fastapi
+ python-dotenv
+ requests
+ validators
+ uvicorn
+ pypdf
+ ctransformers
vectorstore/db_faiss/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ac2e8b1d4030e8af6d0ed42d9e3d8c8d96961b24f046ff637cdfe51c3c0b282
+ size 14865453
vectorstore/db_faiss/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:945bc1c891119b0df8e58c4ae9c1e60a22fbf7baa9beeeb190ff3f9b519ad394
+ size 5250471