Spaces:

Mdean77
/

Informed_Consent

Runtime error

App Files Files Community

Mdean77 committed on Jan 6

Commit

2edf2fb

1 Parent(s): 004e22c

Refactored the Dockerfile to start from the Qdrant image and add curl, uv, and the Chainlit app on top of it.

Browse files

Files changed (4) hide show

Dockerfile +18 -20
app.py +47 -6
entrypoint.sh +17 -0
getVectorstore.py +6 -5

Dockerfile CHANGED Viewed

@@ -1,34 +1,32 @@
-# Dockerfile for the Clinical Trial Project
-# December 31, 2024
-# Happy New Year!
-# Get a distribution that has uv already installed
-FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
-# Add user - this is the user that will run the app
-# If you do not set user, the app will run as root (undesirable)
-RUN useradd -m -u 1000 user
-USER user
-# Set the home directory and path
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
-# NEEDED FOR CHAINLIT IN HUGGING FACE SPACES
 ENV UVICORN_WS_PROTOCOL=websockets
-# Set the working directory
 WORKDIR $HOME/app
-# Copy the app to the container
 COPY --chown=user . $HOME/app
-# Install the dependencies
 RUN uv sync --frozen
-# RUN uv sync
-# Expose the port
-EXPOSE 7860
-# Run the app
-CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]

+# Trying to put Qdrant into the same container as the Chainlit application
+FROM qdrant/qdrant:latest
+RUN apt-get update && apt-get install -y \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+# # Set the home directory and path
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
+ENV PATH="/root/.local/bin:$PATH"
+# # NEEDED FOR CHAINLIT IN HUGGING FACE SPACES
 ENV UVICORN_WS_PROTOCOL=websockets
+# # Set the working directory
 WORKDIR $HOME/app
+# # Copy the app to the container
 COPY --chown=user . $HOME/app
+RUN chmod +x entrypoint.sh
+# # Install the dependencies
 RUN uv sync --frozen
+# # RUN uv sync
+# # Expose the port
+EXPOSE 7860 6333
+ENTRYPOINT ["./entrypoint.sh"]

app.py CHANGED Viewed

@@ -9,12 +9,20 @@ import pymupdf
 import tiktoken
 from langchain_core.documents.base import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-# def tiktoken_len(text):
-#     tokens = tiktoken.encoding_for_model("gpt-4o").encode(
-#         text,
-#     )
-#     return len(tokens)
 @cl.on_chat_start
 async def on_chat_start():
@@ -92,4 +100,37 @@ async def on_chat_start():
         """
     )
-    await msg.send()

 import tiktoken
 from langchain_core.documents.base import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+import getVectorstore
+from getVectorstore import getVectorstore
+from qdrant_client.http import models as rest
+from langchain.prompts import ChatPromptTemplate
+import prompts
+from prompts import rag_prompt_template
+from defaults import default_llm
+from operator import itemgetter
+from langchain.schema.output_parser import StrOutputParser
 @cl.on_chat_start
 async def on_chat_start():
         """
     )
+    await msg.send()
+    qdrant_vectorstore = getVectorstore(document, file.path)
+    document_titles = ["protocol.pdf", "consent.pdf"]
+    # protocol_retriever = qdrant_vectorstore.as_retriever()
+    # protocol_retriever = create_protocol_retriever(document_titles)
+    protocol_retriever = qdrant_vectorstore.as_retriever(
+        search_kwargs={
+            'filter': rest.Filter(
+                must=[
+                    rest.FieldCondition(
+                        key="metadata.document_title",
+                        match=rest.MatchAny(any=document_titles)
+                    )
+                ]
+            ),
+            'k':15,
+        }
+    )
+    # Create prompt
+    rag_prompt = ChatPromptTemplate.from_template(prompts.rag_prompt_template)
+    llm = default_llm
+    rag_chain = (
+        {"context": itemgetter("question") | protocol_retriever, "question": itemgetter("question")}
+        | rag_prompt | llm | StrOutputParser()
+    )

entrypoint.sh ADDED Viewed

	@@ -0,0 +1,17 @@

+#!/bin/bash
+# Start Qdrant in the background
+echo "Starting Qdrant server..."
+/qdrant/qdrant  &
+# Wait for Qdrant to be ready
+# echo "Waiting for Qdrant to be ready..."
+# until curl -s http://localhost:6333/health | grep '"status":"ok"'; do
+#   sleep 1
+# done
+echo "Qdrant is ready!"
+# Start Chainlit application using Uvicorn
+echo "Starting Chainlit application..."
+uv run chainlit run app.py --host 0.0.0.0 --port 7860

getVectorstore.py CHANGED Viewed

@@ -33,8 +33,8 @@ def getVectorstore(document, file_path):
     for doc in document:
         doc.metadata['document_title'] = file_path.split('/')[-1]
-    # client = QdrantClient(url=qdrant_url)
-    client = QdrantClient(":memory:")
     # If the collection exists, then we need to check to see if our document is already
     # present, in which case we would not want to store it again.
     if client.collection_exists("protocol_collection"):
@@ -42,8 +42,8 @@ def getVectorstore(document, file_path):
         qdrant_vectorstore = QdrantVectorStore.from_existing_collection(
             embedding=embedding_model,
             collection_name="protocol_collection",
-            # url=qdrant_url
-            location = ":memory:"
         )
         # Check for existing documents and only add new ones
@@ -83,6 +83,7 @@ def getVectorstore(document, file_path):
             documents=document,
             embedding=embedding_model,
             collection_name="protocol_collection",
-            location = ":memory:"
         )
     return qdrant_vectorstore

     for doc in document:
         doc.metadata['document_title'] = file_path.split('/')[-1]
+    client = QdrantClient( url=qdrant_url)
+    # client = QdrantClient(":memory:")
     # If the collection exists, then we need to check to see if our document is already
     # present, in which case we would not want to store it again.
     if client.collection_exists("protocol_collection"):
         qdrant_vectorstore = QdrantVectorStore.from_existing_collection(
             embedding=embedding_model,
             collection_name="protocol_collection",
+            url=qdrant_url
+            # location = ":memory:"
         )
         # Check for existing documents and only add new ones
             documents=document,
             embedding=embedding_model,
             collection_name="protocol_collection",
+            # location = ":memory:"
+            url=qdrant_url
         )
     return qdrant_vectorstore