Mdean77 committed on
Commit
2edf2fb
·
1 Parent(s): 004e22c

Refactored the Dockerfile to start from the Qdrant image and add the rest (curl, uv, and the Chainlit app) on top.

Browse files
Files changed (4) hide show
  1. Dockerfile +18 -20
  2. app.py +47 -6
  3. entrypoint.sh +17 -0
  4. getVectorstore.py +6 -5
Dockerfile CHANGED
@@ -1,34 +1,32 @@
1
- # Dockerfile for the Clinical Trial Project
2
- # December 31, 2024
3
- # Happy New Year!
4
 
5
- # Get a distribution that has uv already installed
6
- FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
 
 
7
 
8
- # Add user - this is the user that will run the app
9
- # If you do not set user, the app will run as root (undesirable)
10
- RUN useradd -m -u 1000 user
11
- USER user
12
 
13
- # Set the home directory and path
14
  ENV HOME=/home/user \
15
  PATH=/home/user/.local/bin:$PATH
16
 
17
- # NEEDED FOR CHAINLIT IN HUGGING FACE SPACES
 
 
18
  ENV UVICORN_WS_PROTOCOL=websockets
19
 
20
- # Set the working directory
21
  WORKDIR $HOME/app
22
 
23
- # Copy the app to the container
24
  COPY --chown=user . $HOME/app
25
-
26
- # Install the dependencies
27
  RUN uv sync --frozen
28
- # RUN uv sync
29
 
30
- # Expose the port
31
- EXPOSE 7860
 
32
 
33
- # Run the app
34
- CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # Trying to put Qdrant into the same container as the Chainlit application
 
 
2
 
3
+ FROM qdrant/qdrant:latest
4
+ RUN apt-get update && apt-get install -y \
5
+ curl \
6
+ && rm -rf /var/lib/apt/lists/*
7
 
8
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 
 
 
9
 
10
+ # # Set the home directory and path
11
  ENV HOME=/home/user \
12
  PATH=/home/user/.local/bin:$PATH
13
 
14
+ ENV PATH="/root/.local/bin:$PATH"
15
+
16
+ # # NEEDED FOR CHAINLIT IN HUGGING FACE SPACES
17
  ENV UVICORN_WS_PROTOCOL=websockets
18
 
19
+ # # Set the working directory
20
  WORKDIR $HOME/app
21
 
22
+ # # Copy the app to the container
23
  COPY --chown=user . $HOME/app
24
+ RUN chmod +x entrypoint.sh
25
+ # # Install the dependencies
26
  RUN uv sync --frozen
27
+ # # RUN uv sync
28
 
29
+ # # Expose the port
30
+ EXPOSE 7860 6333
31
+ ENTRYPOINT ["./entrypoint.sh"]
32
 
 
 
app.py CHANGED
@@ -9,12 +9,20 @@ import pymupdf
9
  import tiktoken
10
  from langchain_core.documents.base import Document
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # def tiktoken_len(text):
14
- # tokens = tiktoken.encoding_for_model("gpt-4o").encode(
15
- # text,
16
- # )
17
- # return len(tokens)
18
 
19
  @cl.on_chat_start
20
  async def on_chat_start():
@@ -92,4 +100,37 @@ async def on_chat_start():
92
  """
93
  )
94
 
95
- await msg.send()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  import tiktoken
10
  from langchain_core.documents.base import Document
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ import getVectorstore
13
+ from getVectorstore import getVectorstore
14
+ from qdrant_client.http import models as rest
15
+ from langchain.prompts import ChatPromptTemplate
16
+ import prompts
17
+ from prompts import rag_prompt_template
18
+ from defaults import default_llm
19
+ from operator import itemgetter
20
+ from langchain.schema.output_parser import StrOutputParser
21
+
22
+
23
+
24
+
25
 
 
 
 
 
 
26
 
27
  @cl.on_chat_start
28
  async def on_chat_start():
 
100
  """
101
  )
102
 
103
+ await msg.send()
104
+
105
+
106
+ qdrant_vectorstore = getVectorstore(document, file.path)
107
+
108
+ document_titles = ["protocol.pdf", "consent.pdf"]
109
+
110
+ # protocol_retriever = qdrant_vectorstore.as_retriever()
111
+
112
+ # protocol_retriever = create_protocol_retriever(document_titles)
113
+ protocol_retriever = qdrant_vectorstore.as_retriever(
114
+ search_kwargs={
115
+ 'filter': rest.Filter(
116
+ must=[
117
+ rest.FieldCondition(
118
+ key="metadata.document_title",
119
+ match=rest.MatchAny(any=document_titles)
120
+ )
121
+ ]
122
+ ),
123
+ 'k':15,
124
+ }
125
+ )
126
+
127
+
128
+ # Create prompt
129
+ rag_prompt = ChatPromptTemplate.from_template(prompts.rag_prompt_template)
130
+
131
+ llm = default_llm
132
+
133
+ rag_chain = (
134
+ {"context": itemgetter("question") | protocol_retriever, "question": itemgetter("question")}
135
+ | rag_prompt | llm | StrOutputParser()
136
+ )
entrypoint.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Start Qdrant in the background
4
+ echo "Starting Qdrant server..."
5
+ /qdrant/qdrant &
6
+
7
+ # Wait for Qdrant to be ready
8
+ # echo "Waiting for Qdrant to be ready..."
9
+ # until curl -s http://localhost:6333/health | grep '"status":"ok"'; do
10
+ # sleep 1
11
+ # done
12
+ echo "Qdrant is ready!"
13
+
14
+ # Start the Chainlit application via uv (uv run chainlit run)
15
+ echo "Starting Chainlit application..."
16
+ uv run chainlit run app.py --host 0.0.0.0 --port 7860
17
+
getVectorstore.py CHANGED
@@ -33,8 +33,8 @@ def getVectorstore(document, file_path):
33
  for doc in document:
34
  doc.metadata['document_title'] = file_path.split('/')[-1]
35
 
36
- # client = QdrantClient(url=qdrant_url)
37
- client = QdrantClient(":memory:")
38
  # If the collection exists, then we need to check to see if our document is already
39
  # present, in which case we would not want to store it again.
40
  if client.collection_exists("protocol_collection"):
@@ -42,8 +42,8 @@ def getVectorstore(document, file_path):
42
  qdrant_vectorstore = QdrantVectorStore.from_existing_collection(
43
  embedding=embedding_model,
44
  collection_name="protocol_collection",
45
- # url=qdrant_url
46
- location = ":memory:"
47
  )
48
 
49
  # Check for existing documents and only add new ones
@@ -83,6 +83,7 @@ def getVectorstore(document, file_path):
83
  documents=document,
84
  embedding=embedding_model,
85
  collection_name="protocol_collection",
86
- location = ":memory:"
 
87
  )
88
  return qdrant_vectorstore
 
33
  for doc in document:
34
  doc.metadata['document_title'] = file_path.split('/')[-1]
35
 
36
+ client = QdrantClient( url=qdrant_url)
37
+ # client = QdrantClient(":memory:")
38
  # If the collection exists, then we need to check to see if our document is already
39
  # present, in which case we would not want to store it again.
40
  if client.collection_exists("protocol_collection"):
 
42
  qdrant_vectorstore = QdrantVectorStore.from_existing_collection(
43
  embedding=embedding_model,
44
  collection_name="protocol_collection",
45
+ url=qdrant_url
46
+ # location = ":memory:"
47
  )
48
 
49
  # Check for existing documents and only add new ones
 
83
  documents=document,
84
  embedding=embedding_model,
85
  collection_name="protocol_collection",
86
+ # location = ":memory:"
87
+ url=qdrant_url
88
  )
89
  return qdrant_vectorstore