robinroy03 committed
Commit caa5775 · 1 Parent(s): 64d60d8

test-llama3, not sure how it'll go on langserve

Files changed (3)
  1. Dockerfile +2 -2
  2. main.py +61 -0
  3. requirements.txt +196 -0
Dockerfile CHANGED
@@ -1,4 +1,4 @@
- FROM python:3.9
+ FROM python:3.11.9
 
  WORKDIR /code
 
@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
  COPY . .
 
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["python", "main.py"]
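Note: with CMD ["python", "main.py"], the listen host and port are no longer set via uvicorn CLI flags, so main.py has to bind them itself in its uvicorn.run(...) call (see below); uvicorn's default of 127.0.0.1:8000 would otherwise leave the container unreachable on the 7860 port the old CMD used.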
main.py ADDED
@@ -0,0 +1,61 @@
+ from fastapi import FastAPI
+
+ from langchain_community.llms import LlamaCpp
+ from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
+ from langchain_core.prompts import PromptTemplate
+
+ from langserve import add_routes
+ from fastapi.middleware.cors import CORSMiddleware
+
+ app = FastAPI(
+     title="LangChain Server",
+     version="1.0",
+     description="A simple API server using LangChain's Runnable interfaces",
+ )
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=['*'],
+     allow_methods=['*'],
+     allow_headers=['*'],
+     allow_credentials=True
+ )
+
+
+ template = """Give a very concise one-word answer to the question.
+ Question: {question}
+ Answer:
+ """
+
+ prompt = PromptTemplate.from_template(template)
+
+ # Callbacks support token-wise streaming
+ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+ n_gpu_layers = -1  # Number of layers to offload to the GPU; the rest stay on the CPU. Use -1 to offload all layers.
+ n_batch = 512  # Should be between 1 and n_ctx; consider the amount of VRAM on your GPU.
+
+ # Make sure the model path is correct for your system!
+ llm = LlamaCpp(
+     model_path="Meta-Llama-3-8B-Instruct-v2.Q4_K_S.gguf",
+     n_gpu_layers=n_gpu_layers,
+     n_batch=n_batch,
+     callback_manager=callback_manager,
+     verbose=True,  # Verbose is required to pass to the callback manager
+ )
+
+ add_routes(
+     app,
+     prompt | llm,
+     path='/test'
+ )
+
+ if __name__ == "__main__":
+     import uvicorn
+
+     uvicorn.run(app, host="0.0.0.0", port=7860)  # uvicorn.run(app) alone defaults to 127.0.0.1:8000; 7860 matches the old Dockerfile CMD
+
+ # llm_chain = prompt | llm
+
+ # question = "Hi"
+ # x = llm_chain.invoke({"question": question})
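For reference, a minimal sketch of a client for the /test route added above, assuming the server is reachable on localhost:7860 and using langserve's RemoteRunnable; the route path and port come from this commit, but the client itself is illustrative and not part of it:

from langserve import RemoteRunnable

# add_routes(app, prompt | llm, path='/test') serves the chain at /test,
# so it can be invoked remotely like any local Runnable.
chain = RemoteRunnable("http://localhost:7860/test")

# The prompt template defines a single {question} input variable.
answer = chain.invoke({"question": "What is the capital of France?"})
print(answer)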
requirements.txt CHANGED
@@ -0,0 +1,196 @@
+ accelerate==0.30.1
+ aiofiles==23.2.1
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ altair==5.3.0
+ annotated-types==0.6.0
+ anyio==4.3.0
+ asgiref==3.8.1
+ asttokens==2.4.1
+ attrs==23.2.0
+ backoff==2.2.1
+ bcrypt==4.1.2
+ beautifulsoup4==4.12.3
+ build==1.2.1
+ cachetools==5.3.3
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ chroma-hnswlib==0.7.3
+ chromadb==0.5.0
+ click==8.1.7
+ coloredlogs==15.0.1
+ comm==0.2.2
+ contourpy==1.2.1
+ cycler==0.12.1
+ dataclasses-json==0.6.5
+ datasets==2.19.1
+ debugpy==1.8.1
+ decorator==5.1.1
+ Deprecated==1.2.14
+ dill==0.3.8
+ diskcache==5.6.3
+ distro==1.9.0
+ dnspython==2.6.1
+ email_validator==2.1.1
+ evaluate==0.4.2
+ executing==2.0.1
+ faiss-cpu==1.8.0
+ fastapi==0.111.0
+ fastapi-cli==0.0.2
+ ffmpy==0.3.2
+ filelock==3.14.0
+ flatbuffers==24.3.25
+ fonttools==4.51.0
+ frozenlist==1.4.1
+ fsspec==2024.3.1
+ google-auth==2.29.0
+ googleapis-common-protos==1.63.0
+ gradio==4.31.0
+ gradio_client==0.16.2
+ greenlet==3.0.3
+ grpcio==1.63.0
+ h11==0.14.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.0
+ humanfriendly==10.0
+ idna==3.7
+ importlib-metadata==7.0.0
+ importlib_resources==6.4.0
+ ipykernel==6.29.4
+ ipython==8.24.0
+ jedi==0.19.1
+ Jinja2==3.1.3
+ jsonpatch==1.33
+ jsonpointer==2.4
+ jsonschema==4.22.0
+ jsonschema-specifications==2023.12.1
+ jupyter_client==8.6.1
+ jupyter_core==5.7.2
+ kiwisolver==1.4.5
+ kubernetes==29.0.0
+ langchain==0.1.17
+ langchain-community==0.0.36
+ langchain-core==0.1.50
+ langchain-openai==0.1.6
+ langchain-text-splitters==0.0.1
+ langsmith==0.1.53
+ llama_cpp_python==0.2.75
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.21.2
+ matplotlib==3.8.4
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mmh3==4.1.0
+ monotonic==1.6
+ mpmath==1.3.0
+ multidict==6.0.5
+ multiprocess==0.70.16
+ mypy-extensions==1.0.0
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==1.26.4
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==8.9.2.26
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.20.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.1.105
+ oauthlib==3.2.2
+ onnxruntime==1.17.3
+ openai==1.25.1
+ opentelemetry-api==1.24.0
+ opentelemetry-exporter-otlp-proto-common==1.24.0
+ opentelemetry-exporter-otlp-proto-grpc==1.24.0
+ opentelemetry-instrumentation==0.45b0
+ opentelemetry-instrumentation-asgi==0.45b0
+ opentelemetry-instrumentation-fastapi==0.45b0
+ opentelemetry-proto==1.24.0
+ opentelemetry-sdk==1.24.0
+ opentelemetry-semantic-conventions==0.45b0
+ opentelemetry-util-http==0.45b0
+ orjson==3.10.3
+ overrides==7.7.0
+ packaging==23.2
+ pandas==2.2.2
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==10.3.0
+ platformdirs==4.2.1
+ posthog==3.5.0
+ prompt-toolkit==3.0.43
+ protobuf==4.25.3
+ psutil==5.9.8
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ pyarrow==16.1.0
+ pyarrow-hotfix==0.6
+ pyasn1==0.6.0
+ pyasn1_modules==0.4.0
+ pydantic==2.7.1
+ pydantic_core==2.18.2
+ pydub==0.25.1
+ Pygments==2.17.2
+ pyparsing==3.1.2
+ PyPika==0.48.9
+ pyproject_hooks==1.1.0
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ pyzmq==26.0.3
+ referencing==0.35.1
+ regex==2024.4.28
+ requests==2.31.0
+ requests-oauthlib==2.0.0
+ rich==13.7.1
+ rpds-py==0.18.1
+ rsa==4.9
+ ruff==0.4.4
+ safetensors==0.4.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ soupsieve==2.5
+ SQLAlchemy==2.0.29
+ stack-data==0.6.3
+ starlette==0.37.2
+ sympy==1.12
+ tenacity==8.2.3
+ tiktoken==0.6.0
+ tokenizers==0.19.1
+ tomlkit==0.12.0
+ toolz==0.12.1
+ torch==2.3.0
+ tornado==6.4
+ tqdm==4.66.4
+ traitlets==5.14.3
+ transformers==4.41.0
+ triton==2.3.0
+ typer==0.12.3
+ typing-inspect==0.9.0
+ typing_extensions==4.11.0
+ tzdata==2024.1
+ ujson==5.9.0
+ urllib3==2.2.1
+ uvicorn==0.29.0
+ uvloop==0.19.0
+ vtk==9.3.0
+ watchfiles==0.21.0
+ wcwidth==0.2.13
+ websocket-client==1.8.0
+ websockets==11.0.3
+ wrapt==1.16.0
+ xxhash==3.4.1
+ yarl==1.9.4
+ zipp==3.18.1