sksstudio
committed on
Commit
·
59ee418
1
Parent(s):
ca0012e
Add application file twoo5
Browse files- app.py +4 -2
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -20,8 +20,10 @@ model_path = huggingface_hub.hf_hub_download(
|
|
| 20 |
# Initialize the model with the downloaded file
|
| 21 |
llm = Llama(
|
| 22 |
model_path=model_path,
|
| 23 |
-
n_ctx=2048,
|
| 24 |
-
n_threads=4
|
|
|
|
|
|
|
| 25 |
)
|
| 26 |
|
| 27 |
class GenerationRequest(BaseModel):
|
|
|
|
| 20 |
# Initialize the model with the downloaded file
|
| 21 |
llm = Llama(
|
| 22 |
model_path=model_path,
|
| 23 |
+
n_ctx=2048, # Context window
|
| 24 |
+
n_threads=4, # Number of CPU threads to use
|
| 25 |
+
n_batch=512, # Number of tokens to process in parallel
|
| 26 |
+
verbose=True # Enable verbose logging for debugging
|
| 27 |
)
|
| 28 |
|
| 29 |
class GenerationRequest(BaseModel):
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
fastapi==0.104.1
|
| 2 |
uvicorn==0.24.0
|
| 3 |
pydantic==2.4.2
|
| 4 |
-
llama-cpp-python
|
| 5 |
huggingface-hub>=0.19.0
|
|
|
|
| 1 |
fastapi==0.104.1
|
| 2 |
uvicorn==0.24.0
|
| 3 |
pydantic==2.4.2
|
| 4 |
+
llama-cpp-python>=0.2.20
|
| 5 |
huggingface-hub>=0.19.0
|