Spaces:
Runtime error
Runtime error
Update main.py
Browse files
main.py
CHANGED
@@ -2,11 +2,11 @@ from fastapi import FastAPI
|
|
2 |
from pydantic import BaseModel
|
3 |
from huggingface_hub import InferenceClient
|
4 |
import uvicorn
|
5 |
-
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
-
client = InferenceClient("Qwen/Qwen2.5-
|
10 |
|
11 |
class Item(BaseModel):
|
12 |
prompt: str
|
@@ -26,9 +26,7 @@ def format_prompt(message, history):
|
|
26 |
return prompt
|
27 |
|
28 |
def generate(item: Item):
|
29 |
-
temperature = float(item.temperature)
|
30 |
-
if temperature < 1e-2:
|
31 |
-
temperature = 1e-2
|
32 |
top_p = float(item.top_p)
|
33 |
|
34 |
generate_kwargs = dict(
|
@@ -50,4 +48,16 @@ def generate(item: Item):
|
|
50 |
|
51 |
@app.post("/generate/")
|
52 |
async def generate_text(item: Item):
|
53 |
-
return {"response": generate(item)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from pydantic import BaseModel
|
3 |
from huggingface_hub import InferenceClient
|
4 |
import uvicorn
|
5 |
+
import asyncio
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
+
client = InferenceClient(model="Qwen/Qwen2.5-7B")
|
10 |
|
11 |
class Item(BaseModel):
|
12 |
prompt: str
|
|
|
26 |
return prompt
|
27 |
|
28 |
def generate(item: Item):
|
29 |
+
temperature = max(float(item.temperature), 1e-2)
|
|
|
|
|
30 |
top_p = float(item.top_p)
|
31 |
|
32 |
generate_kwargs = dict(
|
|
|
48 |
|
49 |
@app.post("/generate/")
async def generate_text(item: Item):
    """POST /generate/ endpoint: run generation for *item* and wrap the result.

    Delegates the actual text generation to the module-level ``generate``
    helper and returns its output under the ``response`` key.
    """
    generated = generate(item)
    return {"response": generated}
|
52 |
+
|
53 |
+
@app.on_event("startup")
async def preload_model():
    """Warm up the inference backend when the app starts.

    Hosted inference endpoints load models lazily, so the first real request
    would otherwise pay the full cold-start cost.  If the backend reports the
    model as not loaded, trigger loading with a minimal 1-token generation,
    then poll until it comes up.

    NOTE(review): assumes ``client.get_model_status()`` returns an object with
    a boolean ``loaded`` attribute — confirm against the installed
    ``huggingface_hub`` version.
    """
    # Check if the model is already loaded
    status = client.get_model_status()
    if not status.loaded:
        # Trigger model loading by making a dummy request.  The client call is
        # synchronous; run it in a worker thread so it does not block the
        # event loop during application startup.
        dummy_prompt = "This is a dummy prompt to load the model."
        await asyncio.to_thread(client.text_generation, dummy_prompt, max_new_tokens=1)
        # Wait until the model is loaded.
        # TODO(review): unbounded poll — consider a max-attempts cap so a
        # permanently failing backend cannot stall startup forever.
        while not client.get_model_status().loaded:
            await asyncio.sleep(5)  # back off between status checks