igroman574 committed on
Commit
1bd9398
·
verified ·
1 Parent(s): ed9eb68

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +16 -6
main.py CHANGED
@@ -2,11 +2,11 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from huggingface_hub import InferenceClient
4
  import uvicorn
5
-
6
 
7
  app = FastAPI()
8
 
9
- client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
10
 
11
  class Item(BaseModel):
12
  prompt: str
@@ -26,9 +26,7 @@ def format_prompt(message, history):
26
  return prompt
27
 
28
  def generate(item: Item):
29
- temperature = float(item.temperature)
30
- if temperature < 1e-2:
31
- temperature = 1e-2
32
  top_p = float(item.top_p)
33
 
34
  generate_kwargs = dict(
@@ -50,4 +48,16 @@ def generate(item: Item):
50
 
51
  @app.post("/generate/")
52
  async def generate_text(item: Item):
53
- return {"response": generate(item)}
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from pydantic import BaseModel
3
  from huggingface_hub import InferenceClient
4
  import uvicorn
5
+ import asyncio
6
 
7
  app = FastAPI()
8
 
9
+ client = InferenceClient(model="Qwen/Qwen2.5-7B")
10
 
11
  class Item(BaseModel):
12
  prompt: str
 
26
  return prompt
27
 
28
  def generate(item: Item):
29
+ temperature = max(float(item.temperature), 1e-2)
 
 
30
  top_p = float(item.top_p)
31
 
32
  generate_kwargs = dict(
 
48
 
49
  @app.post("/generate/")
50
  async def generate_text(item: Item):
51
+ return {"response": generate(item)}
52
+
53
+ @app.on_event("startup")
54
+ async def preload_model():
55
+ # Check if the model is already loaded
56
+ status = client.get_model_status()
57
+ if not status.loaded:
58
+ # Trigger model loading by making a dummy request
59
+ dummy_prompt = "This is a dummy prompt to load the model."
60
+ client.text_generation(dummy_prompt, max_new_tokens=1)
61
+ # Optionally, wait until the model is loaded
62
+ while not client.get_model_status().loaded:
63
+ await asyncio.sleep(5) # Wait for 5 seconds before checking again