asimsultan committed
Commit 5898430 · 1 Parent(s): 872347f

Updated docker file

Files changed (1): app.py (+25 -10)
app.py CHANGED
@@ -1,4 +1,5 @@
 from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from huggingface_hub import InferenceClient
@@ -22,15 +23,29 @@ app.add_middleware(
 class ChatRequest(BaseModel):
     message: str
 
-@app.post("/chat")
-async def chat(req: ChatRequest):
-    try:
-        messages = [{"role": "user", "content": req.message}]
-        response = client.chat_completion(
+# @app.post("/chat")
+# async def chat(req: ChatRequest):
+#     try:
+#         messages = [{"role": "user", "content": req.message}]
+#         response = client.chat_completion(
+#             model=MODEL_ID,
+#             messages=messages,
+#             temperature=0.7,
+#         )
+#         return {"response": response.choices[0].message.content}
+#     except Exception as e:
+#         return {"error": str(e)}
+
+
+async def chat_endpoint(data: ChatRequest):
+    def stream():
+        for chunk in client.text_generation(
             model=MODEL_ID,
-            messages=messages,
+            prompt=data.message,
+            stream=True,
+            max_new_tokens=512,
+            temperature=0.7,
+        ):
+            yield chunk
+
+    return StreamingResponse(stream(), media_type="text/plain")
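
The new chat_endpoint replaces the blocking chat_completion call with a streamed text_generation call wrapped in a StreamingResponse, so text chunks are sent to the caller as they are generated. The hunk above does not show a route decorator for chat_endpoint; the sketch below is not part of the commit and assumes the route remains POST /chat and that client and MODEL_ID are defined earlier in app.py, as in the original file.

@app.post("/chat")
async def chat_endpoint(data: ChatRequest):
    def stream():
        # With stream=True, InferenceClient.text_generation yields plain-text chunks
        for chunk in client.text_generation(
            model=MODEL_ID,
            prompt=data.message,
            stream=True,
            max_new_tokens=512,
            temperature=0.7,
        ):
            yield chunk

    return StreamingResponse(stream(), media_type="text/plain")

A caller can then read the response incrementally; the base URL below is an assumption (adjust it to wherever the app is actually served):

import requests

# Hypothetical client for the streaming endpoint; reads the plain-text stream chunk by chunk
with requests.post(
    "http://localhost:7860/chat",
    json={"message": "Hello!"},
    stream=True,
) as resp:
    resp.raise_for_status()
    for piece in resp.iter_content(chunk_size=None):
        # StreamingResponse encodes str chunks as UTF-8 by default
        print(piece.decode("utf-8"), end="", flush=True)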