Commit 5898430 · Updated docker file
Parent(s): 872347f
app.py CHANGED
@@ -1,4 +1,5 @@
 from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from huggingface_hub import InferenceClient
@@ -22,15 +23,29 @@ app.add_middleware(
 class ChatRequest(BaseModel):
     message: str
 
-@app.post("/chat")
-async def chat(req: ChatRequest):
-    try:
-        messages = [{"role": "user", "content": req.message}]
-        response = client.chat_completion(
+# @app.post("/chat")
+# async def chat(req: ChatRequest):
+#     try:
+#         messages = [{"role": "user", "content": req.message}]
+#         response = client.chat_completion(
+#             model=MODEL_ID,
+#             messages=messages,
+#             temperature=0.7,
+#         )
+#         return {"response": response.choices[0].message.content}
+#     except Exception as e:
+#         return {"error": str(e)}
+
+
+async def chat_endpoint(data: ChatRequest):
+    def stream():
+        for chunk in client.text_generation(
             model=MODEL_ID,
-            messages=messages,
+            prompt=data.message,
+            stream=True,
+            max_new_tokens=512,
             temperature=0.7,
-        )
-        return {"response": response.choices[0].message.content}
-    except Exception as e:
-        return {"error": str(e)}
+        ):
+            yield chunk
+
+    return StreamingResponse(stream(), media_type="text/plain")
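
As a usage note (not part of the commit): the rewritten endpoint returns a plain-text StreamingResponse, so callers can print tokens as they arrive instead of waiting for the full completion. Below is a minimal client sketch; the BASE_URL, the POST /chat route, and the stream_chat helper are assumptions for illustration, since this hunk does not show where chat_endpoint is mounted.

# Hypothetical client sketch -- BASE_URL and the POST /chat route are
# assumptions; the diff above does not show the route registration.
import requests

BASE_URL = "http://localhost:7860"  # typical local port for a Space; adjust as needed

def stream_chat(message: str) -> None:
    """Send one chat message and print text chunks as the server flushes them."""
    with requests.post(
        f"{BASE_URL}/chat",
        json={"message": message},  # matches the ChatRequest model (message: str)
        stream=True,                # keep the connection open and read incrementally
        timeout=60,
    ) as resp:
        resp.raise_for_status()
        # chunk_size=None yields data in whatever sizes the server sends
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)

if __name__ == "__main__":
    stream_chat("Hello!")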