gmerrill committed
Commit f423eb3
1 Parent(s): 3e57e69
Files changed (1)
  1. main.py +24 -18
main.py CHANGED
@@ -2,9 +2,13 @@ from fastapi import FastAPI, Request
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import datetime
 import json
 import torch
 
+def log(msg):
+    print(str(datetime.datetime.now()) + ': ' + msg)
+
 def get_prompt(user_query: str, functions: list = []) -> str:
     """
     Generates a conversation prompt based on the user's query and a list of functions.
@@ -25,37 +29,38 @@ device : str = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
 model_id : str = "gorilla-llm/gorilla-openfunctions-v1"
-print('AutoTokenizer.from_pretrained ...')
+log('AutoTokenizer.from_pretrained ...')
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-print('AutoModelForCausalLM.from_pretrained ...')
+log('AutoModelForCausalLM.from_pretrained ...')
 model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True)
 
-print('mode.to(device) ...')
+log('mode.to(device) ...')
 model.to(device)
 
-print('Pipeline setup ...')
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=128,
-    batch_size=16,
-    torch_dtype=torch_dtype,
-    device=device,
-)
-
-print('FastAPI setup ...')
+log('FastAPI setup ...')
 app = FastAPI()
 
 @app.post("/query_gorilla")
 async def query_gorilla(req: Request):
     body = await req.body()
     parsedBody = json.loads(body)
-    print(parsedBody['query'])
-    print(parsedBody['functions'])
+    log(parsedBody['query'])
+    log(parsedBody['functions'])
 
-    print('Generate prompt and obtain model output')
+    log('Generate prompt and obtain model output')
     prompt = get_prompt(parsedBody['query'], functions=parsedBody['functions'])
+
+    log('Pipeline setup ...')
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=128,
+        batch_size=16,
+        torch_dtype=torch_dtype,
+        device=device,
+    )
+
     output = pipe(prompt)
 
     return {
@@ -69,3 +74,4 @@ def index() -> FileResponse:
     return FileResponse(path="/app/static/index.html", media_type="text/html")
 
 
+log('Initialization done.')
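
For context, the /query_gorilla handler above reads the raw request body with json.loads and expects 'query' and 'functions' keys. A minimal client sketch is shown below; the host, port, and the example function schema are assumptions for illustration, not part of this commit.

# Hypothetical client call for the /query_gorilla endpoint defined above.
# The host/port and the example function description are assumed.
import requests

payload = {
    "query": "What is the weather in Boston?",  # free-form user query
    "functions": [{                              # function description passed through to get_prompt
        "name": "get_current_weather",
        "description": "Get the current weather for a location",
        "parameters": {"type": "object", "properties": {"location": {"type": "string"}}},
    }],
}

# The handler json.loads() the raw body, so a plain JSON POST body works.
resp = requests.post("http://localhost:8000/query_gorilla", json=payload)
print(resp.json())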