Uhhy committed
Commit c8e35b7 · verified · 1 Parent(s): 2aca525

Update app.py

Files changed (1):
  app.py +15 -20
app.py CHANGED
@@ -1,3 +1,4 @@
+
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
@@ -5,18 +6,12 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import uvicorn
 import re
 from dotenv import load_dotenv
-from spaces.zero import ZeroGPU
 import spaces
 
 load_dotenv()
 
 app = FastAPI()
 
-try:
-    ZeroGPU.initialize()
-except Exception:
-    pass
-
 global_data = {
     'models': {},
     'tokens': {
@@ -60,7 +55,8 @@ class ModelManager:
     def load_model(self, model_config):
         try:
             return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
-        except Exception:
+        except Exception as e:
+            print(f"Error loading model {model_config['name']}: {e}")
             pass
 
     def load_all_models(self):
@@ -79,7 +75,8 @@ class ModelManager:
             global_data['models'] = {model['name']: model['model'] for model in models}
             self.loaded = True
             return global_data['models']
-        except Exception:
+        except Exception as e:
+            print(f"Error loading models: {e}")
             pass
         return {}
 
@@ -115,12 +112,14 @@ def remove_repetitive_responses(responses):
         normalized_response = remove_duplicates(response['response'])
         if normalized_response not in seen:
             seen.add(normalized_response)
+
+
             unique_responses.append({'model': response['model'], 'response': normalized_response})
     return unique_responses
 
-@app.post("/generate/")
+@app.post("/chat/")
 @spaces.GPU(duration=0)
-async def generate(request: ChatRequest):
+async def chat(request: ChatRequest):
     try:
         normalized_message = normalize_input(request.message)
         with ThreadPoolExecutor() as executor:
@@ -128,17 +127,13 @@ async def generate(request: ChatRequest):
                               top_k=request.top_k, top_p=request.top_p, temperature=request.temperature)
                for model in global_data['models'].values()]
             responses = []
-            for future, model_name in zip(as_completed(futures), global_data['models']):
-                generated_text = future.result()
-                responses.append({'model': model_name, 'response': generated_text})
-
-            return remove_repetitive_responses(responses)
-    except NotImplementedError as nie:
-        raise HTTPException(status_code=500, detail=str(nie))
-    except ZeroGPU.ZeroGPUException as gpu_exc:
-        raise HTTPException(status_code=500, detail=f"ZeroGPU Error: {gpu_exc}")
+            for future, model_name in zip(as_completed(futures), global_data['models'].keys()):
+                response = future.result()
+                responses.append({'model': model_name, 'response': response})
+            unique_responses = remove_repetitive_responses(responses)
+            return unique_responses
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
 
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)
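
Review note: in the new handler, zip(as_completed(futures), global_data['models'].keys()) pairs futures in completion order with model names in insertion order, so a response can be attributed to the wrong model whenever models finish out of order. A minimal sketch of a stable pairing keyed by model name (the submitted worker function sits outside this hunk, so generate_model_response below is a placeholder for it):

# Sketch: map each future back to its model name so completion order
# cannot scramble the attribution. generate_model_response stands in for
# the function actually submitted in the handler above.
future_to_name = {
    executor.submit(generate_model_response, model, normalized_message,
                    top_k=request.top_k, top_p=request.top_p,
                    temperature=request.temperature): name
    for name, model in global_data['models'].items()
}
responses = []
for future in as_completed(future_to_name):
    # future_to_name[future] is always the model that produced this result
    responses.append({'model': future_to_name[future],
                      'response': future.result()})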
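
Review note: the new except blocks print the error but still fall through to pass, so load_model returns None on failure, and load_all_models will then hit a TypeError on model['name'] when building the registry. One small guard, sketched under the assumption that load_all_models collects load_model results into models before the dict comprehension shown in the hunk above:

# Sketch: drop failed loads so one bad model_config cannot break the
# whole registry; 'models' is assumed to hold load_model return values.
models = [m for m in models if m is not None]
global_data['models'] = {model['name']: model['model'] for model in models}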
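
For reference, the renamed endpoint can be exercised like this. A sketch only: the payload keys are inferred from the request attributes the handler reads (message, top_k, top_p, temperature), the sampling values are made-up placeholders, and the host/port come from the uvicorn.run call above:

# Sketch: call the renamed /chat/ endpoint of this app.
# Payload keys are inferred from the handler; values are placeholders.
import requests

payload = {
    "message": "Hello, world",
    "top_k": 40,
    "top_p": 0.95,
    "temperature": 0.7,
}
resp = requests.post("http://0.0.0.0:8000/chat/", json=payload)
print(resp.json())  # expected: list of {'model': ..., 'response': ...}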