Uhhy committed on
Commit
4e76cb1
verified
1 Parent(s): ec80f26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -11
app.py CHANGED
@@ -12,7 +12,6 @@ load_dotenv()
12
 
13
  app = FastAPI()
14
 
15
- # Diccionario global para almacenar modelos y tokens
16
  global_data = {
17
  'models': {},
18
  'tokens': {
@@ -27,7 +26,6 @@ global_data = {
27
  }
28
  }
29
 
30
- # Configuración de modelos
31
  model_configs = [
32
  {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
33
  {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-8B Instruct"},
@@ -59,7 +57,7 @@ class ModelManager:
59
  return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
60
  except Exception:
61
  pass
62
-
63
  def load_all_models(self):
64
  if self.loaded:
65
  return global_data['models']
@@ -116,19 +114,13 @@ def remove_repetitive_responses(responses):
116
  for response in responses:
117
  normalized_response = remove_duplicates(response['response'])
118
  if normalized_response not in seen:
119
- unique_responses.append(response)
120
  seen.add(normalized_response)
 
121
  return unique_responses
122
 
123
- def select_best_response(responses):
124
- if not responses:
125
- return ""
126
- responses = remove_repetitive_responses(responses)
127
- return max(set(responses), key=lambda x: x['response'].count("user"))
128
-
129
  @app.post("/generate")
130
  @spaces.GPU(duration=0)
131
- def generate_chat(request: ChatRequest):
132
  try:
133
  global_data['models'] = model_manager.load_all_models()
134
  responses = []
@@ -145,6 +137,7 @@ def generate_chat(request: ChatRequest):
145
  if not responses:
146
  raise HTTPException(status_code=500, detail="Error: No responses generated.")
147
 
 
148
  best_response = select_best_response(responses)
149
  return {
150
  "best_response": best_response,
 
12
 
13
  app = FastAPI()
14
 
 
15
  global_data = {
16
  'models': {},
17
  'tokens': {
 
26
  }
27
  }
28
 
 
29
  model_configs = [
30
  {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
31
  {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-8B Instruct"},
 
57
  return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
58
  except Exception:
59
  pass
60
+
61
  def load_all_models(self):
62
  if self.loaded:
63
  return global_data['models']
 
114
  for response in responses:
115
  normalized_response = remove_duplicates(response['response'])
116
  if normalized_response not in seen:
 
117
  seen.add(normalized_response)
118
+ unique_responses.append(response)
119
  return unique_responses
120
 
 
 
 
 
 
 
121
  @app.post("/generate")
122
  @spaces.GPU(duration=0)
123
+ async def generate(request: ChatRequest):
124
  try:
125
  global_data['models'] = model_manager.load_all_models()
126
  responses = []
 
137
  if not responses:
138
  raise HTTPException(status_code=500, detail="Error: No responses generated.")
139
 
140
+ responses = remove_repetitive_responses(responses)
141
  best_response = select_best_response(responses)
142
  return {
143
  "best_response": best_response,