Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -12,7 +12,6 @@ load_dotenv()
 
 app = FastAPI()
 
-# Global dictionary to store models and tokens
 global_data = {
     'models': {},
     'tokens': {
@@ -27,7 +26,6 @@ global_data = {
     }
 }
 
-# Model configuration
 model_configs = [
     {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
     {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-8B Instruct"},
@@ -59,7 +57,7 @@ class ModelManager:
             return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
         except Exception:
             pass
-
+
     def load_all_models(self):
         if self.loaded:
             return global_data['models']
@@ -116,19 +114,13 @@ def remove_repetitive_responses(responses):
     for response in responses:
         normalized_response = remove_duplicates(response['response'])
         if normalized_response not in seen:
-            unique_responses.append(response)
             seen.add(normalized_response)
+            unique_responses.append(response)
     return unique_responses
 
-def select_best_response(responses):
-    if not responses:
-        return ""
-    responses = remove_repetitive_responses(responses)
-    return max(set(responses), key=lambda x: x['response'].count("user"))
-
 @app.post("/generate")
 @spaces.GPU(duration=0)
-def
+async def generate(request: ChatRequest):
     try:
         global_data['models'] = model_manager.load_all_models()
         responses = []
@@ -145,6 +137,7 @@ def generate_chat(request: ChatRequest):
     if not responses:
         raise HTTPException(status_code=500, detail="Error: No responses generated.")
 
+    responses = remove_repetitive_responses(responses)
     best_response = select_best_response(responses)
     return {
         "best_response": best_response,
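After this commit the endpoint is syntactically valid again (the bare def on old line 131 was the likely cause of the Space's "Runtime error" status) and is exposed as POST /generate. The ChatRequest schema is not visible in this diff, so the request field below is a placeholder assumption, not the Space's confirmed API:

import requests

# Assumed base URL: 7860 is the default port for a locally run Space.
BASE_URL = "http://localhost:7860"

# "message" is a hypothetical ChatRequest field; the real schema is not
# shown in this diff.
payload = {"message": "Hello!"}

resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=300)
resp.raise_for_status()
# "best_response" is the key shown in the diff's return dict.
print(resp.json()["best_response"])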