asasasText

Runtime error

App Files Files Community

Hjgugugjhuhjggg committed on Nov 23, 2024

Commit

678a7bb

verified ·

1 Parent(s): abccdc4

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -28

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from pydantic import BaseModel
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
@@ -7,45 +6,49 @@ from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 import os
 from dotenv import load_dotenv
-import json
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
-app = FastAPI()
-origins = ["*"]  # Adjust as needed for production
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
 model_configs = [
     {"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}
-    # Add more models here
 ]
 models = {}
 def load_model(model_config):
-    if model_config['name'] not in models:
         try:
             model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
-            models[model_config['name']] = model
-            print(f"Model '{model_config['name']}' loaded successfully.")
             return model
         except Exception as e:
-            print(f"Error loading model {model_config['name']}: {e}")
             return None
 for config in model_configs:
-    load_model(config) #Load models on startup
 class ChatRequest(BaseModel):
@@ -72,18 +75,24 @@ def generate_model_response(model, inputs):
         response = model(inputs)
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
-        print(f"Error generating model response: {e}")
         return f"Error: {e}"
 @app.post("/generate")
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
     with ThreadPoolExecutor() as executor:
-        futures = [
-            executor.submit(generate_model_response, model, inputs)
-            for model in models.values()
-        ]
         responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(models.keys(), as_completed(futures))]
     unique_responses = {}
@@ -97,7 +106,28 @@ async def generate(request: ChatRequest):
     return {"response": formatted_response}
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    uvicorn.run(app, host="0.0.0.0", port=port)

 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 from fastapi.middleware.cors import CORSMiddleware
 import os
 from dotenv import load_dotenv
+import gradio as gr
+import requests
+import asyncio
+from pydantic import BaseModel
 # Apply python-dotenv's load_dotenv() before the environment is read below
 # (presumably to pick up a local .env file during development).
 load_dotenv()
 # Hugging Face access token, later passed to Llama.from_pretrained as
 # use_auth_token. os.getenv returns None when the variable is not set.
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
# Shared process-wide state: a slot for the model registry plus the
# placeholder string used for each special-token kind ("<kind>_token").
global_data = {
    'models': {},
    'tokens': {
        kind: f"{kind}_token"
        for kind in ('eos', 'pad', 'padding', 'unk', 'bos', 'sep', 'cls', 'mask')
    },
}

# Checkpoints to load at startup; "name" is the key used in `models`.
model_configs = [
    {
        "repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF",
        "filename": "mergekit-ties-tzamfyy-q2_k.gguf",
        "name": "my_model",
    },
]

# Registry of loaded models keyed by config name; starts empty.
models = dict()
 def load_model(model_config):
     """Load one configured model and register it under its name.

     Args:
         model_config: Mapping with the keys ``name``, ``repo_id`` and
             ``filename``.

     Returns:
         The registered entry for ``model_config['name']``: the loaded model,
         or ``None`` when loading failed.

     A failed load is recorded as ``None`` in ``models`` so that it is not
     retried on later calls. Code that iterates ``models`` therefore has to
     cope with ``None`` entries.
     """
     model_name = model_config['name']
     # Already attempted: hand back the registered entry. Previously this
     # path fell off the end and returned None even for a loaded model.
     if model_name in models:
         return models[model_name]
     # Keep the try body to the single call that is expected to fail.
     try:
         model = Llama.from_pretrained(
             repo_id=model_config['repo_id'],
             filename=model_config['filename'],
             use_auth_token=HUGGINGFACE_TOKEN,
         )
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
         models[model_name] = None
         return None
     models[model_name] = model
     # Expose the same registry object through the shared state dict.
     global_data['models'] = models
     return model


 # Load every configured model once at import time.
 for config in model_configs:
     load_model(config)
 class ChatRequest(BaseModel):
         response = model(inputs)
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
+        print(f"Error generating response: {e}")
         return f"Error: {e}"
# HTTP API application; the routes below are registered on this instance.
app = FastAPI()
# Any origin may call the API.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    # A wildcard origin must not be combined with credentialed requests:
    # the CORS middleware would otherwise echo back whatever Origin the
    # browser sent, letting any site make cookie-bearing calls. List
    # explicit origins before turning this back on.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
 @app.post("/generate")
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
     with ThreadPoolExecutor() as executor:
+        futures = [executor.submit(generate_model_response, model, inputs) for model in models.values()]
         responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(models.keys(), as_completed(futures))]
     unique_responses = {}
     return {"response": formatted_response}
async def process_message(message, history):
    """Send one chat message to the local ``/generate`` endpoint.

    Args:
        message: Text entered by the user.
        history: List of ``(user_message, reply)`` pairs shown in the chat.
            It is extended in place.

    Returns:
        A ``(history, status)`` pair. ``status`` is ``""`` on success and the
        error text on failure. On failure the error is also appended to
        ``history``, because the status Textbox in ``iface`` is hidden and
        the user would otherwise get no feedback.
    """
    port = int(os.environ.get("PORT", 7860))
    url = f"http://localhost:{port}/generate"

    def _post():
        # Generation can be slow, so the timeout is generous, but it must
        # exist so a dead backend cannot hang the UI forever.
        return requests.post(url, json={"message": message}, timeout=600)

    try:
        # requests is blocking; run it in a worker thread so the event loop
        # stays free. This matters because the UI and the API share one
        # server (see the entry point below): blocking the loop here would
        # stop /generate from ever being served.
        reply = await asyncio.get_running_loop().run_in_executor(None, _post)
        reply.raise_for_status()
        formatted_response = reply.json()["response"]
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: JSON without the
        # expected {"response": ...} shape.
        error_text = f"Error communicating with the backend: {e}"
        history.append((message, error_text))
        return history, error_text
    history.append((message, formatted_response))
    return history, ""


# Gradio front end: a message box plus per-session history state in, the chat
# transcript and a hidden status box out.
iface = gr.Interface(
    fn=process_message,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your message here..."),
        gr.State([])
    ],
    outputs=[gr.Chatbot(), gr.Textbox(visible=False)],
    title="Multi-Model LLM API",
    description="Enter a message and get responses from multiple LLMs.",
)


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    # uvicorn.run() blocks until shutdown, so a separate iface.launch() after
    # it would never start (and would compete for the same port). Serve the
    # Gradio UI from the FastAPI app instead. Routes registered earlier, such
    # as /generate, still take precedence over the mount at "/".
    app = gr.mount_gradio_app(app, iface, path="/")
    uvicorn.run(app, host="0.0.0.0", port=port)