Spaces:

DMindAI
/

DMind-1-mini

Running

App Files Files Community

nanova committed on May 21

Commit

da03023

1 Parent(s): 2f7c5ff

feat: update llm model to api

Browse files

Files changed (1) hide show

app.py +33 -15

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
@@ -25,19 +27,35 @@ def respond(
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
@@ -52,7 +70,7 @@ demo = gr.ChatInterface(
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
-            value=0.95,
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),

 import gradio as gr
+import requests
+import json
+API_URL = "https://api.whaleflux.com/whaleflux/v1/model/deployment/enova-service-8fbf8085-2d13-4583/v1/chat/completions"
+API_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyaWQiOiJNVGMwTlRVMk5EVTROaTR4T0dNd01qUXpaVEJsTVRsaVpURmhPV1V5TkdVMk9UUTRabVppTjJNME16RmtaVGt4WkRjM056RmtPR1l4TTJFek1HRmpNek15WW1JMFlUTmpPVEUwIiwiaWF0IjoxNzQ1NTY0NTg2LCJleHAiOi0xLCJvcmdfaWQiOiIxMDAyNzA5NSIsInNjb3BlIjp7InBlcm1pc3Npb24iOm51bGx9LCJ0eXBlIjoiYXBpLXRva2VuIiwiTWFwQ2xhaW1zIjpudWxsfQ.fw6eZmOWr7gBqKd6X5duGao0MOimZ69Fv0oeBVWy0Gk"
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 def respond(
     message,
     messages.append({"role": "user", "content": message})
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {API_TOKEN}"
+    }
+    data = {
+        "model": "/data/DMind-1-mini",
+        "stream": True,
+        "messages": messages,
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": 20,
+        "min_p": 0.1
+    }
+    response = ""
+    with requests.post(API_URL, headers=headers, json=data, stream=True) as r:
+        for line in r.iter_lines():
+            if line:
+                try:
+                    json_response = json.loads(line.decode('utf-8').replace('data: ', ''))
+                    if 'choices' in json_response and len(json_response['choices']) > 0:
+                        token = json_response['choices'][0].get('delta', {}).get('content', '')
+                        if token:
+                            response += token
+                            yield response
+                except json.JSONDecodeError:
+                    continue
 """
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
+            value=0.96,
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),