sebdg committed · Commit fc7cbfb · verified · 1 Parent(s): 708593b

Update app.py

change to inference

Files changed (1):
  1. app.py +21 -24
app.py CHANGED
@@ -5,7 +5,19 @@ from huggingface_hub import InferenceClient
 For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
-client = InferenceClient(model="https://aq0teqpujnx3bv68.us-east-1.aws.endpoints.huggingface.cloud/")
+#client = InferenceClient(model="https://aq0teqpujnx3bv68.us-east-1.aws.endpoints.huggingface.cloud/")
+import requests
+
+API_URL = "https://aq0teqpujnx3bv68.us-east-1.aws.endpoints.huggingface.cloud"
+headers = {
+    "Accept" : "application/json",
+    "Content-Type": "application/json"
+}
+
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
 
 def respond(
     message,
@@ -24,30 +36,15 @@ def respond(
     max_tokens = 512
     temperature = 0.7
     top_p = 0.95
 
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+    output = query({
+        "inputs":system_message + "\n\n" + message,
+        "parameters": {
+            "max_new_tokens": 150
+        }
+    })
+    return output
+
 
 """