Spaces: Runtime error
Upload 2 files
- app.py +70 -8
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,6 +1,7 @@
 import json
 
 import gradio as gr
+import os
 
 import spacy
 spacy.cli.download('en_core_web_sm')
@@ -17,18 +18,79 @@ import wikipediaapi
 wiki_wiki = wikipediaapi.Wikipedia('Organika ([email protected])', 'en')
 
 ## ctransformers disabled for now
-from ctransformers import AutoModelForCausalLM
-model = AutoModelForCausalLM.from_pretrained(
-    "Colby/StarCoder-3B-WoW-JSON",
-    model_file="StarCoder-3B-WoW-JSON-ggml.bin",
-    model_type="gpt_bigcode"
-)
+# from ctransformers import AutoModelForCausalLM
+# model = AutoModelForCausalLM.from_pretrained(
+#     "Colby/StarCoder-3B-WoW-JSON",
+#     model_file="StarCoder-3B-WoW-JSON-ggml.bin",
+#     model_type="gpt_bigcode"
+# )
 
 # Use a pipeline as a high-level helper
 from transformers import pipeline
 topic_model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9")
 #model = pipeline("text-generation", model="Colby/StarCoder-3B-WoW-JSON", device=0)
 
+import requests
+
+# function for Huggingface API calls
+def query(payload, model_path, headers):
+    API_URL = "https://api-inference.huggingface.co/models/" + model_path
+    for retry in range(3):
+        response = requests.post(API_URL, headers=headers, json=payload)
+        if response.status_code == requests.codes.ok:
+            try:
+                results = response.json()
+                return results
+            except:
+                print('Invalid response received from server')
+                print(response)
+                return None
+        else:
+            # Not connected to internet maybe?
+            if response.status_code==404:
+                print('Are you connected to the internet?')
+                print('URL attempted = '+API_URL)
+                break
+            if response.status_code==503:
+                print(response.json())
+                continue
+            if response.status_code==504:
+                print('504 Gateway Timeout')
+            else:
+                print('Unsuccessful request, status code '+ str(response.status_code))
+                # print(response.json()) #debug only
+                print(payload)
+
+def generate_text(prompt, model_path, text_generation_parameters, headers):
+    start_time = time.time()
+    options = {'use_cache': False, 'wait_for_model': True}
+    payload = {"inputs": prompt, "parameters": text_generation_parameters, "options": options}
+    output_list = query(payload, model_path, headers)
+    if not output_list:
+        print('Generation failed')
+    end_time = time.time()
+    duration = round(end_time - start_time, 1)
+    stringlist = []
+    if output_list and 'generated_text' in output_list[0].keys():
+        print(f'{len(output_list)} sample(s) of text generated in {duration} seconds.')
+        for gendict in output_list:
+            stringlist.append(gendict['generated_text'])
+    else:
+        print(output_list)
+    return(stringlist)
+
+model_path = "Colby/StarCoder-3B-WoW-JSON"
+parameters = {
+    "max_new_tokens": 250,
+    "return_full_text": False,
+    "do_sample": True,
+    "temperature": 0.8,
+    "top_p": 0.9,
+    "top_k": 50,
+    "repetition_penalty": 1.1
+}
+headers = "Bearer " + os.environ['HF_TOKEN']
+
 def merlin_chat(message, history):
     chat_text = ""
     chat_json = ""
@@ -79,12 +141,12 @@ def merlin_chat(message, history):
     prompt = "[" + json.dumps(system_msg) + chat_json + json.dumps(user_msg) + "{'role': 'assistant, 'content': '*recalls \""
     for attempt in range(3):
         # result = model(prompt, max_new_tokens=250, return_full_text=False, handle_long_generation="hole")
-        result =
+        result = generate_text(prompt, model_path, parameters)
         response = result[0]['generated_text']
         start = 0
         end = 0
         cleanStr = response.lstrip()
-        cleanStr = cleanStr.replace(prompt,"")
+        # cleanStr = cleanStr.replace(prompt,"")
        start = cleanStr.find('{') # this should skip over whatever it recalls to what it says next
         if start<=0:
             continue
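As committed, the new Inference API path still leaves a few things unresolved, which is consistent with the "Runtime error" status shown above: requests.post expects headers to be a mapping, not the plain "Bearer ..." string built from HF_TOKEN; generate_text calls time.time() although no time import is visible in these hunks; the call in merlin_chat passes only three of generate_text's four arguments; and generate_text returns a plain list of strings while the caller still indexes result[0]['generated_text']. A minimal sketch of how those pieces could fit together (hypothetical glue code, not the committed version, assuming HF_TOKEN is set in the environment) looks like this:

import os
import time
import requests

MODEL_PATH = "Colby/StarCoder-3B-WoW-JSON"
API_URL = "https://api-inference.huggingface.co/models/" + MODEL_PATH

# requests wants a dict here; the Inference API reads an Authorization header
HEADERS = {"Authorization": "Bearer " + os.environ["HF_TOKEN"]}

def generate_text(prompt, parameters, headers=HEADERS, retries=3):
    # Hypothetical helper: returns a list of generated strings, or [] on failure.
    payload = {
        "inputs": prompt,
        "parameters": parameters,
        "options": {"use_cache": False, "wait_for_model": True},
    }
    for _ in range(retries):
        response = requests.post(API_URL, headers=headers, json=payload)
        if response.status_code == requests.codes.ok:
            # text-generation responses arrive as a list of {"generated_text": ...}
            return [item["generated_text"] for item in response.json()]
        if response.status_code == 503:
            time.sleep(2)  # model still loading on the Hub side; retry
            continue
        print("Unsuccessful request, status code " + str(response.status_code))
        break
    return []

# Call site: the helper returns plain strings, so index the list directly.
samples = generate_text("Hello, Merlin.", {"max_new_tokens": 50, "return_full_text": False})
if samples:
    response_text = samples[0]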
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
-ctransformers
+#ctransformers
+requests
 transformers
 torch
 spacy
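Since generation now goes over HTTP instead of loading a local GGML model, requests replaces ctransformers as the runtime dependency, while transformers, torch, and spacy presumably remain for the zero-shot topic pipeline and the spaCy model the app downloads. A quick way to verify the new dependency path before redeploying the Space is to hit the endpoint directly with the same token (a throwaway check, again assuming HF_TOKEN is set in the environment):

import os
import requests

# Smoke test for the Inference API route the app now depends on.
url = "https://api-inference.huggingface.co/models/Colby/StarCoder-3B-WoW-JSON"
headers = {"Authorization": "Bearer " + os.environ["HF_TOKEN"]}
payload = {"inputs": "Hello", "options": {"wait_for_model": True}}
resp = requests.post(url, headers=headers, json=payload)
print(resp.status_code)
print(resp.json() if resp.ok else resp.text)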