Colby committed
Commit 1511e8d · verified · 1 parent: e9d0c91

Upload 2 files

Files changed (2)
  1. app.py +70 -8
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,6 +1,7 @@
 import json
 
 import gradio as gr
+import os
 
 import spacy
 spacy.cli.download('en_core_web_sm')
@@ -17,18 +18,80 @@ import wikipediaapi
 wiki_wiki = wikipediaapi.Wikipedia('Organika ([email protected])', 'en')
 
 ## ctransformers disabled for now
-from ctransformers import AutoModelForCausalLM
-model = AutoModelForCausalLM.from_pretrained(
-    "Colby/StarCoder-3B-WoW-JSON",
-    model_file="StarCoder-3B-WoW-JSON-ggml.bin",
-    model_type="gpt_bigcode"
-)
+# from ctransformers import AutoModelForCausalLM
+# model = AutoModelForCausalLM.from_pretrained(
+#     "Colby/StarCoder-3B-WoW-JSON",
+#     model_file="StarCoder-3B-WoW-JSON-ggml.bin",
+#     model_type="gpt_bigcode"
+# )
 
 # Use a pipeline as a high-level helper
 from transformers import pipeline
 topic_model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9")
 #model = pipeline("text-generation", model="Colby/StarCoder-3B-WoW-JSON", device=0)
 
+import requests
+import time
+
+# Helper for Hugging Face Inference API calls: POST the payload, retry up to three times
+def query(payload, model_path, headers):
+    API_URL = "https://api-inference.huggingface.co/models/" + model_path
+    for retry in range(3):
+        response = requests.post(API_URL, headers=headers, json=payload)
+        if response.status_code == requests.codes.ok:
+            try:
+                results = response.json()
+                return results
+            except ValueError:
+                print('Invalid response received from server')
+                print(response)
+                return None
+        else:
+            # Not connected to internet maybe?
+            if response.status_code == 404:
+                print('Are you connected to the internet?')
+                print('URL attempted = ' + API_URL)
+                break
+            if response.status_code == 503:
+                print(response.json())
+                continue
+            if response.status_code == 504:
+                print('504 Gateway Timeout')
+            else:
+                print('Unsuccessful request, status code ' + str(response.status_code))
+                # print(response.json())  # debug only
+                print(payload)
+
+def generate_text(prompt, model_path, text_generation_parameters, headers):
+    start_time = time.time()
+    options = {'use_cache': False, 'wait_for_model': True}
+    payload = {"inputs": prompt, "parameters": text_generation_parameters, "options": options}
+    output_list = query(payload, model_path, headers)
+    if not output_list:
+        print('Generation failed')
+    end_time = time.time()
+    duration = round(end_time - start_time, 1)
+    stringlist = []
+    if output_list and 'generated_text' in output_list[0].keys():
+        print(f'{len(output_list)} sample(s) of text generated in {duration} seconds.')
+        for gendict in output_list:
+            stringlist.append(gendict['generated_text'])
+    else:
+        print(output_list)
+    return stringlist
+
+model_path = "Colby/StarCoder-3B-WoW-JSON"
+parameters = {
+    "max_new_tokens": 250,
+    "return_full_text": False,
+    "do_sample": True,
+    "temperature": 0.8,
+    "top_p": 0.9,
+    "top_k": 50,
+    "repetition_penalty": 1.1
+}
+headers = {"Authorization": "Bearer " + os.environ['HF_TOKEN']}  # requests expects a header dict, not a bare string
+
 def merlin_chat(message, history):
     chat_text = ""
     chat_json = ""
@@ -79,12 +142,12 @@ def merlin_chat(message, history):
     prompt = "[" + json.dumps(system_msg) + chat_json + json.dumps(user_msg) + "{'role': 'assistant, 'content': '*recalls \""
     for attempt in range(3):
         # result = model(prompt, max_new_tokens=250, return_full_text=False, handle_long_generation="hole")
-        result = model(prompt, stop=["]"])
-        response = result[0]['generated_text']
+        result = generate_text(prompt, model_path, parameters, headers)
+        response = result[0]  # generate_text returns a list of strings
         start = 0
        end = 0
         cleanStr = response.lstrip()
-        cleanStr = cleanStr.replace(prompt,"")
+        # cleanStr = cleanStr.replace(prompt,"")
         start = cleanStr.find('{')  # this should skip over whatever it recalls to what it says next
         if start <= 0:
             continue
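
The new query()/generate_text() helpers depend on the payload and response shapes of the Hugging Face Inference API. A minimal smoke-test sketch of that endpoint, assuming HF_TOKEN is set in the environment and the model is served by the Inference API; the prompt string is illustrative only:

import os
import requests

# Hit the same endpoint that query()/generate_text() above would use
API_URL = "https://api-inference.huggingface.co/models/Colby/StarCoder-3B-WoW-JSON"
headers = {"Authorization": "Bearer " + os.environ["HF_TOKEN"]}  # assumes HF_TOKEN is set
payload = {
    "inputs": "Hello",  # illustrative prompt only
    "parameters": {"max_new_tokens": 50, "return_full_text": False},
    "options": {"use_cache": False, "wait_for_model": True},
}
response = requests.post(API_URL, headers=headers, json=payload)
print(response.status_code)
print(response.json())  # expected shape: [{"generated_text": "..."}]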
requirements.txt CHANGED
@@ -1,4 +1,5 @@
-ctransformers
+#ctransformers
+requests
 transformers
 torch
 spacy
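
With ctransformers commented out, requests (used by the new API helpers) is the only dependency this commit adds. A minimal sketch to confirm a fresh environment resolves the packages listed above:

# Quick import check against the updated requirements.txt
import requests, transformers, torch, spacy
print("requests", requests.__version__)
print("transformers", transformers.__version__)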