Update app.py
app.py CHANGED
@@ -12,8 +12,7 @@ from share_btn import community_icon_html, loading_icon_html, share_js, share_bt
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
-
-API_URL ="https://api-inference.huggingface.co/models/bigcode/starcoderbase/"
+API_URL_G = "https://api-inference.huggingface.co/models/ArmelR/starcoder-gradio-v0/"
 
 with open("./HHH_prompt_short.txt", "r") as f:
     HHH_PROMPT = f.read() + "\n\n"
@@ -21,6 +20,8 @@ with open("./HHH_prompt_short.txt", "r") as f:
 with open("./TA_prompt_v0.txt", "r") as f:
     TA_PROMPT = f.read()
 
+NO_PROMPT = ""
+
 FIM_PREFIX = "<fim_prefix>"
 FIM_MIDDLE = "<fim_middle>"
 FIM_SUFFIX = "<fim_suffix>"
@@ -48,16 +49,10 @@ theme = gr.themes.Monochrome(
     ],
 )
 
-client = Client(
-    API_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
-
-client_p = Client(
-    API_URL_P, headers={"Authorization": f"Bearer {HF_TOKEN}"},
+client_g = Client(
+    API_URL_G, headers={"Authorization": f"Bearer {HF_TOKEN}"},
 )
 
-
-
 def generate(
     prompt,
     temperature=0.9,
@@ -82,28 +77,27 @@ def generate(
         truncate=7500,
         do_sample=True,
         seed=42,
-        stop_sequences=["\nHuman", "\n-----"],
+        stop_sequences=["\nHuman", "\n-----", "Question:", "Answer:"],
     )
 
     if chat_mode == "HHH prompt":
         base_prompt = HHH_PROMPT
-    else:
+    elif chat_mode == "TA prompt":
         base_prompt = TA_PROMPT
+    else :
+        base_prompt = NO_PROMPT
 
-    chat_prompt = prompt + "\n\nAssistant:"
+    chat_prompt = prompt + "\n\nAnswer:"
     prompt = base_prompt + chat_prompt
 
-
-        stream = client.generate_stream(prompt, **generate_kwargs)
-    else:
-        stream = client_p.generate_stream(prompt, **generate_kwargs)
+    stream = client_g.generate_stream(prompt, **generate_kwargs)
 
     output = ""
     previous_token = ""
 
     for response in stream:
         if (
-            (response.token.text in ["Human", "-----"]
+            (response.token.text in ["Question:", "-----"]
             and previous_token in ["\n", "-----"])
             or response.token.text == "<|endoftext|>"
         ):
@@ -131,7 +125,7 @@ def bot(
 ):
     # concat history of prompts with answers expect for last empty answer only add prompt
     prompt = "\n".join(
-        [f"Human: {prompt}\n\nAssistant: {answer}" for prompt, answer in history[:-1]] + [f"\nHuman: {history[-1][0]}"]
+        [f"Question: {prompt}\n\nAnswer: {answer}" for prompt, answer in history[:-1]] + [f"\nQuestion: {history[-1][0]}"]
     )
 
     bot_message = generate(
@@ -174,8 +168,8 @@ _Note:_ this is an internal chat playground - **please do not share**. The deplo
         column_1, column_2 = gr.Column(scale=3), gr.Column(scale=1)
         with column_2:
             chat_mode = gr.Dropdown(
-                ["TA prompt", "HHH prompt"],
-                value="TA prompt",
+                ["NO prompt","TA prompt", "HHH prompt"],
+                value="NO prompt",
                 label="Chat mode",
                 info="Use Anthropic's HHH prompt or our custom tech prompt to turn the model into an assistant.",
             )
@@ -215,12 +209,12 @@ _Note:_ this is an internal chat playground - **please do not share**. The deplo
                 interactive=True,
                 info="Penalize repeated tokens",
             )
-            version = gr.Dropdown(
-                ["StarCoderBase", "StarCoder"],
-                value="StarCoderBase",
-                label="Version",
-                info="",
-            )
+            #version = gr.Dropdown(
+            #    ["StarCoderBase", "StarCoder"],
+            #    value="StarCoderBase",
+            #    label="Version",
+            #    info="",
+            #)
         with column_1:
             # output = gr.Code(elem_id="q-output")
             # add visibl=False and update if chat_mode True