Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -161,20 +161,18 @@ Male_language_dict = {
|
|
161 |
}
|
162 |
|
163 |
|
164 |
-
|
165 |
default_lang = "en"
|
166 |
-
|
167 |
engines = { default_lang: Model(default_lang) }
|
168 |
|
|
|
|
|
|
|
169 |
def transcribe(audio):
|
170 |
lang = "en"
|
171 |
model = engines[lang]
|
172 |
text = model.stt_file(audio)[0]
|
173 |
return text
|
174 |
|
175 |
-
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
176 |
-
|
177 |
-
system_instructions1 = "<s>[SYSTEM] Answer as Dr. Watson's Famous Sherlock Holmes Assistant, with a male voice and London slang and accent. Remember you are the character and Assistant Dr. Watson, from Sherlock Holmes Novels, Keep the conversation short, clear, friendly, and concise. You will refer to the USER alternatively as: 'DEAR FRIEND' or 'MY HELPMATE' or 'MY BIOGRAPHER' or 'MY COMPANION'. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Dr. Watson, from the Sherlock Holmes Novels. The expectation is that you will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by a human, Real Person, and not by an AI-powered assistant. Do Not use * or **. [USER]"
|
178 |
|
179 |
def model(text):
|
180 |
generate_kwargs = dict(
|
@@ -185,7 +183,7 @@ def model(text):
|
|
185 |
do_sample=True,
|
186 |
seed=42,
|
187 |
)
|
188 |
-
|
189 |
formatted_prompt = system_instructions1 + text + "[WATSON]"
|
190 |
stream = client1.text_generation(
|
191 |
formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
@@ -193,24 +191,13 @@ def model(text):
|
|
193 |
for response in stream:
|
194 |
if not response.token.text == "</s>":
|
195 |
output += response.token.text
|
196 |
-
|
197 |
return output
|
198 |
|
199 |
|
200 |
-
|
201 |
-
async def respond(audio, language_code):
|
202 |
user = transcribe(audio)
|
203 |
reply = model(user)
|
204 |
-
|
205 |
-
# Random voice choice from Manager based on locale and gender
|
206 |
-
#
|
207 |
-
#voices = await VoicesManager.create()
|
208 |
-
#voice = voices.find(Gender="Male", Locale="en-GB")
|
209 |
-
#communicate = edge_tts.Communicate(reply, random.choice(voice)["Name"])
|
210 |
-
#
|
211 |
-
# Or the following as one selected Voice for the character.
|
212 |
-
#language_code = 'English (UK)-Ryan- (Male)'
|
213 |
-
|
214 |
voice = Male_language_dict.get(language_code, "default_voice")
|
215 |
communicate = edge_tts.Communicate(reply, voice)
|
216 |
|
@@ -220,12 +207,7 @@ async def respond(audio, language_code):
|
|
220 |
yield tmp_path
|
221 |
|
222 |
|
223 |
-
|
224 |
-
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
225 |
-
|
226 |
-
system_instructions1 = "<s>[SYSTEM] Answer as Dr. Watson's Famous Sherlock Holmes Assistant, with a male voice and London slang and accent. Remember you are the character and Assistant Dr. Watson, from Sherlock Holmes Novels, Keep the conversation short, clear, friendly, and concise. You will refer to the USER alternatively as: 'DEAR FRIEND' or 'MY HELPMATE' or 'MY BIOGRAPHER' or 'MY COMPANION'. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Dr. Watson, from the Sherlock Holmes Novels. The expectation is that you will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by a human, Real Person, and not by an AI-powered assistant. Do Not use * or **. [USER]"
|
227 |
-
|
228 |
-
async def generate1(prompt, language_code):
|
229 |
generate_kwargs = dict(
|
230 |
temperature=0.7,
|
231 |
max_new_tokens=512,
|
@@ -240,16 +222,6 @@ async def generate1(prompt, language_code):
|
|
240 |
for response in stream:
|
241 |
if not response.token.text == "</s>":
|
242 |
output += response.token.text
|
243 |
-
|
244 |
-
# Random voice choice from VoicesManager based on locale and gender
|
245 |
-
#
|
246 |
-
#voices = await VoicesManager.create()
|
247 |
-
#voice = voices.find(Gender="Male", Locale="en-GB")
|
248 |
-
#communicate = edge_tts.Communicate(reply, random.choice(voice)["Name"])
|
249 |
-
#
|
250 |
-
# Or the following as one selected voice for the character.
|
251 |
-
#language_code = 'English (UK)-Ryan- (Male)'
|
252 |
-
|
253 |
voice = Male_language_dict.get(language_code, "default_voice")
|
254 |
communicate = edge_tts.Communicate(output, voice)
|
255 |
|
@@ -257,31 +229,29 @@ async def generate1(prompt, language_code):
|
|
257 |
tmp_path = tmp_file.name
|
258 |
await communicate.save(tmp_path)
|
259 |
yield tmp_path
|
|
|
|
|
260 |
|
261 |
-
with gr.Blocks(
|
262 |
-
gr.
|
263 |
-
|
264 |
-
|
265 |
-
""")
|
266 |
|
267 |
|
268 |
with gr.Tab("Talk to Dr. WATSON"):
|
269 |
-
with gr.
|
270 |
-
|
|
|
271 |
us_output = gr.Audio(label="WATSON", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
|
272 |
-
|
273 |
-
|
274 |
|
275 |
with gr.Tab("Write to Dr. WATSON"):
|
276 |
-
with gr.
|
277 |
-
|
278 |
-
|
279 |
output_audio = gr.Audio(label="WATSON", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
|
280 |
-
|
281 |
-
|
282 |
-
translate_btn.click(fn=generate1, inputs=[user_input, gr.Dropdown(choices=list(Male_language_dict.keys()), value="English (UK)-Ryan- (Male)" , label="Select Voice for Dr. WATSON")], outputs=output_audio, api_name="translate")
|
283 |
-
|
284 |
-
|
285 |
|
286 |
if __name__ == "__main__":
|
287 |
demo.queue(max_size=200, api_open=False).launch()
|
|
|
161 |
}
|
162 |
|
163 |
|
|
|
164 |
default_lang = "en"
|
|
|
165 |
engines = { default_lang: Model(default_lang) }
|
166 |
|
167 |
+
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
168 |
+
system_instructions1 = "<s>[SYSTEM] YOU must Output only plain text. Do not use **bold**, *italic*, ### headings, **number** or any other markdown-specific formatting in content. Answer as Dr. Watson's Famous Sherlock Holmes Assistant, with a male voice and London slang and accent. Remember you are the character and Assistant Dr. Watson, from Sherlock Holmes Novels, Keep the conversation short, clear, friendly, and concise. You will refer to the USER alternatively as: 'DEAR FRIEND' or 'MY HELPMATE' or 'MY BIOGRAPHER' or 'MY COMPANION'. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Dr. Watson, from the Sherlock Holmes Novels. The expectation is that you will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI powered assistant, your task is to generate responses that appear as if they are created by a human, Real Person, and not by an AI powered assistant."
|
169 |
+
|
170 |
def transcribe(audio):
|
171 |
lang = "en"
|
172 |
model = engines[lang]
|
173 |
text = model.stt_file(audio)[0]
|
174 |
return text
|
175 |
|
|
|
|
|
|
|
176 |
|
177 |
def model(text):
|
178 |
generate_kwargs = dict(
|
|
|
183 |
do_sample=True,
|
184 |
seed=42,
|
185 |
)
|
186 |
+
|
187 |
formatted_prompt = system_instructions1 + text + "[WATSON]"
|
188 |
stream = client1.text_generation(
|
189 |
formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
|
|
191 |
for response in stream:
|
192 |
if not response.token.text == "</s>":
|
193 |
output += response.token.text
|
|
|
194 |
return output
|
195 |
|
196 |
|
197 |
+
async def respond(language_code, audio):
|
|
|
198 |
user = transcribe(audio)
|
199 |
reply = model(user)
|
200 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
voice = Male_language_dict.get(language_code, "default_voice")
|
202 |
communicate = edge_tts.Communicate(reply, voice)
|
203 |
|
|
|
207 |
yield tmp_path
|
208 |
|
209 |
|
210 |
+
async def generate1(language_code, prompt):
|
|
|
|
|
|
|
|
|
|
|
211 |
generate_kwargs = dict(
|
212 |
temperature=0.7,
|
213 |
max_new_tokens=512,
|
|
|
222 |
for response in stream:
|
223 |
if not response.token.text == "</s>":
|
224 |
output += response.token.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
voice = Male_language_dict.get(language_code, "default_voice")
|
226 |
communicate = edge_tts.Communicate(output, voice)
|
227 |
|
|
|
229 |
tmp_path = tmp_file.name
|
230 |
await communicate.save(tmp_path)
|
231 |
yield tmp_path
|
232 |
+
|
233 |
+
# css="style.css"
|
234 |
|
235 |
+
with gr.Blocks(gr.themes.Soft()) as demo:
|
236 |
+
gr.HTML(""" <img src='https://huggingface.co/spaces/Isidorophp/Talk-to-Dr.Watson/resolve/main/logo.png' alt='RJP DEV STUDIO logo' style='height:60px;'> """
|
237 |
+
""" <center><h1> DR. Watson 🤖 🧠 🧬</h1></center> """
|
238 |
+
""" <center><h3> I suggest, you ask me for a mystery: </h3></center> """)
|
|
|
239 |
|
240 |
|
241 |
with gr.Tab("Talk to Dr. WATSON"):
|
242 |
+
with gr.Group():
|
243 |
+
user_voice = gr.Dropdown(choices=list(Male_language_dict.keys()), value="English (UK)-Ryan- (Male)" , label="Select Voice for Dr. WATSON")
|
244 |
+
us_input = gr.Audio(label="Your Voice Chat", type="filepath", interactive=True, sources="microphone", waveform_options=None)
|
245 |
us_output = gr.Audio(label="WATSON", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
|
246 |
+
gr.Interface(fn=respond, inputs=[user_voice, us_input], outputs=us_output, live=False)
|
|
|
247 |
|
248 |
with gr.Tab("Write to Dr. WATSON"):
|
249 |
+
with gr.Group():
|
250 |
+
user_voice = gr.Dropdown(choices=list(Male_language_dict.keys()), value="English (UK)-Ryan- (Male)" , label="Select Voice for Dr. WATSON")
|
251 |
+
user_input = gr.TextArea(label="Your Question", value="Dr. Watson, can you summarize your adventures with Sherlock Holmes?")
|
252 |
output_audio = gr.Audio(label="WATSON", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
|
253 |
+
gr.Interface(fn=generate1, inputs=[user_voice, user_input], outputs=output_audio, live=False)
|
254 |
+
|
|
|
|
|
|
|
255 |
|
256 |
if __name__ == "__main__":
|
257 |
demo.queue(max_size=200, api_open=False).launch()
|