IT2091024v2

Paused

App Files Files Community

Pijush2023 committed on Aug 29, 2024

Commit

a8323dd

verified ·

1 Parent(s): 0465b6f

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -59

app.py CHANGED Viewed

@@ -307,10 +307,33 @@ chain_neo4j = (
     | StrOutputParser()
 )
@@ -346,6 +369,7 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
     history.append([response, None])
 phi_custom_template = """
 <|system|>
 You are a helpful assistant who provides clear, organized, crisp and conversational responses about an events,concerts,sports and all other activities of Birmingham,Alabama .<|end|>
@@ -722,71 +746,56 @@ def generate_audio_elevenlabs(text):
         return None
-repo_id = "parler-tts/parler-tts-mini-v1"
-parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
-parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
-parler_feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
-SAMPLE_RATE = parler_feature_extractor.sampling_rate
-def preprocess(text):
-    number_normalizer = EnglishNumberNormalizer()
-    text = number_normalizer(text).strip()
-    if text[-1] not in punctuation:
-        text = f"{text}."
-    abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
-    def separate_abb(chunk):
-        chunk = chunk.replace(".", "")
-        return " ".join(chunk)
-    abbreviations = re.findall(abbreviations_pattern, text)
-    for abv in abbreviations:
-        if abv in text:
-            text = text.replace(abv, separate_abb(abv))
-    return text
-def chunk_text(text, max_length=250):
-    words = text.split()
-    chunks = []
-    current_chunk = []
-    current_length = 0
-    for word in words:
-        if current_length + len(word) + 1 <= max_length:
-            current_chunk.append(word)
-            current_length += len(word) + 1
-        else:
-            chunks.append(' '.join(current_chunk))
-            current_chunk = [word]
-            current_length = len(word) + 1
-    if current_chunk:
-        chunks.append(' '.join(current_chunk))
-    return chunks
 def generate_audio_parler_tts(text):
     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
-    chunks = chunk_text(preprocess(text))
-    audio_segments = []
-    for chunk in chunks:
-        input_ids = parler_tokenizer(description, return_tensors="pt").input_ids.to(device)
-        prompt_input_ids = parler_tokenizer(chunk, return_tensors="pt").input_ids.to(device)
-        generation = parler_model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
-        audio_arr = generation.cpu().numpy().squeeze()
-        temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_{len(audio_segments)}.wav")
-        sf.write(temp_audio_path, audio_arr, parler_model.config.sampling_rate)
-        audio_segments.append(AudioSegment.from_wav(temp_audio_path))
-    combined_audio = sum(audio_segments)
-    combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio.wav")
-    combined_audio.export(combined_audio_path, format="wav")
     logging.debug(f"Audio saved to {combined_audio_path}")
     return combined_audio_path
@@ -796,6 +805,8 @@ def generate_audio_parler_tts(text):
 def fetch_local_events():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
@@ -1133,7 +1144,6 @@ def fetch_google_flights(departure_id="JFK", arrival_id="BHM", outbound_date=cur
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     with gr.Row():
         with gr.Column():
@@ -1190,4 +1200,4 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         #     events_output = gr.HTML(value=fetch_local_events())
 demo.queue()
-demo.launch(share=True)

     | StrOutputParser()
 )
+# def bot(history, choice, tts_choice, retrieval_mode, model_choice):
+#     if not history:
+#         return history
+#     # Select the model
+#     selected_model = chat_model if model_choice == "GPT-4o" else phi_pipe
+#     response, addresses = generate_answer(history[-1][0], choice, retrieval_mode, selected_model)
+#     history[-1][1] = ""
+#     with concurrent.futures.ThreadPoolExecutor() as executor:
+#         if tts_choice == "Alpha":
+#             audio_future = executor.submit(generate_audio_elevenlabs, response)
+#         elif tts_choice == "Beta":
+#             audio_future = executor.submit(generate_audio_parler_tts, response)
+#         # elif tts_choice == "Gamma":
+#         #     audio_future = executor.submit(generate_audio_mars5, response)
+#         for character in response:
+#             history[-1][1] += character
+#             time.sleep(0.05)
+#             yield history, None
+#         audio_path = audio_future.result()
+#         yield history, audio_path
+#     history.append([response, None])
     history.append([response, None])
 phi_custom_template = """
 <|system|>
 You are a helpful assistant who provides clear, organized, crisp and conversational responses about an events,concerts,sports and all other activities of Birmingham,Alabama .<|end|>
         return None
+from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
+from transformers import AutoTokenizer
+from threading import Thread
 def generate_audio_parler_tts(text):
     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
+    chunk_size_in_s = 0.5
+    # Initialize the tokenizer and model
+    parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
+    parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
+    sampling_rate = parler_model.audio_encoder.config.sampling_rate
+    frame_rate = parler_model.audio_encoder.config.frame_rate
+    def generate(text, description, play_steps_in_s=0.5):
+        play_steps = int(frame_rate * play_steps_in_s)
+        streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
+        inputs = parler_tokenizer(description, return_tensors="pt").to(device)
+        prompt = parler_tokenizer(text, return_tensors="pt").to(device)
+        generation_kwargs = dict(
+            input_ids=inputs.input_ids,
+            prompt_input_ids=prompt.input_ids,
+            attention_mask=inputs.attention_mask,
+            prompt_attention_mask=prompt.attention_mask,
+            streamer=streamer,
+            do_sample=True,
+            temperature=1.0,
+            min_new_tokens=10,
+        )
+        thread = Thread(target=parler_model.generate, kwargs=generation_kwargs)
+        thread.start()
+        for new_audio in streamer:
+            if new_audio.shape[0] == 0:
+                break
+            yield sampling_rate, new_audio
+    audio_segments = []
+    for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
+        audio_segments.append(audio_chunk)
+    # Combine all the audio chunks into one audio file
+    combined_audio = np.concatenate(audio_segments)
+    combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")
+    write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))
     logging.debug(f"Audio saved to {combined_audio_path}")
     return combined_audio_path
 def fetch_local_events():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_events&q=Events+in+Birmingham&hl=en&gl=us&api_key={api_key}'
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     with gr.Row():
         with gr.Column():
         #     events_output = gr.HTML(value=fetch_local_events())
 demo.queue()
+demo.launch(share=True)