Pijush2023 committed on
Commit 8b59821 · verified · 1 Parent(s): 584eaab

Update app.py

Files changed (1): app.py +50 -18
app.py CHANGED
@@ -24,6 +24,7 @@ from huggingface_hub import login
 from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
+from scipy.io.wavfile import write as write_wav
 from string import punctuation
 
 # Check if the token is already set in the environment variables
@@ -317,7 +318,7 @@ def fetch_local_news():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
     response = requests.get(url)
-    if response.status_code == 200:
+    if response.status_code == 200:
         results = response.json().get("news_results", [])
         news_html = """
         <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
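
Note: with this guard, a failed SERP request silently produces no news section. If stricter handling is ever wanted, a minimal sketch (the helper name fetch_news_json is hypothetical) would surface HTTP errors instead of swallowing them:

    import requests

    def fetch_news_json(url):
        # raise_for_status() raises requests.HTTPError on any 4xx/5xx response
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json().get("news_results", [])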
@@ -494,21 +495,51 @@ def preprocess(text):
         text = text.replace(abv, separate_abb(abv))
     return text
 
+def chunk_text(text, max_length=250):
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_length = 0
+
+    for word in words:
+        if current_length + len(word) + 1 <= max_length:
+            current_chunk.append(word)
+            current_length += len(word) + 1
+        else:
+            chunks.append(' '.join(current_chunk))
+            current_chunk = [word]
+            current_length = len(word) + 1
+
+    if current_chunk:
+        chunks.append(' '.join(current_chunk))
+
+    return chunks
+
 def generate_audio_parler_tts(text):
     description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
-    inputs = parler_tokenizer(description, return_tensors="pt").to(device)
-    prompt = parler_tokenizer(preprocess(text), return_tensors="pt").to(device)
+    chunks = chunk_text(preprocess(text))
+    audio_paths = []
+
+    for chunk in chunks:
+        inputs = parler_tokenizer(description, return_tensors="pt").to(device)
+        prompt = parler_tokenizer(chunk, return_tensors="pt").to(device)
 
-    set_seed(SEED)
-    generation = parler_model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
-    audio_arr = generation.cpu().numpy().squeeze()
+        set_seed(SEED)
+        generation = parler_model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
+        audio_arr = generation.cpu().numpy().squeeze()
 
-    temp_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_audio.wav")
-    from scipy.io.wavfile import write as write_wav
-    write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
+        temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_{len(audio_paths)}.wav")
+        write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
+        audio_paths.append(temp_audio_path)
 
-    logging.debug(f"Audio saved to {temp_audio_path}")
-    return temp_audio_path
+    combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio.wav")
+    with open(combined_audio_path, "wb") as f:
+        for path in audio_paths:
+            with open(path, "rb") as part_f:
+                f.write(part_f.read())
 
+    logging.debug(f"Audio saved to {combined_audio_path}")
+    return combined_audio_path
 
 pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
 pipe.to(device)
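
Note: chunk_text packs whole words greedily until a chunk would exceed max_length characters (counting one space per word), so words are never split. A quick illustrative check, assuming the function exactly as committed above:

    chunks = chunk_text("one two three", max_length=8)
    # 'one two' occupies 7 characters; appending ' three' would exceed 8,
    # so the result is ['one two', 'three']
    print(chunks)

A caveat on the combining step: concatenating the per-chunk WAV files byte-for-byte leaves the RIFF header of every chunk after the first embedded in the audio stream, so some decoders will stop at the first chunk or emit clicks at the seams. A minimal sketch of an alternative that joins the sample arrays before writing a single file (write_combined is a hypothetical helper; it assumes each chunk's audio_arr is a 1-D array at SAMPLE_RATE):

    import os
    import tempfile

    import numpy as np
    from scipy.io.wavfile import write as write_wav

    def write_combined(audio_arrays, sample_rate):
        # Join the raw samples first, then write one structurally valid WAV file.
        combined = np.concatenate(audio_arrays)
        path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio.wav")
        write_wav(path, sample_rate, combined)
        return path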
@@ -553,14 +584,14 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
         audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
 
-        # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
-        # location_output = gr.HTML()
-        # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
+        gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
+        location_output = gr.HTML()
+        bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
 
-    # with gr.Column():
-    #     weather_output = gr.HTML(value=fetch_local_weather())
-    #     news_output = gr.HTML(value=fetch_local_news())
-    #     events_output = gr.HTML(value=fetch_local_events())
+    with gr.Column():
+        weather_output = gr.HTML(value=fetch_local_weather())
+        news_output = gr.HTML(value=fetch_local_news())
+        events_output = gr.HTML(value=fetch_local_events())
 
     with gr.Column():
         image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
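
Note: re-enabling these components means fetch_local_weather(), fetch_local_news(), and fetch_local_events() all run while the Blocks layout is being built, so a slow upstream API delays startup. A sketch of deferring the calls to page load instead (placed inside the same with gr.Blocks(...) as demo: block, assuming import gradio as gr):

    with gr.Column():
        weather_output = gr.HTML()
        news_output = gr.HTML()
        events_output = gr.HTML()

    # demo.load fires when a browser session connects, not at build time
    demo.load(fetch_local_weather, inputs=None, outputs=weather_output)
    demo.load(fetch_local_news, inputs=None, outputs=news_output)
    demo.load(fetch_local_events, inputs=None, outputs=events_output)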
@@ -572,3 +603,4 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 demo.queue()
 demo.launch(share=True)
+