pm6six committed on
Commit
8990976
·
verified ·
1 Parent(s): 2290b1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -27
app.py CHANGED
@@ -1,38 +1,89 @@
1
  import streamlit as st
2
- from io import BytesIO
3
- from urllib.request import urlopen
4
- import librosa
5
- from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor
6
- import pyttsx3 # For text-to-speech
7
-
8
- # Load Qwen2Audio model and processor
9
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
10
- model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct", device_map="auto")
11
- tts_engine = pyttsx3.init()
12
 
13
  # Streamlit app UI
14
  st.title("Text-to-Audio App")
15
- st.text("This app generates audio from text input using Hugging Face models.")
16
 
17
  # User input
18
- text_input = st.text_area("Enter some text for the model:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  if st.button("Generate Audio"):
20
- conversation = [{"role": "user", "content": text_input}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Preprocess conversation
23
- text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
24
- inputs = processor(text=text, return_tensors="pt", padding=True)
25
- inputs.input_ids = inputs.input_ids.to("cuda")
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Generate response
28
- generate_ids = model.generate(**inputs, max_length=256)
29
- generate_ids = generate_ids[:, inputs.input_ids.size(1):]
 
 
 
 
 
 
30
 
31
- # Decode response
32
- response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
33
- st.text(f"Model Response: {response}")
34
 
35
- # Convert response to speech
36
- tts_engine.say(response)
37
- tts_engine.runAndWait()
38
- st.success("Audio generated and played!")
 
 
 
 
 
 
 
 
 
"""Streamlit app that converts typed text or the text of an uploaded PDF to speech.

The text is synthesized with gTTS, played back through an embedded HTML audio
player that has a playback-speed slider, and offered as a download.
"""
import base64
import os
from io import BytesIO

import PyPDF2
import streamlit as st
import streamlit.components.v1 as components
from gtts import gTTS

# gTTS produces MP3 data, so the download name and the MIME type advertised to
# the browser must say MP3 (the previous "output.wav" / "audio/wav" mislabeled it).
AUDIO_FILE_NAME = "output.mp3"
AUDIO_MIME_TYPE = "audio/mpeg"

# Streamlit app UI
st.title("Text-to-Audio App")
st.text("This app converts your text input or PDF content into audio using TTS.")

# User input
text_input = st.text_area("Enter some text:")

# PDF file upload; when text can be extracted it replaces the typed text.
uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
if uploaded_file is not None:
    try:
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # `or ""` keeps the join safe if a page yields no text at all.
        extracted_text = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

        if extracted_text.strip():
            text_input = extracted_text
            st.success("Text extracted from the uploaded PDF!")
            st.text_area("Extracted Text:", text_input, height=200)
        else:
            st.error("No extractable text found in the uploaded PDF.")
    except Exception as e:
        # UI boundary: report any PDF parsing failure instead of crashing the app.
        st.error(f"An error occurred while reading the PDF: {e}")

if st.button("Generate Audio"):
    if not text_input.strip():
        st.error("Please enter some text or upload a PDF with extractable text!")
    else:
        try:
            # Synthesize into memory rather than a fixed file on disk: every
            # session runs in the same working directory, so a shared
            # "output" file could be overwritten by another user's request.
            tts = gTTS(text=text_input, lang="en")
            audio_buffer = BytesIO()
            tts.write_to_fp(audio_buffer)
            audio_data = audio_buffer.getvalue()

            if audio_data:
                # Inline the audio as a base64 data URI for the HTML player.
                audio_base64 = base64.b64encode(audio_data).decode()

                # Custom HTML audio player with a playback-speed slider.
                # Doubled braces are literal braces inside the f-string.
                audio_html = f"""
                <audio id="audio" controls style="width: 100%; margin-top: 10px;">
                    <source src="data:{AUDIO_MIME_TYPE};base64,{audio_base64}" type="{AUDIO_MIME_TYPE}">
                    Your browser does not support the audio element.
                </audio>
                <div style="margin-top: 10px;">
                    <label for="speed" style="font-weight: bold;">Playback Speed:</label>
                    <input type="range" id="speed" min="0.5" max="2.0" value="1.0" step="0.1" style="width: 50%; margin-left: 10px;">
                    <span id="speed-value">1.0x</span>
                </div>
                <script>
                    const audio = document.getElementById("audio");
                    const speedSlider = document.getElementById("speed");
                    const speedValue = document.getElementById("speed-value");

                    // Update playback speed dynamically
                    speedSlider.addEventListener("input", () => {{
                        const speed = parseFloat(speedSlider.value);
                        audio.playbackRate = speed;
                        speedValue.textContent = speed.toFixed(1) + "x";
                    }});
                </script>
                """
                components.html(audio_html, height=200)

                st.success("Audio generated successfully!")

                # Provide download option, reusing the bytes already in memory.
                st.download_button(
                    label="Download Audio",
                    data=audio_data,
                    file_name=AUDIO_FILE_NAME,
                    mime=AUDIO_MIME_TYPE,
                )
            else:
                st.error("Audio file could not be generated.")
        except Exception as e:
            # UI boundary: surface TTS/network failures to the user.
            st.error(f"An error occurred: {e}")