Spaces:
Sleeping
Sleeping
import streamlit as st | |
import moviepy.editor as mp | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
import tempfile | |
import os | |
import io | |
import requests | |
import execjs | |
import re | |
import json | |
# Function to convert video to audio | |
def video_to_audio(video_file):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_file: Path of the video file to open with moviepy.

    Returns:
        Path of a newly created temporary ``.mp3`` file containing the audio.
        The caller is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = mp.VideoFileClip(video_file)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError("The video does not contain an audio track.")

        # mkstemp creates the file atomically; the deprecated tempfile.mktemp
        # only returns a name and is open to a race with other processes.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # only the path is needed; moviepy reopens it for writing
        try:
            audio.write_audiofile(temp_audio_path)
        except Exception:
            # Do not leave a half-written file behind when the export fails.
            os.remove(temp_audio_path)
            raise
    finally:
        # Always release the clip's reader resources, even on failure.
        video.close()
    return temp_audio_path
# Function to convert MP3 audio to WAV | |
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file to a temporary WAV file.

    Args:
        mp3_file: Path of the MP3 file to load with pydub.

    Returns:
        Path of a newly created temporary ``.wav`` file. The caller is
        responsible for deleting it.
    """
    audio = AudioSegment.from_mp3(mp3_file)

    # mkstemp creates the file atomically; the deprecated tempfile.mktemp
    # only returns a name and is open to a race with other processes.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; pydub reopens it for writing

    audio.export(temp_wav_path, format="wav")
    return temp_wav_path
# Function to transcribe audio to text | |
def transcribe_audio(audio_file):
    """Transcribe an audio file to text using the Google Web Speech API.

    Args:
        audio_file: Path of an audio file accepted by ``sr.AudioFile``.

    Returns:
        The recognized text. When recognition fails, a fixed human-readable
        message is returned instead of raising, so callers can display the
        result directly.
    """
    engine = sr.Recognizer()

    # Read the whole file into memory as a single recording.
    with sr.AudioFile(audio_file) as stream:
        recording = engine.record(stream)

    try:
        transcript = engine.recognize_google(recording)
    except sr.UnknownValueError:
        return "Audio could not be understood."
    except sr.RequestError:
        return "Could not request results from Google Speech Recognition service."
    else:
        return transcript
# Function to get the HTML of the page | |
def gethtml(url, timeout=30):
    """Submit *url* to the savefrom.net lookup endpoint and return the response body.

    Args:
        url: The video page URL to look up (sent as the ``sf_url`` form field).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the app.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # NOTE(review): "br" is advertised in accept-encoding; presumably the
    # environment can decode Brotli responses -- confirm.
    headers = {
        "cache-Control": "no-cache",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/x-www-form-urlencoded",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    }
    # Form fields posted to the lookup endpoint.
    kv = {
        "sf_url": url,
        "sf_submit": "",
        "new": "1",
        "lang": "en",
        "app": "",
        "country": "us",
        "os": "Windows",
        "browser": "Chrome",
    }
    r = requests.post(
        url="https://en.savefrom.net/savefrom.php",
        headers=headers,
        data=kv,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.text
# Function to extract the video download URL | |
def extract_video_url(youtube_url, node_modules_dir=None):
    """Resolve a direct download URL for a video via the savefrom.net response.

    The HTML returned by ``gethtml`` embeds an obfuscated script. That script
    is evaluated with execjs (with a jsdom-provided ``window``/``document``)
    and the JSON passed to its ``show(...)`` call is parsed for download links.

    Args:
        youtube_url: URL of the video page to look up.
        node_modules_dir: Working directory handed to ``execjs.compile`` so
            that ``require("jsdom")`` can be resolved. When omitted, the
            previously hard-coded Windows directory is used if it exists;
            otherwise no working directory is forced (``cwd=None``).

    Returns:
        The ``url`` field of the first entry in the payload's ``url`` list.

    Raises:
        ValueError: If any step fails (request error, unexpected page layout,
            script evaluation failure, malformed or empty payload). The
            original exception is chained as ``__cause__``.
    """
    # Directory the earlier version always used; it only exists on the
    # original author's machine, so it is now just a fallback.
    legacy_node_modules = r'C:\Users\19308\AppData\Roaming\npm\node_modules'
    if node_modules_dir is None and os.path.isdir(legacy_node_modules):
        node_modules_dir = legacy_node_modules

    # Browser-like globals the extracted script expects to find.
    addition = """
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
window = dom.window;
document = window.document;
XMLHttpRequest = window.XMLHttpRequest;
"""

    try:
        page = gethtml(youtube_url)

        # Isolate the first inline script, which carries the download data.
        pieces = page.split("<script type=\"text/javascript\">")
        if len(pieces) < 2:
            raise ValueError("Could not find the script containing video data in the HTML response.")
        script = pieces[1].split("</script>")[0]

        # Neutralize alert() so evaluating the script cannot fail on it.
        script = script.replace("(function(){", "(function(){\nthis.alert=function(){};")
        script_lines = script.split("\n")
        if len(script_lines) < 3:
            raise ValueError("Could not extract valid script data from the YouTube page.")

        # The third line from the end holds the assignment whose right-hand
        # side is the expression to evaluate.
        name = script_lines[-3].split(";")[0] + ";"

        # SECURITY: this executes JavaScript received from a third-party site
        # in a local JS runtime with the privileges of this process.
        ct = execjs.compile(addition + script, cwd=node_modules_dir)
        text = ct.eval(name.split("=")[1].replace(";", ""))

        # Capture only the argument of show(...);; -- group(1) avoids
        # stripping "show(" occurrences that are part of the payload itself.
        match = re.search(r'show\((.*?)\);;', text, re.I | re.M)
        if match is None:
            raise ValueError("No valid video download URL found in the extracted data.")
        payload = json.loads(match.group(1))

        if "url" not in payload or not payload["url"]:
            raise ValueError("No valid download links found in the extracted data.")

        # The first listed link is assumed to be the desired one.
        return payload["url"][0]["url"]
    except Exception as e:
        # Single wrapping point so the message prefix is applied exactly once.
        raise ValueError(f"Error occurred while extracting the download URL: {e}") from e
# Streamlit app layout | |
def _save_upload_to_temp(uploaded_file):
    """Write an uploaded file's bytes to a temporary file and return its path."""
    # Keep the original extension so downstream tools can recognize the format.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        # getvalue() is independent of the stream position, so it returns the
        # full content even if the upload was already read on an earlier rerun.
        tmp.write(uploaded_file.getvalue())
        return tmp.name


def _remove_temp_files(*paths):
    """Best-effort removal of temporary files; ``None`` entries are skipped."""
    for path in paths:
        if path is None:
            continue
        try:
            os.remove(path)
        except OSError:
            # Cleanup must never mask the real outcome of the processing
            # (the file may already be gone or still be locked).
            pass


def _show_results(transcription_key, audio_key, transcription_name, audio_name):
    """Render the audio player and download buttons for stored results."""
    if transcription_key not in st.session_state or audio_key not in st.session_state:
        return
    st.audio(st.session_state[audio_key], format='audio/wav')
    st.download_button(
        label="Download Transcription",
        data=st.session_state[transcription_key],
        file_name=transcription_name,
        mime="text/plain"
    )
    st.download_button(
        label="Download Audio",
        data=st.session_state[audio_key],
        file_name=audio_name,
        mime="audio/wav"
    )


st.title("Video and Audio to Text Transcription")
st.write("Upload a video or audio file to convert it to transcription, or enter a YouTube URL to download the video.")

# A select box switches between the video, audio, and YouTube modes.
tab = st.selectbox("Select the type of file to upload or download", ["Video", "Audio", "YouTube"])

if tab == "Video":
    uploaded_video = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])
    if uploaded_video is not None:
        if st.button("Analyze Video"):
            # Temp files are created only after the click (the script reruns
            # on every interaction) and are always removed afterwards.
            tmp_video_path = audio_file = wav_audio_file = None
            try:
                with st.spinner("Processing video... Please wait."):
                    tmp_video_path = _save_upload_to_temp(uploaded_video)
                    audio_file = video_to_audio(tmp_video_path)
                    wav_audio_file = convert_mp3_to_wav(audio_file)
                    transcription = transcribe_audio(wav_audio_file)
                    with open(wav_audio_file, "rb") as f:
                        audio_data = f.read()
            finally:
                _remove_temp_files(tmp_video_path, audio_file, wav_audio_file)

            st.text_area("Transcription", transcription, height=300)
            # Persist results so they survive the rerun triggered by a download.
            st.session_state.transcription = transcription
            st.session_state.wav_audio_file = io.BytesIO(audio_data)

        _show_results("transcription", "wav_audio_file",
                      "transcription.txt", "converted_audio.wav")

elif tab == "Audio":
    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])
    if uploaded_audio is not None:
        if st.button("Analyze Audio"):
            tmp_audio_path = converted_path = None
            try:
                with st.spinner("Processing audio... Please wait."):
                    tmp_audio_path = _save_upload_to_temp(uploaded_audio)
                    # Browsers do not agree on the MIME type reported for MP3
                    # files, so the file extension is checked as well.
                    is_mp3 = (
                        uploaded_audio.type in ("audio/mpeg", "audio/mp3")
                        or uploaded_audio.name.lower().endswith(".mp3")
                    )
                    if is_mp3:
                        converted_path = convert_mp3_to_wav(tmp_audio_path)
                        wav_audio_file = converted_path
                    else:
                        wav_audio_file = tmp_audio_path
                    transcription = transcribe_audio(wav_audio_file)
                    with open(wav_audio_file, "rb") as f:
                        audio_data = f.read()
            finally:
                _remove_temp_files(tmp_audio_path, converted_path)

            st.text_area("Transcription", transcription, height=300)
            # Persist results so they survive the rerun triggered by a download.
            st.session_state.transcription_audio = transcription
            st.session_state.wav_audio_file_audio = io.BytesIO(audio_data)

        _show_results("transcription_audio", "wav_audio_file_audio",
                      "transcription_audio.txt", "converted_audio_audio.wav")

elif tab == "YouTube":
    st.title("YouTube Video Downloader")
    st.write("""This app allows you to download YouTube videos in various formats. Simply enter the YouTube URL below and click "Get Download Link".""")
    youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
    if st.button("Get Download Link"):
        if youtube_url and youtube_url.strip():
            try:
                download_url = extract_video_url(youtube_url.strip())
                st.success("Download link generated successfully!")
                st.write("Click below to download the video:")
                # The URL comes from a third-party response, so raw HTML
                # rendering is deliberately left disabled.
                st.markdown(f"[Download Video]({download_url})")
            except Exception as e:
                # UI boundary: report any failure instead of showing a traceback.
                st.error(f"Error occurred: {e}")
        else:
            st.error("Please enter a valid YouTube URL.")