Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -73,28 +73,55 @@ def gethtml(url):
|
|
73 |
r.raise_for_status()
|
74 |
return r.text
|
75 |
|
76 |
-
# Function to extract the video download URL
|
77 |
def extract_video_url(youtube_url):
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
# Streamlit app layout
|
100 |
st.title("Video and Audio to Text Transcription")
|
@@ -220,6 +247,7 @@ elif tab == "Audio":
|
|
220 |
mime="audio/wav"
|
221 |
)
|
222 |
|
|
|
223 |
elif tab == "YouTube":
|
224 |
youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
|
225 |
|
|
|
73 |
r.raise_for_status()
|
74 |
return r.text
|
75 |
|
76 |
+
# Function to extract the video download URL with better error handling
def extract_video_url(youtube_url, node_modules_dir=None):
    """Return a direct download URL scraped from the page at *youtube_url*.

    The page HTML is fetched with ``gethtml``, its first
    ``<script type="text/javascript">`` block is evaluated in Node.js through
    ``execjs`` (with a minimal ``jsdom`` environment prepended), and the JSON
    argument of the resulting ``show(...);;`` call is parsed to find the
    download links.

    Args:
        youtube_url: Address of the page to scrape.
        node_modules_dir: Optional directory used as the working directory
            for the Node.js runtime so that ``require("jsdom")`` resolves.
            When omitted, the ``NODE_MODULES_DIR`` environment variable is
            used; if that is unset, the original developer-machine path is
            used only when it actually exists, otherwise the runtime's
            default working directory applies.

    Returns:
        The ``"url"`` field of the second entry in the payload's ``"url"``
        list.

    Raises:
        ValueError: If any step fails (network error, unexpected page layout,
            JavaScript evaluation error, malformed JSON, or too few links).
            The original exception is attached as ``__cause__``.
    """
    import os

    # Historical hard-coded location; kept only as a last-resort fallback so
    # the machine this was written on keeps working unchanged.
    legacy_node_modules = r'C:\Users\19308\AppData\Roaming\npm\node_modules'

    if node_modules_dir is None:
        node_modules_dir = os.environ.get("NODE_MODULES_DIR")
    if node_modules_dir is None and os.path.isdir(legacy_node_modules):
        node_modules_dir = legacy_node_modules

    try:
        # Get HTML content for the provided URL
        html = gethtml(youtube_url)

        # Extract the script containing the video download info
        script = html.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
        # Neutralise alert() so the script cannot block while being evaluated.
        script = script.replace("(function(){", "(function(){\nthis.alert=function(){};")
        script_lines = script.split("\n")

        # Ensure that the necessary script part is found
        if len(script_lines) < 3:
            raise ValueError("Could not extract valid script data from the YouTube page.")

        # The third line from the end holds the `name = <expression>;`
        # assignment whose right-hand side produces the payload text.
        name = script_lines[-3].split(";")[0] + ";"
        addition = """
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
window = dom.window;
document = window.document;
XMLHttpRequest = window.XMLHttpRequest;
"""
        # SECURITY NOTE(review): this runs JavaScript downloaded from a
        # user-supplied URL inside Node.js with the privileges of this
        # process. Consider sandboxing or restricting the accepted hosts.
        ct = execjs.compile(addition + script, cwd=node_modules_dir)
        text = ct.eval(name.split("=")[1].replace(";", ""))

        # Validate the extraction of the JSON data
        match = re.search(r'show\((.*?)\);;', text, re.I | re.M)
        if not match:
            raise ValueError("No video download URL found in the script data.")

        # Use the captured group directly: stripping "show(" / ");;" with
        # str.replace would also mangle those substrings inside the payload
        # and would miss a differently-cased wrapper matched via re.I.
        payload = match.group(1)

        # Parse the result as JSON
        j = json.loads(payload)

        # Ensure that the URL data exists
        if "url" not in j or len(j["url"]) <= 1:
            raise ValueError("No valid download links found for this video.")

        # Return the download URL for the video
        num = 1  # Get the second URL from the available options
        return j["url"][num]["url"]

    except Exception as e:
        # Single ValueError type for callers; keep the root cause chained.
        raise ValueError(f"Error occurred while extracting the download URL: {e}") from e
|
125 |
|
126 |
# Streamlit app layout
|
127 |
st.title("Video and Audio to Text Transcription")
|
|
|
247 |
mime="audio/wav"
|
248 |
)
|
249 |
|
250 |
+
# Streamlit UI for YouTube download
|
251 |
elif tab == "YouTube":
|
252 |
youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
|
253 |
|