shukdevdatta123 committed on
Commit
60c6961
·
verified ·
1 Parent(s): ca5f8e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -21
app.py CHANGED
@@ -73,28 +73,55 @@ def gethtml(url):
73
  r.raise_for_status()
74
  return r.text
75
 
76
- # Function to extract the video download URL
77
  def extract_video_url(youtube_url):
78
- reo = gethtml(youtube_url)
79
- reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
80
- reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
81
- reA = reo.split("\n")
82
- name = reA[len(reA) - 3].split(";")[0] + ";"
83
- addition = """
84
- const jsdom = require("jsdom");
85
- const { JSDOM } = jsdom;
86
- const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
87
- window = dom.window;
88
- document = window.document;
89
- XMLHttpRequest = window.XMLHttpRequest;
90
- """
91
- ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
92
- text = ct.eval(name.split("=")[1].replace(";", ""))
93
- result = re.search('show\((.*?)\);;', text, re.I | re.M).group(0).replace("show(", "").replace(");;", "")
94
- j = json.loads(result)
95
- num = 1
96
- downurl = j["url"][num]["url"]
97
- return downurl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  # Streamlit app layout
100
  st.title("Video and Audio to Text Transcription")
@@ -220,6 +247,7 @@ elif tab == "Audio":
220
  mime="audio/wav"
221
  )
222
 
 
223
  elif tab == "YouTube":
224
  youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
225
 
 
73
  r.raise_for_status()
74
  return r.text
75
 
76
+ # Function to extract the video download URL with better error handling
77
  def extract_video_url(youtube_url):
78
+ try:
79
+ # Get HTML content for the provided URL
80
+ reo = gethtml(youtube_url)
81
+
82
+ # Extract the script containing the video download info
83
+ reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
84
+ reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
85
+ reA = reo.split("\n")
86
+
87
+ # Ensure that the necessary script part is found
88
+ if len(reA) < 3:
89
+ raise ValueError("Could not extract valid script data from the YouTube page.")
90
+
91
+ # Extract the video URL
92
+ name = reA[len(reA) - 3].split(";")[0] + ";"
93
+ addition = """
94
+ const jsdom = require("jsdom");
95
+ const { JSDOM } = jsdom;
96
+ const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
97
+ window = dom.window;
98
+ document = window.document;
99
+ XMLHttpRequest = window.XMLHttpRequest;
100
+ """
101
+ ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
102
+ text = ct.eval(name.split("=")[1].replace(";", ""))
103
+
104
+ # Validate the extraction of the JSON data
105
+ result = re.search('show\((.*?)\);;', text, re.I | re.M)
106
+ if not result:
107
+ raise ValueError("No video download URL found in the script data.")
108
+
109
+ result = result.group(0).replace("show(", "").replace(");;", "")
110
+
111
+ # Parse the result as JSON
112
+ j = json.loads(result)
113
+
114
+ # Ensure that the URL data exists
115
+ if "url" not in j or len(j["url"]) <= 1:
116
+ raise ValueError("No valid download links found for this video.")
117
+
118
+ # Return the download URL for the video
119
+ num = 1 # Get the second URL from the available options
120
+ downurl = j["url"][num]["url"]
121
+ return downurl
122
+
123
+ except Exception as e:
124
+ raise ValueError(f"Error occurred while extracting the download URL: {e}")
125
 
126
  # Streamlit app layout
127
  st.title("Video and Audio to Text Transcription")
 
247
  mime="audio/wav"
248
  )
249
 
250
+ # Streamlit UI for YouTube download
251
  elif tab == "YouTube":
252
  youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
253