shukdevdatta123 committed on
Commit
469c3a9
·
verified ·
1 Parent(s): 47b5817

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -69
app.py CHANGED
@@ -55,79 +55,25 @@ def transcribe_audio(audio_file):
55
  except sr.RequestError:
56
  return "Could not request results from Google Speech Recognition service."
57
 
58
- # Function to get the HTML of the page
59
- def gethtml(url):
60
- headers = {
61
- "cache-Control": "no-cache",
62
- "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
63
- "accept-encoding": "gzip, deflate, br",
64
- "accept-language": "en-US,en;q=0.9",
65
- "content-type": "application/x-www-form-urlencoded",
66
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
67
- }
68
- kv = {"sf_url": url,
69
- "sf_submit": "",
70
- "new": "1",
71
- "lang": "en",
72
- "app": "",
73
- "country": "us",
74
- "os": "Windows",
75
- "browser": "Chrome"}
76
- r = requests.post(url="https://en.savefrom.net/savefrom.php", headers=headers, data=kv)
77
- r.raise_for_status()
78
- return r.text
79
-
80
- # Function to extract the video download URL
81
  def extract_video_url(youtube_url):
82
  try:
83
- # Get the HTML content of the YouTube page
84
- reo = gethtml(youtube_url)
85
-
86
- # Try extracting the relevant script tag containing download information
87
- try:
88
- reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
89
- except IndexError:
90
- raise ValueError("Could not find the script containing video data in the HTML response.")
91
-
92
- # Modify the script to allow extraction
93
- reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
94
- reA = reo.split("\n")
95
-
96
- if len(reA) < 3:
97
- raise ValueError("Could not extract valid script data from the YouTube page.")
98
 
99
- # Extract the JSON object containing the video download URLs
100
- name = reA[len(reA) - 3].split(";")[0] + ";"
101
- addition = """
102
- const jsdom = require("jsdom");
103
- const { JSDOM } = jsdom;
104
- const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
105
- window = dom.window;
106
- document = window.document;
107
- XMLHttpRequest = window.XMLHttpRequest;
108
- """
109
- ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
110
- text = ct.eval(name.split("=")[1].replace(";", ""))
111
-
112
- # Extract and parse the JSON
113
- try:
114
- result = re.search('show\((.*?)\);;', text, re.I | re.M)
115
- if result is None:
116
- raise ValueError("No valid video download URL found in the extracted data.")
117
-
118
- result = result.group(0).replace("show(", "").replace(");;", "")
119
- j = json.loads(result)
120
 
121
- # Ensure the JSON contains the expected download URLs
122
- if "url" not in j or len(j["url"]) == 0:
123
- raise ValueError("No valid download links found in the extracted data.")
124
-
125
- # Assuming the first video URL is what we want (or try a different index if necessary)
126
- downurl = j["url"][0]["url"]
127
- return downurl
128
-
129
- except (IndexError, KeyError, json.JSONDecodeError) as e:
130
- raise ValueError(f"Error occurred while extracting the download URL: {e}")
131
 
132
  except Exception as e:
133
  raise ValueError(f"Error occurred while extracting the download URL: {e}")
 
55
  except sr.RequestError:
56
  return "Could not request results from Google Speech Recognition service."
57
 
58
+ # Function to extract video download URL using yt-dlp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def extract_video_url(youtube_url):
60
  try:
61
+ # Set up yt-dlp options
62
+ ydl_opts = {
63
+ 'format': 'bestaudio/best', # Get best audio or best video
64
+ 'quiet': True, # Suppress output
65
+ }
 
 
 
 
 
 
 
 
 
 
66
 
67
+ # Create a yt-dlp object with the provided options
68
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
69
+ # Extract the video information
70
+ info_dict = ydl.extract_info(youtube_url, download=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ # Check if the video has an 'url' field and return it
73
+ if 'url' in info_dict:
74
+ return info_dict['url']
75
+ else:
76
+ raise ValueError("Unable to find download URL for the video.")
 
 
 
 
 
77
 
78
  except Exception as e:
79
  raise ValueError(f"Error occurred while extracting the download URL: {e}")