Spaces:

shukdevdatta123
/

Video-Transcriber

Sleeping

App Files Files Community

shukdevdatta123 committed on Feb 8

Commit

469c3a9

verified ·

1 Parent(s): 47b5817

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -69

app.py CHANGED Viewed

@@ -55,79 +55,25 @@ def transcribe_audio(audio_file):
     except sr.RequestError:
         return "Could not request results from Google Speech Recognition service."
-# Function to get the HTML of the page
-def gethtml(url):
-    headers = {
-        "cache-Control": "no-cache",
-        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
-        "accept-encoding": "gzip, deflate, br",
-        "accept-language": "en-US,en;q=0.9",
-        "content-type": "application/x-www-form-urlencoded",
-        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
-    }
-    kv = {"sf_url": url,
-          "sf_submit": "",
-          "new": "1",
-          "lang": "en",
-          "app": "",
-          "country": "us",
-          "os": "Windows",
-          "browser": "Chrome"}
-    r = requests.post(url="https://en.savefrom.net/savefrom.php", headers=headers, data=kv)
-    r.raise_for_status()
-    return r.text
-# Function to extract the video download URL
 def extract_video_url(youtube_url):
     try:
-        # Get the HTML content of the YouTube page
-        reo = gethtml(youtube_url)
-        # Try extracting the relevant script tag containing download information
-        try:
-            reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
-        except IndexError:
-            raise ValueError("Could not find the script containing video data in the HTML response.")
-        # Modify the script to allow extraction
-        reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
-        reA = reo.split("\n")
-        if len(reA) < 3:
-            raise ValueError("Could not extract valid script data from the YouTube page.")
-        # Extract the JSON object containing the video download URLs
-        name = reA[len(reA) - 3].split(";")[0] + ";"
-        addition = """
-        const jsdom = require("jsdom");
-        const { JSDOM } = jsdom;
-        const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
-        window = dom.window;
-        document = window.document;
-        XMLHttpRequest = window.XMLHttpRequest;
-        """
-        ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
-        text = ct.eval(name.split("=")[1].replace(";", ""))
-        # Extract and parse the JSON
-        try:
-            result = re.search('show\((.*?)\);;', text, re.I | re.M)
-            if result is None:
-                raise ValueError("No valid video download URL found in the extracted data.")
-            result = result.group(0).replace("show(", "").replace(");;", "")
-            j = json.loads(result)
-            # Ensure the JSON contains the expected download URLs
-            if "url" not in j or len(j["url"]) == 0:
-                raise ValueError("No valid download links found in the extracted data.")
-            # Assuming the first video URL is what we want (or try a different index if necessary)
-            downurl = j["url"][0]["url"]
-            return downurl
-        except (IndexError, KeyError, json.JSONDecodeError) as e:
-            raise ValueError(f"Error occurred while extracting the download URL: {e}")
     except Exception as e:
         raise ValueError(f"Error occurred while extracting the download URL: {e}")

     except sr.RequestError:
         return "Could not request results from Google Speech Recognition service."
+# Function to extract video download URL using yt-dlp
 def extract_video_url(youtube_url):
     try:
+        # Set up yt-dlp options
+        ydl_opts = {
+            'format': 'bestaudio/best',  # Get best audio or best video
+            'quiet': True,  # Suppress output
+        }
+        # Create a yt-dlp object with the provided options
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            # Extract the video information
+            info_dict = ydl.extract_info(youtube_url, download=False)
+            # Check if the video has an 'url' field and return it
+            if 'url' in info_dict:
+                return info_dict['url']
+            else:
+                raise ValueError("Unable to find download URL for the video.")
     except Exception as e:
         raise ValueError(f"Error occurred while extracting the download URL: {e}")