Spaces:

shukdevdatta123
/

Video-Transcriber

Running

App Files Community

shukdevdatta123 committed on Feb 8

Commit

60c6961

verified ·

1 Parent(s): ca5f8e8

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -21

app.py CHANGED Viewed

@@ -73,28 +73,55 @@ def gethtml(url):
     r.raise_for_status()
     return r.text
-# Function to extract the video download URL
 def extract_video_url(youtube_url):
-    reo = gethtml(youtube_url)
-    reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
-    reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
-    reA = reo.split("\n")
-    name = reA[len(reA) - 3].split(";")[0] + ";"
-    addition = """
-    const jsdom = require("jsdom");
-    const { JSDOM } = jsdom;
-    const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
-    window = dom.window;
-    document = window.document;
-    XMLHttpRequest = window.XMLHttpRequest;
-    """
-    ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
-    text = ct.eval(name.split("=")[1].replace(";", ""))
-    result = re.search('show\((.*?)\);;', text, re.I | re.M).group(0).replace("show(", "").replace(");;", "")
-    j = json.loads(result)
-    num = 1
-    downurl = j["url"][num]["url"]
-    return downurl
 # Streamlit app layout
 st.title("Video and Audio to Text Transcription")
@@ -220,6 +247,7 @@ elif tab == "Audio":
                 mime="audio/wav"
             )
 elif tab == "YouTube":
     youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")

     r.raise_for_status()
     return r.text
+# Function to extract the video download URL with better error handling
 def extract_video_url(youtube_url):
+    try:
+        # Get HTML content for the provided URL
+        reo = gethtml(youtube_url)
+        # Extract the script containing the video download info
+        reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
+        reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
+        reA = reo.split("\n")
+        # Ensure that the necessary script part is found
+        if len(reA) < 3:
+            raise ValueError("Could not extract valid script data from the YouTube page.")
+        # Extract the video URL
+        name = reA[len(reA) - 3].split(";")[0] + ";"
+        addition = """
+        const jsdom = require("jsdom");
+        const { JSDOM } = jsdom;
+        const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
+        window = dom.window;
+        document = window.document;
+        XMLHttpRequest = window.XMLHttpRequest;
+        """
+        ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
+        text = ct.eval(name.split("=")[1].replace(";", ""))
+        # Validate the extraction of the JSON data
+        result = re.search('show\((.*?)\);;', text, re.I | re.M)
+        if not result:
+            raise ValueError("No video download URL found in the script data.")
+        result = result.group(0).replace("show(", "").replace(");;", "")
+        # Parse the result as JSON
+        j = json.loads(result)
+        # Ensure that the URL data exists
+        if "url" not in j or len(j["url"]) <= 1:
+            raise ValueError("No valid download links found for this video.")
+        # Return the download URL for the video
+        num = 1  # Get the second URL from the available options
+        downurl = j["url"][num]["url"]
+        return downurl
+    except Exception as e:
+        raise ValueError(f"Error occurred while extracting the download URL: {e}")
 # Streamlit app layout
 st.title("Video and Audio to Text Transcription")
                 mime="audio/wav"
             )
+# Streamlit UI for YouTube download
 elif tab == "YouTube":
     youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")