Spaces:
Running
Running
File size: 3,080 Bytes
073d3e8 5870494 876af8e e95db1b 5870494 c8e948c f44c7d6 c8e948c f44c7d6 c8e948c 5870494 0fbd8f6 5870494 f3279ca 5870494 64cd965 5870494 64cd965 5870494 64cd965 5870494 0fbd8f6 5870494 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
import requests
import json
import gradio as gr
import re
# RapidAPI credentials: the key is read from an environment variable
# (set in the Hugging Face environment); the API host name is fixed.
AA_HOST = "youtube-transcriptor.p.rapidapi.com"
AA_KEY = os.environ.get("AA_KEY")
# An ID is 11 characters from this alphabet and must not be followed by
# another ID character (otherwise we would be slicing a longer token).
_ID_GROUP = r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"

# The input is nothing but a video ID.
_BARE_ID = re.compile(r"[0-9A-Za-z_-]{11}")
# ``v=<id>`` as a query parameter (regular watch URLs).
_QUERY_ID = re.compile(r"(?:^|[?&])v=" + _ID_GROUP)
# Short links and path-based URL forms (Shorts, embeds, live, legacy /v/).
_PATH_ID = re.compile(r"(?:youtu\.be/|/shorts/|/embed/|/live/|/v/)" + _ID_GROUP)
# Fallback: any path segment that is exactly an ID. The look-behind skips
# the second slash of ``scheme://`` so the host name is never considered.
_SEGMENT_ID = re.compile(r"(?<!/)/" + _ID_GROUP)


def get_video_id(youtube_url):
    """Extract the 11-character video ID from a YouTube URL or bare ID.

    Recognised inputs include watch URLs (``...watch?v=<id>``), short links
    (``youtu.be/<id>``), Shorts / embed / live URLs, and a bare video ID.
    Leading and trailing whitespace is ignored.

    Args:
        youtube_url: The URL (or bare ID) entered by the user.

    Returns:
        The video ID as a string, or ``None`` when no ID can be found
        (including when the input is not a string).
    """
    if not isinstance(youtube_url, str):
        return None

    text = youtube_url.strip()

    # A bare ID needs no parsing at all.
    if _BARE_ID.fullmatch(text):
        return text

    # The ``v=`` parameter lives in the query string, so it has to be
    # searched for BEFORE the query string is cut off.
    match = _QUERY_ID.search(text)
    if match is None:
        # Path-based forms: drop query and fragment so their contents
        # cannot be mistaken for a path segment.
        path = re.split(r"[?#]", text, maxsplit=1)[0]
        match = _PATH_ID.search(path) or _SEGMENT_ID.search(path)

    return match.group(1) if match else None
# Subtitle languages to try, in order of preference.
LANGUAGE_PRIORITY = [
    "ko",
    "en",
    "ja",
    "zh",
]
# Seconds to wait on a single transcript request before giving up, so a
# stalled connection cannot hang the app indefinitely.
_REQUEST_TIMEOUT = 15


def get_youtube_transcript(youtube_url):
    """Request a video's transcript, trying each language in priority order.

    The video ID is extracted with ``get_video_id``; the transcript endpoint
    is then queried once per entry of ``LANGUAGE_PRIORITY`` until a response
    with a non-empty ``"transcript"`` field is received. Request and decoding
    failures for one language are printed and the next language is tried.

    Args:
        youtube_url: YouTube URL (or video ID) entered by the user.

    Returns:
        ``{"language": <lang>, "data": <decoded JSON>}`` for the first
        language that yields a transcript, or ``{"error": <message>}`` when
        the video ID cannot be determined or no language succeeds.
    """
    video_id = get_video_id(youtube_url)
    if video_id is None:
        return {"error": "잘못된 유튜브 URL입니다. 비디오 ID를 찾을 수 없습니다."}

    # Derive the endpoint from AA_HOST so URL and host header cannot drift.
    url = f"https://{AA_HOST}/transcript"
    headers = {
        "x-rapidapi-key": AA_KEY,
        "x-rapidapi-host": AA_HOST,
    }

    for lang in LANGUAGE_PRIORITY:
        querystring = {"video_id": video_id, "lang": lang}
        try:
            response = requests.get(
                url,
                headers=headers,
                params=querystring,
                timeout=_REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # raise on HTTP error status codes
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error for language {lang}: {str(e)}")
            continue
        except ValueError as e:
            # JSON decoding errors are ValueError subclasses regardless of
            # which JSON backend the installed requests version uses.
            print(f"JSON Decode Error for language {lang}: {str(e)}")
            continue

        # No usable transcript for this language (missing/empty field, or a
        # payload that is not a JSON object): try the next language.
        if not isinstance(data, dict) or not data.get("transcript"):
            continue
        return {"language": lang, "data": data}

    # None of the preferred languages produced a transcript.
    return {"error": "우선순위 언어로 자막을 찾을 수 없습니다. 해당 동영상에 자막이 없거나 접근할 수 없습니다."}
def youtube_transcript_interface(youtube_url):
    """Gradio callback: look up the transcript and render it as JSON text.

    Args:
        youtube_url: The text entered in the Gradio input box.

    Returns:
        The dictionary returned by ``get_youtube_transcript`` serialised as
        indented JSON, with non-ASCII characters written as-is.
    """
    return json.dumps(
        get_youtube_transcript(youtube_url),
        indent=2,
        ensure_ascii=False,
    )
# Build the Gradio UI: a single text input (the URL) mapped to a single
# text output (the JSON produced by youtube_transcript_interface).
interface = gr.Interface(
    fn=youtube_transcript_interface,
    inputs="text",
    outputs="text",
    title="YouTube 자막 추출기",
    description="유튜브 URL을 입력하세요.",
)

# Start the Gradio app.
interface.launch()