Spaces:
Running
Running
import os | |
import requests | |
import json | |
import gradio as gr | |
import re | |
# RapidAPI settings: the API key is read from the AA_KEY environment variable
# (None when it is not set); the API host name is fixed.
AA_HOST = "youtube-transcriptor.p.rapidapi.com"
AA_KEY = os.environ.get("AA_KEY")
def get_video_id(youtube_url):
    """Extract the 11-character video ID from a YouTube URL or bare ID.

    Args:
        youtube_url: A watch URL (``...watch?v=ID``), a ``youtu.be/ID`` short
            link, a ``/shorts/ID``, ``/embed/ID``, ``/live/ID`` or ``/v/ID``
            URL, or just the video ID itself.

    Returns:
        The video ID string, or ``None`` when no ID can be found (including
        when the input is not a string).
    """
    if not isinstance(youtube_url, str):
        return None
    candidate = youtube_url.strip()

    id_chars = r"[0-9A-Za-z_-]{11}"

    # A bare video ID: return it directly instead of asking a regex for a
    # capture group that does not exist.
    if re.fullmatch(id_chars, candidate):
        return candidate

    # Well-known URL shapes. These are matched against the full URL because
    # the ID of a watch URL lives in the query string (``?v=`` / ``&v=``).
    known_forms = (
        r"[?&]v=(" + id_chars + r")",
        r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:shorts|embed|live|v)/)("
        + id_chars
        + r")",
    )
    for pattern in known_forms:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)

    # Fallback kept for backward compatibility: the first ID-shaped path
    # segment, looked up with the query string removed.
    path_only = candidate.split("?")[0]
    match = re.search(r"(?:v=|/)(" + id_chars + r")", path_only)
    return match.group(1) if match else None
# Transcript languages to try, in order of preference.
LANGUAGE_PRIORITY = [
    "ko",
    "en",
    "ja",
    "zh",
]
def get_youtube_transcript(youtube_url, timeout=10):
    """Fetch a transcript for a YouTube video, trying each language in turn.

    The video ID is resolved with ``get_video_id`` and one request per entry
    of ``LANGUAGE_PRIORITY`` is sent to the transcript API until a response
    with a non-empty ``"transcript"`` entry is received.

    Args:
        youtube_url: YouTube URL (or bare video ID) entered by the user.
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        ``{"language": lang, "data": data}`` for the first language that
        yields a transcript, otherwise ``{"error": message}``.
    """
    video_id = get_video_id(youtube_url)
    if video_id is None:
        return {"error": "μλͺ»λ μ νλΈ URLμ λλ€. λΉλμ€ IDλ₯Ό μ°Ύμ μ μμ΅λλ€."}

    url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
    headers = {
        "x-rapidapi-key": AA_KEY,
        "x-rapidapi-host": AA_HOST,
    }

    # Try the languages one by one, in priority order.
    for lang in LANGUAGE_PRIORITY:
        querystring = {"video_id": video_id, "lang": lang}
        try:
            response = requests.get(
                url, headers=headers, params=querystring, timeout=timeout
            )
            response.raise_for_status()  # raise on HTTP error status codes
            data = response.json()
        except requests.exceptions.RequestException as e:
            # Network failure, timeout or HTTP error: move on to the next language.
            print(f"Error for language {lang}: {str(e)}")
            continue
        except ValueError as e:
            # json.JSONDecodeError is a ValueError; catching the base class
            # also covers other JSON backends that raise their own subclass.
            print(f"JSON Decode Error for language {lang}: {str(e)}")
            continue

        # NOTE(review): assumes the API answers with a JSON object carrying a
        # "transcript" key - confirm against the API documentation.
        # Any other payload shape is treated as "no transcript".
        if not isinstance(data, dict) or not data.get("transcript"):
            continue
        return {"language": lang, "data": data}

    # No language produced a transcript.
    return {"error": "μ°μ μμ μΈμ΄λ‘ μλ§μ μ°Ύμ μ μμ΅λλ€. ν΄λΉ λμμμ μλ§μ΄ μκ±°λ μ κ·Όν μ μμ΅λλ€."}
def youtube_transcript_interface(youtube_url):
    """Gradio callback: look up the transcript and return it as JSON text.

    The result of ``get_youtube_transcript`` (transcript data or an error
    dictionary) is serialized with a two-space indent, keeping non-ASCII
    characters unescaped.
    """
    return json.dumps(
        get_youtube_transcript(youtube_url),
        ensure_ascii=False,
        indent=2,
    )
# Build the Gradio UI: a text input wired to the transcript callback, with a
# text output.
interface = gr.Interface(
    youtube_transcript_interface,
    "text",
    "text",
    title="YouTube μλ§ μΆμΆκΈ°",
    description="μ νλΈ URLμ μ λ ₯νμΈμ.",
)

# Start the Gradio app.
interface.launch()