YT_Script_Shorts

Paused

App Files Files

xet

Community

YT_Script_Shorts / app.py

AIRider

Update app.py

4a45930 verified about 1 year ago

raw

history blame

6.06 kB

	import gradio as gr
	from gradio_client import Client
	import json
	import logging
	import ast
	import openai
	import os
	import random
	import re

	# Logging configuration: send DEBUG-and-above records to a local log file,
	# each prefixed with a timestamp and the level name.
	logging.basicConfig(
	    filename='youtube_script_extractor.log',
	    level=logging.DEBUG,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	def parse_api_response(response):
	    """Normalize a raw API response into a dictionary.

	    Args:
	        response: A dict, a list whose first element is the payload, or a
	            string holding either JSON text or a Python literal.

	    Returns:
	        The payload dictionary.

	    Raises:
	        ValueError: If the response cannot be parsed or the resulting
	            payload is not a dict. The original error is chained as the
	            cause.
	    """
	    try:
	        if isinstance(response, str):
	            # Try JSON first: ast.literal_eval rejects true/false/null,
	            # which are legal in JSON. Fall back to literal_eval for
	            # Python-repr style strings (e.g. single-quoted keys).
	            try:
	                response = json.loads(response)
	            except ValueError:
	                response = ast.literal_eval(response)
	        if isinstance(response, list) and response:
	            # A non-empty list wraps the payload in its first element.
	            response = response[0]
	        if not isinstance(response, dict):
	            raise ValueError(f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(response)}")
	        return response
	    except Exception as e:
	        # Callers only need to handle ValueError; keep the root cause chained.
	        raise ValueError(f"API 응답 파싱 실패: {str(e)}") from e

	# Sentence splitting helper (Korean).
	# The endings are regex alternatives separated by a plain `|`; an escaped
	# `\|` would match a literal pipe character and the splitter would never fire.
	# The negative lookahead keeps an ending from matching in the middle of a word.
	_SENTENCE_ENDING_RE = re.compile(
	    r"(니다|에요|구나|해요|군요|겠어요|시오|해라|예요|아요|데요|대요|세요|어요|게요|구요|고요|나요|하죠)(?![\w])"
	)


	def split_sentences(text, max_length=100):
	    """Split Korean text into display chunks at sentence endings.

	    The text is cut after common Korean sentence endings, and consecutive
	    sentences are packed together into one chunk until adding the next one
	    would exceed ``max_length`` characters. A chunk is also closed as soon
	    as a piece ends with '.', '?' or '!'.

	    Args:
	        text: The text to split.
	        max_length: Maximum combined length before a new chunk is started.
	            A single sentence longer than this is kept whole.

	    Returns:
	        A list of stripped, non-empty chunks.
	    """
	    # With one capture group, re.split yields
	    # [body, ending, body, ending, ..., tail]; walk it in pairs.
	    pieces = _SENTENCE_ENDING_RE.split(text)
	    combined_sentences = []
	    current_sentence = ""
	    for i in range(0, len(pieces), 2):
	        sentence = "".join(pieces[i:i + 2])
	        if len(current_sentence) + len(sentence) > max_length:
	            # Flush only real content: when the very first sentence is
	            # already over the limit there is nothing to flush yet.
	            if current_sentence.strip():
	                combined_sentences.append(current_sentence.strip())
	            current_sentence = sentence.strip()
	        else:
	            current_sentence += sentence
	        if sentence.endswith(('.', '?', '!')):
	            combined_sentences.append(current_sentence.strip())
	            current_sentence = ""
	    if current_sentence.strip():
	        combined_sentences.append(current_sentence.strip())
	    return combined_sentences

	def get_youtube_script(url):
	    """Fetch the title and transcript of a YouTube video via the remote API.

	    Calls the "/predict" endpoint of the "whispersound/YT_Ts_R" Gradio app
	    and reads ``data[0]["title"]`` and ``data[0]["transcriptionAsText"]``
	    from the parsed response.

	    Args:
	        url: The YouTube video URL to send to the API.

	    Returns:
	        A ``(title, transcription_text)`` tuple, or ``("", "")`` if anything
	        fails. Failures are logged with a traceback and never raised.
	    """
	    logging.info(f"스크립트 추출 시작: URL = {url}")

	    try:
	        # Build the client inside the try block so that a failure while
	        # creating it is handled the same way as a failed API call,
	        # instead of propagating to the UI handler.
	        client = Client("whispersound/YT_Ts_R")

	        logging.debug("API 호출 시작")
	        result = client.predict(youtube_url=url, api_name="/predict")
	        logging.debug("API 호출 완료")

	        # Parse the response and pull out the first data item.
	        parsed_result = parse_api_response(result)
	        first_item = parsed_result["data"][0]
	        title = first_item["title"]
	        transcription_text = first_item["transcriptionAsText"]
	    except Exception as e:
	        error_msg = f"스크립트 추출 중 오류 발생: {str(e)}"
	        logging.exception(error_msg)
	        return "", ""

	    logging.info("스크립트 추출 완료")
	    return title, transcription_text

	# OpenAI API key: taken from the OPENAI_API_KEY environment variable
	# (None when the variable is not set).
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# LLM API call helper
	def call_api(prompt, max_tokens, temperature, top_p):
	    """Send ``prompt`` to the chat model and return the reply text.

	    The prompt is sent as a single user message to the "gpt-4o-mini" model
	    through ``openai.ChatCompletion.create``.

	    Args:
	        prompt: Text placed in the user message.
	        max_tokens: Forwarded as the ``max_tokens`` request parameter.
	        temperature: Forwarded as the ``temperature`` request parameter.
	        top_p: Forwarded as the ``top_p`` request parameter.

	    Returns:
	        The content of the first choice's message, or a fixed Korean
	        fallback message if the call raises. The error is logged with a
	        traceback and not re-raised.
	    """
	    fallback_message = "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-4o-mini",  # model switched to gpt-4o-mini
	            messages=[{"role": "user", "content": prompt}],
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	        )
	        first_choice = completion['choices'][0]
	        return first_choice['message']['content']
	    except Exception:
	        logging.exception("LLM API 호출 중 오류 발생")
	        return fallback_message

	# Text summarization helper
	def summarize_text(text):
	    """Ask the LLM for a summary of ``text``.

	    The raw text is used as the prompt as-is and is sent through
	    ``call_api`` with max_tokens=2000, temperature=0.3 and top_p=0.9.

	    Args:
	        text: The text passed to the model as the prompt.

	    Returns:
	        Whatever ``call_api`` returns, or a fixed Korean fallback message
	        if the call raises. The error is logged and not re-raised.
	    """
	    try:
	        summary = call_api(text, max_tokens=2000, temperature=0.3, top_p=0.9)
	    except Exception:
	        logging.exception("요약 생성 중 오류 발생")
	        summary = "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
	    return summary

	# Gradio interface setup
	with gr.Blocks() as demo:
	    gr.Markdown("## YouTube 스크립트 추출 및 요약 도구")

	    youtube_url_input = gr.Textbox(label="YouTube URL 입력")
	    analyze_button = gr.Button("분석하기")
	    script_output = gr.HTML(label="스크립트")
	    summary_output = gr.HTML(label="요약")

	    # State used as a cache of the most recently extracted video.
	    cached_data = gr.State({"url": "", "title": "", "script": ""})

	    def extract_and_cache(url, cache):
	        """Return (title, script, cache) for ``url``, reusing the cache on a hit.

	        The cache is only reused when it holds a non-empty script. A failed
	        extraction is stored with an empty script, and must not stop the
	        user from retrying the same URL.
	        """
	        if url == cache["url"] and cache["script"]:
	            return cache["title"], cache["script"], cache

	        title, script = get_youtube_script(url)
	        new_cache = {"url": url, "title": title, "script": script}
	        return title, script, new_cache

	    def display_script(title, script):
	        """Render the title and the sentence-split transcript as HTML."""
	        import html  # local import: only needed for escaping here

	        # Title and transcript come from an external service; escape them
	        # so markup inside them is shown as text rather than interpreted.
	        safe_title = html.escape(title)
	        safe_script = html.escape("\n".join(split_sentences(script)))
	        script_html = f"""<h2 style='font-size:24px;'>{safe_title}</h2>
	<details>
	<summary><h3>원문 스크립트 (클릭하여 펼치기)</h3></summary>
	<div style="white-space: pre-wrap;">{safe_script}</div>
	</details>"""
	        return script_html

	    def generate_summary(script):
	        """Summarize ``script`` and wrap the result in a scrollable HTML box."""
	        import html  # local import: only needed for escaping here

	        # The model output is free text; escape it before embedding in HTML.
	        summary = html.escape(summarize_text(script))
	        # A div with CSS styling keeps line breaks and bounds the height.
	        summary_html = f"""
	<h3>요약:</h3>
	<div style="white-space: pre-wrap; max-height: 400px; overflow-y: auto; border: 1px solid #ccc; padding: 10px;">
	{summary}
	</div>
	"""
	        return summary_html

	    def analyze(url, cache):
	        """Button handler: extract the script and return its HTML plus the new cache."""
	        title, script, new_cache = extract_and_cache(url, cache)
	        script_html = display_script(title, script)
	        return script_html, new_cache

	    def update_summary(cache):
	        """Follow-up handler: summarize the cached script, if there is one."""
	        if not cache["script"]:
	            return "스크립트가 없습니다. 먼저 YouTube URL을 입력하고 분석을 실행해주세요."
	        return generate_summary(cache["script"])

	    # On button click, extract the script first, then generate the summary
	    # from the freshly updated cache.
	    analyze_button.click(
	        analyze,
	        inputs=[youtube_url_input, cached_data],
	        outputs=[script_output, cached_data]
	    ).then(
	        update_summary,
	        inputs=[cached_data],
	        outputs=summary_output
	    )

	# Launch the interface.
	# NOTE(review): share=True presumably requests a public share link from
	# Gradio — confirm this is intended for the deployment target.
	demo.launch(share=True)