YT_Script_Shorts

Paused

App Files Files Community

YT_Script_Shorts / app.py

AIRider

Update app.py

bb03802 verified about 1 year ago

raw

history blame

8.2 kB

	import ast
	import html
	import json
	import logging
	import os
	import random
	import re

	import gradio as gr
	import openai
	from gradio_client import Client

	# Write every record at DEBUG level and above to a log file (path is relative
	# to the working directory), prefixed with a timestamp and the level name.
	logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG,
	format='%(asctime)s - %(levelname)s - %(message)s')

	# The OpenAI key is read from the environment; os.getenv yields None when the
	# variable is not set.
	openai.api_key = os.getenv("OPENAI_API_KEY")

	def parse_api_response(response):
	    """Normalize a raw API response into a dictionary.

	    A string response is decoded first: JSON is tried, and a Python literal
	    (``ast.literal_eval``) is the fallback, so both JSON text (``true``,
	    ``null``) and ``repr``-style text (single quotes, ``None``) are accepted.
	    A non-empty list is then reduced to its first element.

	    Args:
	        response: A dict, a list whose first element is a dict, or a string
	            encoding either of those.

	    Returns:
	        The response dictionary.

	    Raises:
	        ValueError: If the string cannot be decoded or the decoded value is
	            not a dictionary. The underlying error is chained as the cause.
	    """
	    try:
	        if isinstance(response, str):
	            try:
	                response = json.loads(response)
	            except ValueError:
	                # Not valid JSON (json.JSONDecodeError is a ValueError);
	                # fall back to a safe Python-literal parse.
	                response = ast.literal_eval(response)
	        if isinstance(response, list) and response:
	            response = response[0]
	        if not isinstance(response, dict):
	            raise ValueError(f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(response)}")
	        return response
	    except Exception as e:
	        # Callers only need one exception type; keep the cause for debugging.
	        raise ValueError(f"API 응답 파싱 실패: {str(e)}") from e

	def split_sentences(text):
	    """Split Korean transcript text into chunks of roughly 100 characters.

	    The text is cut after common Korean sentence endings (when the ending is
	    not followed by a word character). Consecutive pieces are merged until
	    adding the next one would push the chunk past 100 characters; a chunk is
	    also closed early when a piece ends with ``.``, ``?`` or ``!``.

	    Args:
	        text: The transcript text.

	    Returns:
	        A list of stripped, non-empty chunks. Empty input yields ``[]``.
	    """
	    # The alternation must use a bare "|": an escaped "\|" matches a literal
	    # pipe character, which makes the pattern never match real text.
	    pieces = re.split(
	        r"(니다|에요|구나|해요|군요|겠어요|시오|해라|예요|아요|데요|대요|세요|어요|게요|구요|고요|나요|하죠)(?![\w])",
	        text,
	    )
	    combined_sentences = []
	    current_sentence = ""
	    # With a capturing group, re.split alternates body / ending, so each
	    # sentence is an even-indexed piece plus the ending that follows it.
	    for i in range(0, len(pieces), 2):
	        ending = pieces[i + 1] if i + 1 < len(pieces) else ""
	        sentence = pieces[i] + ending
	        if len(current_sentence) + len(sentence) > 100:
	            # Skip the flush when nothing has accumulated yet, otherwise an
	            # over-long first piece would emit an empty chunk.
	            if current_sentence.strip():
	                combined_sentences.append(current_sentence.strip())
	            current_sentence = sentence.strip()
	        else:
	            current_sentence += sentence
	            if sentence.endswith(('.', '?', '!')):
	                combined_sentences.append(current_sentence.strip())
	                current_sentence = ""
	    if current_sentence.strip():
	        combined_sentences.append(current_sentence.strip())
	    return combined_sentences

	def get_youtube_script(url):
	    """Fetch the title, transcript text and sections of a YouTube video.

	    The work is delegated to the ``whispersound/YT_Ts_R`` Space through
	    ``gradio_client``; its reply is normalized with ``parse_api_response``
	    and the first entry of its ``"data"`` list is used.

	    Args:
	        url: The YouTube video URL.

	    Returns:
	        A ``(title, transcription_text, sections)`` tuple. On any failure
	        (connection, remote call, or unexpected reply) the error is logged
	        and ``("", "", [])`` is returned.
	    """
	    logging.info(f"스크립트 추출 시작: URL = {url}")

	    try:
	        # Creating the client can itself fail (e.g. the Space is unreachable),
	        # so it lives inside the try to honour the empty-result contract.
	        client = Client("whispersound/YT_Ts_R")

	        logging.debug("API 호출 시작")
	        result = client.predict(youtube_url=url, api_name="/predict")
	        logging.debug("API 호출 완료")

	        video = parse_api_response(result)["data"][0]
	        title = video["title"]
	        transcription_text = video["transcriptionAsText"]
	        sections = video["sections"]

	        logging.info("스크립트 추출 완료")
	        return title, transcription_text, sections

	    except Exception as e:
	        error_msg = f"스크립트 추출 중 오류 발생: {str(e)}"
	        logging.exception(error_msg)
	        return "", "", []

	def call_api(prompt, max_tokens, temperature, top_p):
	    """Send one user message to the ``gpt-4o-mini`` chat model.

	    Args:
	        prompt: Text sent as the single user message.
	        max_tokens: Forwarded to the API as ``max_tokens``.
	        temperature: Forwarded to the API as ``temperature``.
	        top_p: Forwarded to the API as ``top_p``.

	    Returns:
	        The content of the first returned choice. If the call raises, the
	        error is logged and a fixed Korean apology message is returned.
	    """
	    request = {
	        "model": "gpt-4o-mini",
	        "messages": [{"role": "user", "content": prompt}],
	        "max_tokens": max_tokens,
	        "temperature": temperature,
	        "top_p": top_p,
	    }
	    try:
	        completion = openai.ChatCompletion.create(**request)
	        first_choice = completion['choices'][0]
	        return first_choice['message']['content']
	    except Exception:
	        logging.exception("LLM API 호출 중 오류 발생")
	        return "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."

	def summarize_section(section_text):
	    """Ask the LLM for a short Korean summary of one transcript section.

	    Args:
	        section_text: The text of the section to summarize.

	    Returns:
	        Whatever ``call_api`` returns for the summary prompt.
	    """
	    sampling = {"max_tokens": 150, "temperature": 0.3, "top_p": 0.9}
	    section_prompt = f"""
	다음 유튜브 대본 섹션의 핵심 내용을 간결하게 요약하세요:
	1. 한글로 작성하세요.
	2. 주요 논점과 중요한 세부사항을 포함하세요.
	3. 요약은 2-3문장으로 제한하세요.

	섹션 내용:
	{section_text}
	"""
	    return call_api(section_prompt, **sampling)

	def format_time(seconds):
	    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.

	    Args:
	        seconds: Duration in seconds; each component is truncated with
	            ``int`` before formatting.

	    Returns:
	        The formatted timestamp.
	    """
	    total_minutes, secs = divmod(seconds, 60)
	    hrs, mins = divmod(total_minutes, 60)
	    return ":".join(f"{int(part):02d}" for part in (hrs, mins, secs))

	def generate_timeline_summary(sections):
	    """Build a numbered, timestamped summary with one entry per section.

	    Args:
	        sections: Iterable of mappings providing ``'start_time'`` and
	            ``'text'`` keys.

	    Returns:
	        The entries concatenated, each formatted as
	        ``"<HH:MM:SS> <n>. <summary>"`` followed by a blank line; an empty
	        string when there are no sections.
	    """
	    return "".join(
	        f"{format_time(entry['start_time'])} {number}. {summarize_section(entry['text'])}\n\n"
	        for number, entry in enumerate(sections, start=1)
	    )

	def summarize_text(text):
	    """Ask the LLM for a detailed Korean summary of the whole transcript.

	    Args:
	        text: The full transcript text, appended to the instruction prompt.

	    Returns:
	        Whatever ``call_api`` returns, or a fixed Korean apology message if
	        the call raises.
	    """
	    full_prompt = f"""
	1. 다음 주어지는 유튜브 대본의 핵심 주제와 모든 주요 내용을 상세하게 요약하라
	2. 반드시 한글로 작성하라
	3. 요약문만으로도 영상을 직접 시청한 것과 동일한 수준으로 내용을 이해할 수 있도록 상세히 작성
	4. 글을 너무 압축하거나 함축하지 말고, 중요한 내용과 세부사항을 모두 포함
	5. 반드시 대본의 흐름과 논리 구조를 유지
	6. 반드시 시간 순서나 사건의 전개 과정을 명확하게 반영
	7. 등장인물, 장소, 사건 등 중요한 요소를 정확하게 작성
	8. 대본에서 전달하는 감정이나 분위기도 포함
	9. 반드시 기술적 용어나 전문 용어가 있을 경우, 이를 정확하게 사용
	10. 대본의 목적이나 의도를 파악하고, 이를 요약에 반드시 반영
	11. 전체글을 보고

	---

	이 프롬프트가 도움이 되시길 바랍니다.
	\n\n
	{text}"""

	    try:
	        summary = call_api(full_prompt, max_tokens=10000, temperature=0.3, top_p=0.9)
	    except Exception:
	        logging.exception("요약 생성 중 오류 발생")
	        return "요약을 생성하는 동안 오류가 발생했습니다. 나중에 다시 시도해 주세요."
	    return summary

	# Gradio UI: one URL box and a button; clicking it fills three HTML panes
	# (script, timeline summary, overall summary) and updates a per-session cache.
	with gr.Blocks() as demo:
	    gr.Markdown("## YouTube 스크립트 추출 및 요약 도구")

	    youtube_url_input = gr.Textbox(label="YouTube URL 입력")
	    analyze_button = gr.Button("분석하기")
	    script_output = gr.HTML(label="스크립트")
	    timeline_output = gr.HTML(label="타임라인 요약")
	    summary_output = gr.HTML(label="전체 요약")

	    # Holds the result of the most recent extraction.
	    cached_data = gr.State({"url": "", "title": "", "script": "", "sections": []})

	    def extract_and_cache(url, cache):
	        """Return (title, script, sections, cache), reusing the cache on a hit.

	        A cache entry counts as a hit only when it holds a script: a failed
	        extraction stores an empty script, and serving that from the cache
	        would make retrying the same URL impossible.
	        """
	        if url == cache["url"] and cache["script"]:
	            return cache["title"], cache["script"], cache["sections"], cache

	        title, script, sections = get_youtube_script(url)
	        new_cache = {"url": url, "title": title, "script": script, "sections": sections}
	        return title, script, sections, new_cache

	    def display_script(title, script):
	        """Render the title and the collapsible, sentence-split transcript."""
	        if not script:
	            return "<p>스크립트를 추출하지 못했습니다. URL을 확인하고 다시 시도해 주세요.</p>"
	        # Title and transcript come from a remote service; escape them so
	        # they cannot inject markup into the page.
	        safe_title = html.escape(str(title))
	        formatted_script = html.escape("\n".join(split_sentences(script)))
	        script_html = f"""<h2 style='font-size:24px;'>{safe_title}</h2>
	<details>
	<summary><h3>원문 스크립트 (클릭하여 펼치기)</h3></summary>
	<div style="white-space: pre-wrap;">{formatted_script}</div>
	</details>"""
	        return script_html

	    def display_timeline(sections):
	        """Render the per-section timeline summary in a scrollable box."""
	        if not sections:
	            return "<p>타임라인을 생성하지 못했습니다. 스크립트 추출에 실패했을 수 있습니다.</p>"
	        # LLM output is untrusted text as well; escape before embedding.
	        timeline_summary = html.escape(str(generate_timeline_summary(sections)))
	        timeline_html = f"""
	<h3>타임라인 요약:</h3>
	<div style="white-space: pre-wrap; max-height: 400px; overflow-y: auto; border: 1px solid #ccc; padding: 10px;">
	{timeline_summary}
	</div>
	"""
	        return timeline_html

	    def generate_summary(script):
	        """Render the whole-transcript summary in a scrollable box."""
	        if not script:
	            return "<p>전체 요약을 생성하지 못했습니다. 스크립트 추출에 실패했을 수 있습니다.</p>"
	        summary = html.escape(str(summarize_text(script)))
	        summary_html = f"""
	<h3>전체 요약:</h3>
	<div style="white-space: pre-wrap; max-height: 400px; overflow-y: auto; border: 1px solid #ccc; padding: 10px;">
	{summary}
	</div>
	"""
	        return summary_html

	    def analyze(url, cache):
	        """Click handler: extract (or reuse) the script and build all panes."""
	        title, script, sections, new_cache = extract_and_cache(url, cache)
	        script_html = display_script(title, script)
	        timeline_html = display_timeline(sections)
	        summary_html = generate_summary(script)
	        return script_html, timeline_html, summary_html, new_cache

	    analyze_button.click(
	        analyze,
	        inputs=[youtube_url_input, cached_data],
	        outputs=[script_output, timeline_output, summary_output, cached_data]
	    )

	demo.launch(share=True)