AIRider's picture
Update app.py
4a45930 verified
raw
history blame
6.06 kB
import gradio as gr
from gradio_client import Client
import json
import logging
import ast
import openai
import os
import random
import re
# Logging configuration
# Send records of level DEBUG and above to youtube_script_extractor.log,
# each one formatted as "timestamp - level - message".
logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(message)s')
def parse_api_response(response):
    """Normalise a raw API response into a dictionary.

    A string response is decoded first: it is tried as a Python literal and,
    if that fails, as JSON, so payloads that use ``true``/``false``/``null``
    are accepted as well. A non-empty list is then unwrapped to its first
    element.

    Args:
        response: A string, list or dict returned by the API client.

    Returns:
        The response as a ``dict``.

    Raises:
        ValueError: If decoding fails or the final value is not a dict. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            try:
                response = ast.literal_eval(response)
            except (ValueError, SyntaxError) as literal_error:
                # Not a Python literal: fall back to JSON. If that fails too,
                # surface the literal_eval error so the reported message is
                # the same as before the fallback existed.
                try:
                    response = json.loads(response)
                except ValueError:
                    raise literal_error from None
        if isinstance(response, list) and len(response) > 0:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 응닡 ν˜•μ‹μž…λ‹ˆλ‹€. 받은 데이터 νƒ€μž…: {type(response)}")
        return response
    except Exception as e:
        # Every failure is reported as ValueError; chain the cause so the
        # original traceback stays available in logs.
        raise ValueError(f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}") from e
# Sentence-splitting helper (Korean)
def split_sentences(text, max_length=100):
    """Split a transcript into display-sized chunks.

    The text is cut after any of a fixed list of sentence endings that is not
    immediately followed by a word character. Consecutive pieces are merged
    into one chunk until adding the next piece would push the chunk past
    ``max_length`` characters. A chunk is also closed as soon as a piece ends
    with ``.``, ``?`` or ``!``. A single piece longer than ``max_length`` is
    kept whole.

    Args:
        text: The transcript text.
        max_length: Soft upper bound on the chunk length, in characters.

    Returns:
        A list of stripped, non-empty chunks (empty for blank input).
    """
    # With one capturing group, re.split alternates between the text before
    # an ending (even indexes) and the ending itself (odd indexes).
    pieces = re.split(r"(λ‹ˆλ‹€|μ—μš”|κ΅¬λ‚˜|ν•΄μš”|κ΅°μš”|κ² μ–΄μš”|μ‹œμ˜€|해라|μ˜ˆμš”|μ•„μš”|λ°μš”|λŒ€μš”|μ„Έμš”|μ–΄μš”|κ²Œμš”|κ΅¬μš”|κ³ μš”|λ‚˜μš”|ν•˜μ£ )(?![\w])", text)
    combined_sentences = []
    current_sentence = ""

    def flush(chunk):
        # Never emit empty chunks: an over-long first piece or blank input
        # would otherwise put "" into the result.
        chunk = chunk.strip()
        if chunk:
            combined_sentences.append(chunk)

    for i in range(0, len(pieces), 2):
        # Re-attach the ending to the text that precedes it.
        sentence = pieces[i]
        if i + 1 < len(pieces):
            sentence += pieces[i + 1]

        if len(current_sentence) + len(sentence) > max_length:
            flush(current_sentence)
            current_sentence = sentence.strip()
        else:
            current_sentence += sentence

        if sentence.endswith(('.', '?', '!')):
            flush(current_sentence)
            current_sentence = ""

    flush(current_sentence)
    return combined_sentences
def get_youtube_script(url):
    """Fetch the title and transcript of a video from the remote Space.

    Args:
        url: Video URL, passed as ``youtube_url`` to the ``/predict`` endpoint
            of the ``whispersound/YT_Ts_R`` Space.

    Returns:
        ``(title, transcription_text)`` on success. If anything fails
        (connecting, the call itself, or reading the expected keys), the
        error is logged with its traceback and ``("", "")`` is returned.
    """
    logging.info(f"슀크립트 μΆ”μΆœ μ‹œμž‘: URL = {url}")
    try:
        # The client is created inside the try block so that a failure to
        # reach the Space is logged and reported as ("", "") like every
        # other failure, instead of propagating to the caller.
        client = Client("whispersound/YT_Ts_R")
        logging.debug("API 호좜 μ‹œμž‘")
        result = client.predict(youtube_url=url, api_name="/predict")
        logging.debug("API 호좜 μ™„λ£Œ")
        # Parse the response
        parsed_result = parse_api_response(result)
        first_item = parsed_result["data"][0]
        title = first_item["title"]
        transcription_text = first_item["transcriptionAsText"]
        logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
        return title, transcription_text
    except Exception as e:
        error_msg = f"슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
        logging.exception(error_msg)
        return "", ""
# OpenAI API key configuration
# The key is read from the OPENAI_API_KEY environment variable; os.getenv
# returns None when the variable is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
# LLM API call helper
def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn chat request to ``gpt-4o-mini`` and return the reply.

    Supports both generations of the ``openai`` package: the module-level
    ``openai.chat.completions`` client of openai>=1.0 and the legacy
    ``openai.ChatCompletion`` interface of earlier releases.

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The content of the first choice, or a fixed user-facing error message
        if the request fails for any reason (the failure is logged).
    """
    request = {
        "model": "gpt-4o-mini",  # model switched to gpt-4o-mini
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed ``openai.ChatCompletion``; the module-level
            # client still honours ``openai.api_key``.
            response = openai.chat.completions.create(**request)
            return response.choices[0].message.content
        response = openai.ChatCompletion.create(**request)
        return response['choices'][0]['message']['content']
    except Exception:
        logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
        return "μš”μ•½μ„ μƒμ„±ν•˜λŠ” λ™μ•ˆ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. λ‚˜μ€‘μ— λ‹€μ‹œ μ‹œλ„ν•΄ μ£Όμ„Έμš”."
# Text summarization helper
def summarize_text(text):
    """Ask the LLM for a summary of *text*.

    The raw text is sent as the prompt without any added instructions (the
    original note says this is to allow "self-discover").

    Returns:
        Whatever ``call_api`` returns, or a fixed user-facing error message
        if the call raises (the failure is logged).
    """
    generation_options = {"max_tokens": 2000, "temperature": 0.3, "top_p": 0.9}
    try:
        summary = call_api(text, **generation_options)
    except Exception:
        logging.exception("μš”μ•½ 생성 쀑 였λ₯˜ λ°œμƒ")
        return "μš”μ•½μ„ μƒμ„±ν•˜λŠ” λ™μ•ˆ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. λ‚˜μ€‘μ— λ‹€μ‹œ μ‹œλ„ν•΄ μ£Όμ„Έμš”."
    return summary
# Gradio interface definition
with gr.Blocks() as demo:
# Page heading.
gr.Markdown("## YouTube 슀크립트 μΆ”μΆœ 및 μš”μ•½ 도ꡬ")
# Text box for the video URL and the button that starts the analysis.
youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
# HTML panes that receive the rendered script and the summary.
script_output = gr.HTML(label="슀크립트")
summary_output = gr.HTML(label="μš”μ•½")
# State variable used as a cache: the last analysed URL with its title and script.
cached_data = gr.State({"url": "", "title": "", "script": ""})
def extract_and_cache(url, cache):
    """Return the title and script for *url*, reusing the cache when possible.

    The cached entry is reused when it belongs to the same URL and actually
    holds a script. A failed extraction is stored with an empty script, so
    requiring a non-empty script lets the user retry the same URL instead of
    being served the cached failure on every later click. An empty URL that
    matches the cache is still answered from the cache, so no request is made
    for it.

    Args:
        url: The video URL entered by the user.
        cache: Dict with the keys ``"url"``, ``"title"`` and ``"script"``.

    Returns:
        ``(title, script, cache_dict)`` where ``cache_dict`` is either the
        cache passed in (on a hit) or a new dict describing this extraction.
    """
    if url == cache["url"] and (cache["script"] or not url):
        return cache["title"], cache["script"], cache
    title, script = get_youtube_script(url)
    new_cache = {"url": url, "title": title, "script": script}
    return title, script, new_cache
def display_script(title, script):
    """Render the video title and transcript as a collapsible HTML snippet.

    The transcript is split into chunks with ``split_sentences`` and shown one
    chunk per line. Title and transcript come from a remote service, so both
    are HTML-escaped before being placed into the markup.

    Args:
        title: Video title.
        script: Full transcript text.

    Returns:
        An HTML string with the title as a heading and the transcript inside
        a ``<details>`` element.
    """
    import html  # local import: only this rendering helper needs it

    formatted_script = "\n".join(split_sentences(script))
    # str() keeps non-string values rendering the way the f-string used to.
    safe_title = html.escape(str(title))
    safe_script = html.escape(formatted_script)
    script_html = f"""<h2 style='font-size:24px;'>{safe_title}</h2>
<details>
<summary><h3>원문 슀크립트 (ν΄λ¦­ν•˜μ—¬ 펼치기)</h3></summary>
<div style="white-space: pre-wrap;">{safe_script}</div>
</details>"""
    return script_html
def generate_summary(script):
    """Summarise *script* and wrap the result in a scrollable HTML box.

    The summary is model output derived from remote text, so it is
    HTML-escaped before being placed into the markup.

    Args:
        script: Transcript text to summarise.

    Returns:
        An HTML string containing a heading and the summary text.
    """
    import html  # local import: only this rendering helper needs it

    summary = summarize_text(script)
    # str() keeps non-string values rendering the way the f-string used to.
    safe_summary = html.escape(str(summary))
    # A div with CSS styling keeps line breaks and limits the box height.
    summary_html = f"""
<h3>μš”μ•½:</h3>
<div style="white-space: pre-wrap; max-height: 400px; overflow-y: auto; border: 1px solid #ccc; padding: 10px;">
{safe_summary}
</div>
"""
    return summary_html
def analyze(url, cache):
    """Extract (or reuse) the script for *url* and render it as HTML.

    Returns:
        ``(script_html, cache_dict)``: the rendered script and the cache dict
        produced by ``extract_and_cache``.
    """
    extracted = extract_and_cache(url, cache)
    video_title, video_script, updated_cache = extracted
    return display_script(video_title, video_script), updated_cache
def update_summary(cache):
    """Return the summary HTML for the cached script.

    When the cache holds no script, a fixed message asking the user to run
    the analysis first is returned instead.
    """
    cached_script = cache["script"]
    if cached_script:
        return generate_summary(cached_script)
    return "μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € YouTube URL을 μž…λ ₯ν•˜κ³  뢄석을 μ‹€ν–‰ν•΄μ£Όμ„Έμš”."
# Extract the script when the button is clicked
# Step 1: a click runs analyze, which fills the script pane and the cache.
extraction_event = analyze_button.click(
    fn=analyze,
    inputs=[youtube_url_input, cached_data],
    outputs=[script_output, cached_data],
)
# Step 2: once analyze has run, build the summary from the cached script.
extraction_event.then(
    fn=update_summary,
    inputs=[cached_data],
    outputs=summary_output,
)
# Launch the interface
# Start the Gradio app; share=True requests a public share link.
demo.launch(share=True)