AIRider's picture
Update app.py
a366b56 verified
raw
history blame
8.33 kB
import gradio as gr
from gradio_client import Client
import json
import logging
import openai
import os
import re
import html
# Logging setup: records at DEBUG level and above are written to a local file.
logging.basicConfig(
    filename='youtube_script_extractor.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# The OpenAI API key comes from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def parse_api_response(response):
    """Normalize a raw API response into a single dict.

    A string is decoded as JSON first. If the (decoded) value is a non-empty
    list, its first element is taken as the payload.

    Args:
        response: A JSON string, a dict, or a list whose first element is the
            payload.

    Returns:
        The payload dict.

    Raises:
        ValueError: If the JSON cannot be decoded or the payload is not a
            dict. The underlying error is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            response = json.loads(response)
        if isinstance(response, list) and response:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 응닡 ν˜•μ‹μž…λ‹ˆλ‹€. 받은 데이터 νƒ€μž…: {type(response)}")
    except Exception as e:
        logging.error(f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}")
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause rather than as an incidental error during handling.
        raise ValueError(f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}") from e
    return response
def get_youtube_script(url):
    """Fetch title, description and transcript text for a YouTube URL.

    The work is delegated to the remote "whispersound/YT_Ts_R" endpoint via
    ``gradio_client``; its reply is normalized with ``parse_api_response`` and
    the first entry of its ``data`` list is used.

    Args:
        url: The YouTube video URL passed to the remote endpoint.

    Returns:
        A ``(title, description, transcription_text)`` tuple. Title and
        description fall back to placeholder strings when missing.

    Raises:
        ValueError: If the reply has no usable ``data`` entry or the
            transcript is empty.
        Exception: Any error raised while connecting to or calling the remote
            endpoint is logged and re-raised unchanged.
    """
    logging.info(f"슀크립트 μΆ”μΆœ μ‹œμž‘: URL = {url}")
    try:
        # Creating the client is inside the try so that connection failures
        # are logged through the same path as prediction failures.
        client = Client("whispersound/YT_Ts_R")
        result = client.predict(youtube_url=url, api_name="/predict")
        parsed_result = parse_api_response(result)
        if 'data' not in parsed_result or not parsed_result['data']:
            raise ValueError("API 응닡에 μœ νš¨ν•œ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.")
        data = parsed_result["data"][0]
        title = data.get("title", "제λͺ© μ—†μŒ")
        description = data.get("description", "μ„€λͺ… μ—†μŒ")
        transcription_text = data.get("transcriptionAsText", "")
        if not transcription_text:
            raise ValueError("μΆ”μΆœλœ μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
        logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
        return title, description, transcription_text
    except Exception:
        # Record the full traceback, then let the caller see the original error.
        logging.exception("슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ")
        raise
def call_api(prompt, max_tokens, temperature, top_p):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text sent as the content of the only (user) message.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p: Forwarded to the completion request.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        Exception: Any failure from the request or from reading the reply is
            logged with its traceback and re-raised unchanged.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version still provides it.
    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception:
        logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
        raise
def summarize_text(title, description, text):
    """Build the summarization prompt for a video and return the model reply.

    The prompt is a fixed instruction template followed by the video's title,
    description and transcript; it is sent through ``call_api``.

    Args:
        title: Video title interpolated into the prompt.
        description: Video description interpolated into the prompt.
        text: Transcript interpolated into the prompt.

    Returns:
        Whatever ``call_api`` returns for the assembled prompt.
    """
    # NOTE(review): the non-ASCII text of this template looks mis-encoded in
    # this copy of the file; it is reproduced verbatim - confirm against the
    # original source before relying on it.
    generation_options = {"max_tokens": 8000, "temperature": 0.35, "top_p": 0.95}
    summary_request = f"""
[유튜브 μš”μ•½ κ·œμΉ™]
1. λ„ˆλŠ” 유튜브 μ˜μƒ μ „λ¬Έ ν•΄μ„€κ°€λ‘œμ„œ 지침에 맞게 이 글을 μž‘μ„±ν•˜λΌ
2. μ•„λž˜μ˜ 제λͺ©κ³Ό μ„€λͺ…은 이 유튜브 μ˜μƒμ˜ 원본 메타데이터이닀.
3. λ°˜λ“œμ‹œ 제λͺ©κ³Ό μ„€λͺ…μœΌλ‘œ μ£Όμ œμ™€ λ¬Έλ§₯을 λ¨Όμ € νŒŒμ•…ν•˜κ³ , μ•„λž˜μ˜ λŒ€λ³Έμ„ λ°˜λ“œμ‹œ 지침에 맞게 μƒμ„Έν•˜κ²Œ μš”μ•½ν•˜λΌ
4. λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ μž‘μ„±ν•˜λΌ
5. λ°˜λ“œμ‹œ '이 유튜브 λŒ€λ³Έμ€', '이 μ˜μƒμ€', '이 μœ νŠœλΈŒλŠ”'λ“±μ˜ μ†Œκ°œμ‹ ν‘œν˜„μ€ μ œμ™Έν•˜λΌ
6. μš”μ•½λ¬Έλ§ŒμœΌλ‘œλ„ μ˜μƒμ„ 직접 μ‹œμ²­ν•œ 것과 λ™μΌν•œ μˆ˜μ€€μœΌλ‘œ λ‚΄μš©μ„ 이해할 수 μžˆλ„λ‘ μƒμ„Ένžˆ μž‘μ„±
7. 글을 λ„ˆλ¬΄ μ••μΆ•ν•˜κ±°λ‚˜ ν•¨μΆ•ν•˜μ§€ 말고, μ€‘μš”ν•œ λ‚΄μš©κ³Ό 세뢀사항을 λͺ¨λ‘ 포함
8. λ°˜λ“œμ‹œ λŒ€λ³Έμ˜ 흐름과 논리 ꡬ쑰λ₯Ό μœ μ§€
9. λŒ€λ³Έμ˜ λͺ©μ μ΄λ‚˜ μ˜λ„λ₯Ό νŒŒμ•…ν•˜κ³ , 이λ₯Ό μš”μ•½μ— λ°˜λ“œμ‹œ 반영
10. λ°˜λ“œμ‹œ μ‹œκ°„ μˆœμ„œλ‚˜ μ‚¬κ±΄μ˜ μ „κ°œ 과정을 λͺ…ν™•ν•˜κ²Œ 반영
11. λ“±μž₯인물, μž₯μ†Œ, 사건 λ“± μ€‘μš”ν•œ μš”μ†Œλ₯Ό μ •ν™•ν•˜κ²Œ μž‘μ„±
12. λŒ€λ³Έμ—μ„œ μ „λ‹¬ν•˜λŠ” κ°μ •μ΄λ‚˜ λΆ„μœ„κΈ°λ„ 포함
13. λ°˜λ“œμ‹œ 기술적 μš©μ–΄λ‚˜ μ „λ¬Έ μš©μ–΄κ°€ μžˆμ„ 경우, 이λ₯Ό μ •ν™•ν•˜κ²Œ μ‚¬μš©
14. λ°˜λ“œμ‹œ 핡심 μ„Ήμ…˜(μ†Œμ£Όμ œ)λ₯Ό νŒŒμ•…ν•˜μ—¬ μ„Ήμ…˜μ— 맞게 글을 μš”μ•½ν•˜λΌ(κΈ€μ˜ 양을 κ³ λ €ν•˜μ—¬ μ„Ήμ…˜μ˜ 개수λ₯Ό 탄λ ₯적으둜 μ„€μ •)
15. 각 μ„Ήμ…˜μ˜ 제λͺ©(μ†Œμ£Όμ œ)μ—λŠ” λ‚΄μš©κ³Ό μ–΄μšΈλ¦¬λŠ” μ μ ˆν•œ 이λͺ¨μ§€λ‘œ μ†Œμ£Όμ œλ₯Ό μ‹œμž‘ν•˜λΌ
16. 각 μ„Ήμ…˜μ˜ λ‚΄μš©μ€ Bullet Pointλ₯Ό μ‚¬μš©ν•˜μ—¬ 가독성을 높여라(λ¬Έμž₯ λ‹¨μœ„λ‘œ ꡬ뢄)
[μ˜ˆμ‹œ]
(λ³€κ²½μ „)
- 유튜브λ₯Ό 처음 μ‹œμž‘ν•˜λŠ” μ‚¬λžŒλ“€μ€ κ΅¬λ…μž μˆ˜μ™€ μ‘°νšŒμˆ˜μ— 큰 관심을 두고 맀일 유튜브 μŠ€νŠœλ””μ˜€λ₯Ό ν™•μΈν•˜κ²Œ λœλ‹€. κ·ΈλŸ¬λ‚˜ κ΅¬λ…μžκ°€ 100λͺ…, 1,000λͺ…에 λ„λ‹¬ν•˜λŠ” κ²ƒλ§ŒμœΌλ‘œλŠ” 지속적인 μ„±μž₯에 도움이 λ˜μ§€ μ•ŠλŠ”λ‹€. κ΅¬λ…μž μˆ˜κ°€ λŠ˜μ–΄λ‚œ 후에도 유튜브 채널 μš΄μ˜μ— λŒ€ν•œ 감을 μž‘μ§€ λͺ»ν•΄ ν¬κΈ°ν•˜λŠ” κ²½μš°κ°€ λ§Žλ‹€.
(λ³€κ²½ν›„)
- 유튜브λ₯Ό 처음 μ‹œμž‘ν•˜λŠ” μ‚¬λžŒλ“€μ€ κ΅¬λ…μž μˆ˜μ™€ μ‘°νšŒμˆ˜μ— 큰 관심을 두고 맀일 유튜브 μŠ€νŠœλ””μ˜€λ₯Ό ν™•μΈν•˜κ²Œ λœλ‹€.
- κ·ΈλŸ¬λ‚˜ κ΅¬λ…μžκ°€ 100λͺ…, 1,000λͺ…에 λ„λ‹¬ν•˜λŠ” κ²ƒλ§ŒμœΌλ‘œλŠ” 지속적인 μ„±μž₯에 도움이 λ˜μ§€ μ•ŠλŠ”λ‹€.
- κ΅¬λ…μž μˆ˜κ°€ λŠ˜μ–΄λ‚œ 후에도 유튜브 채널 μš΄μ˜μ— λŒ€ν•œ 감을 μž‘μ§€ λͺ»ν•΄ ν¬κΈ°ν•˜λŠ” κ²½μš°κ°€ λ§Žλ‹€.
17. 각 μ„Ήμ…˜μ˜ λ‚΄μš©μ„ λ°˜λ“œμ‹œ μΆ©μ‹€ν•˜κ²Œ μž‘μ„±
제λͺ©: {title}
μ„€λͺ…: {description}
λŒ€λ³Έ:
{text}
"""
    return call_api(summary_request, **generation_options)
def split_sentences(text, max_length=100):
    """Split ``text`` at sentence endings and pack the pieces into chunks.

    The text is split with a regular expression whose capture group keeps each
    matched ending, so the split result alternates between text and ending.
    Each text/ending pair is rejoined into one piece, and pieces are
    accumulated until adding the next one would exceed ``max_length``
    characters. A piece that ends in ``.``, ``?`` or ``!`` closes the current
    chunk. A single piece longer than ``max_length`` is kept whole.

    Empty or whitespace-only chunks are never emitted.

    Args:
        text: The text to split.
        max_length: Soft upper bound on chunk length, in characters.

    Returns:
        A list of stripped, non-empty chunk strings (empty for blank input).
    """
    # NOTE(review): the alternatives below are presumably sentence endings,
    # but the literal looks mis-encoded in this copy of the file - it is kept
    # verbatim; confirm against the original source.
    pieces = re.split(r"(λ‹ˆλ‹€|μ—μš”|κ΅¬λ‚˜|ν•΄μš”|κ΅°μš”|κ² μ–΄μš”|μ‹œμ˜€|해라|μ˜ˆμš”|μ•„μš”|λ°μš”|λŒ€μš”|μ„Έμš”|μ–΄μš”|κ²Œμš”|κ΅¬μš”|κ³ μš”|λ‚˜μš”|ν•˜μ£ )(?![\w])", text)
    chunks = []
    current = ""

    def flush(buffer):
        # Only keep chunks that still have content after trimming whitespace.
        trimmed = buffer.strip()
        if trimmed:
            chunks.append(trimmed)

    # Even indexes hold text, odd indexes hold the captured ending; the last
    # text piece may have no ending after it.
    for start in range(0, len(pieces), 2):
        sentence = "".join(pieces[start:start + 2])
        if len(current) + len(sentence) > max_length:
            flush(current)
            current = sentence.strip()
        else:
            current += sentence
        if sentence.endswith(('.', '?', '!')):
            flush(current)
            current = ""
    flush(current)
    return chunks
def display_script(title, script):
    """Render the transcript as a collapsible HTML block.

    The transcript is split with ``split_sentences`` and the chunks are joined
    with blank lines inside a ``<pre>`` element. Title and transcript come
    from an external service, so both are HTML-escaped before interpolation.

    Args:
        title: Video title shown as the heading inside the collapsible area.
        script: Raw transcript text.

    Returns:
        An HTML string.
    """
    script_sentences = split_sentences(script)
    # Escape after joining: the separators are plain newlines and are not
    # affected by escaping.
    safe_script = html.escape("\n\n".join(script_sentences))
    safe_title = html.escape(title)
    return f"""<div style="background-color: #f0f0f0; padding: 20px; border-radius: 10px;">
<h3>원문 슀크립트</h3>
<details>
<summary>ν΄λ¦­ν•˜μ—¬ 펼치기</summary>
<h2>{safe_title}</h2>
<pre style="white-space: pre-wrap;">{safe_script}</pre>
</details>
</div>"""
def display_summary(title, summary):
    """Wrap a summary in the styled HTML container.

    Args:
        title: Video title; HTML-escaped before being placed in the heading.
        summary: Summary markup, inserted as-is.

    Returns:
        An HTML string.
    """
    # NOTE(review): ``summary`` is assumed to be already-rendered, trusted
    # HTML, so it is not escaped here - confirm against callers.
    safe_title = html.escape(title)
    return f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
<h3>μš”μ•½</h3>
<h2>{safe_title}</h2>
{summary}
</div>"""
def analyze(url):
    """Generator that drives the UI: extract the script, then summarize it.

    Each yielded value is a ``(script_html, summary_html)`` pair:
    first two progress placeholders, then the rendered script with a summary
    placeholder, and finally the rendered script with the rendered summary.

    Args:
        url: The YouTube URL to process.

    Yields:
        Tuples of two HTML/text strings, one per output slot.
    """
    # Step 1: extract the script.
    yield "슀크립트 μΆ”μΆœ 쀑...", "슀크립트 μΆ”μΆœ 쀑..."
    title, description, script = get_youtube_script(url)
    script_content = display_script(title, script)

    # Step 2: show the original script while the summary is being produced.
    yield script_content, "μš”μ•½ 생성 쀑..."

    # Step 3: generate the summary and convert it line by line to HTML.
    summary = summarize_text(title, description, script)
    # Markdown-style heading prefixes and the tag each one maps to; any other
    # line becomes a paragraph. The line's text is always escaped.
    heading_tags = (("# ", "h1"), ("## ", "h2"), ("### ", "h3"))
    rendered_lines = []
    for raw_line in summary.split('\n'):
        for prefix, tag in heading_tags:
            if raw_line.startswith(prefix):
                body = html.escape(raw_line[len(prefix):])
                rendered_lines.append(f"<{tag}>{body}</{tag}>")
                break
        else:
            rendered_lines.append(f"<p>{html.escape(raw_line)}</p>")
    summary_body = '\n'.join(rendered_lines)
    safe_title = html.escape(title)
    summary_content = f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
<h3>μš”μ•½</h3>
<h2>{safe_title}</h2>
{summary_body}
</div>"""

    # Step 4: show the final result.
    yield script_content, summary_content
# Gradio interface: one URL input, one button, and two HTML output panels.
with gr.Blocks() as demo:
    gr.Markdown("## YouTube 슀크립트 μΆ”μΆœ 및 μš”μ•½ 도ꡬ")
    youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
    analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
    script_output = gr.HTML(label="원문 슀크립트")
    summary_output = gr.HTML(label="μš”μ•½")
    # Each pair yielded by `analyze` fills the two panels in this order.
    analyze_button.click(
        fn=analyze,
        inputs=[youtube_url_input],
        outputs=[script_output, summary_output],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()