YT_Script_Shorts

Running

File size: 8,327 Bytes

073d3e8
 
 
 
4a45930
073d3e8
876af8e
23b09fc
d3f7e6f
7b2bf17
d3f7e6f
 
 
 
bb03802
073d3e8
 
 
d3f7e6f
 
 
073d3e8
 
 
 
7b2bf17
073d3e8
 
 
 
 
 
 
 
d3f7e6f
 
 
3c311b4
7b2bf17
 
876af8e
7b2bf17
3c311b4
d3f7e6f
 
3c311b4
57d8b30
876af8e
073d3e8
7b2bf17
d3f7e6f
073d3e8
57d8b30
 
 
 
 
 
 
 
 
 
 
 
d3f7e6f
52d4f4d
876af8e
d3f7e6f
e95db1b
 
 
 
 
 
 
 
 
a725404
e95db1b
 
 
 
a725404
 
57336e5
629db0a
 
 
 
 
 
 
 
a366b56
e95db1b
876af8e
 
 
7b2bf17
 
 
b146cdf
d3f7e6f
a7e5781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730a2fb
 
6abe9bf
730a2fb
aff0317
730a2fb
 
 
 
 
 
 
 
f14837f
730a2fb
6abe9bf
 
0fbd8f6
1ed9894
6abe9bf
 
0fbd8f6
 
1ed9894
0fbd8f6
 
6abe9bf
f3279ca
9448ef3
 
 
3c404e0
36c388a
3c404e0
36c388a
3c404e0
36c388a
9448ef3
36c388a
9448ef3
 
 
f3279ca
24e6bef
108f0d2
23b09fc
108f0d2
 
0fbd8f6
 
1ed9894
073d3e8
7b2bf17
073d3e8
4a45930
073d3e8
 
1ed9894
 
073d3e8
 
85efe2c
 
1ed9894
073d3e8
 
7b2bf17
85efe2c

import gradio as gr
from gradio_client import Client
import json
import logging
import openai
import os
import re
import html

# Logging configuration: write DEBUG-and-above records to a local log file,
# each line prefixed with a timestamp and the level name.
logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG, 
                    format='%(asctime)s - %(levelname)s - %(message)s')

# The OpenAI key is read from the environment; os.getenv yields None when
# OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

def parse_api_response(response):
    """Normalize a raw API response into a single dict.

    A string is decoded as JSON first. A non-empty list is reduced to its
    first element. Whatever remains must be a dict.

    Args:
        response: A JSON string, a list, or a dict.

    Returns:
        The response dict.

    Raises:
        ValueError: If decoding fails or the result is not a dict. The
            original exception is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            response = json.loads(response)
        if isinstance(response, list) and response:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(response)}")
        return response
    except Exception as e:
        logging.error(f"API 응답 파싱 실패: {str(e)}")
        # Chain explicitly so the traceback shows the root cause as the direct
        # cause rather than as an error raised while handling another error.
        raise ValueError(f"API 응답 파싱 실패: {str(e)}") from e

def get_youtube_script(url):
    """Fetch the title, description and transcript text for a YouTube URL.

    Calls the "/predict" endpoint of the "whispersound/YT_Ts_R" client,
    normalizes the reply with parse_api_response, and reads the first entry
    of its 'data' field.

    Args:
        url: The YouTube URL passed to the endpoint as ``youtube_url``.

    Returns:
        A ``(title, description, transcription_text)`` tuple. Title and
        description fall back to placeholder strings when missing.

    Raises:
        ValueError: If the reply has no usable 'data' or the transcript is
            empty. Any other exception from the call is logged and re-raised.
    """
    logging.info(f"스크립트 추출 시작: URL = {url}")
    client = Client("whispersound/YT_Ts_R")
    try:
        raw_result = client.predict(youtube_url=url, api_name="/predict")
        payload = parse_api_response(raw_result)

        entries = payload.get('data')
        if not entries:
            raise ValueError("API 응답에 유효한 데이터가 없습니다.")

        video = entries[0]
        title = video.get("title", "제목 없음")
        description = video.get("description", "설명 없음")
        transcript = video.get("transcriptionAsText", "")
        if not transcript:
            raise ValueError("추출된 스크립트가 없습니다.")

        logging.info("스크립트 추출 완료")
        return title, description, transcript
    except Exception:
        # Record the full traceback in the log, then let the caller see the error.
        logging.exception("스크립트 추출 중 오류 발생")
        raise

def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn user prompt to the chat model and return its reply text.

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p: Forwarded to the completion request.

    Returns:
        The message content of the first choice in the response.

    Raises:
        Exception: Any failure during the request or while reading the
            response is logged with its traceback and re-raised unchanged.
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        completion = openai.ChatCompletion.create(**request)
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception:
        logging.exception("LLM API 호출 중 오류 발생")
        raise

def summarize_text(title, description, text):
    """Build the Korean summarization prompt and return the model's summary.

    The prompt embeds the video title, description and transcript, and
    instructs the model to write a detailed Korean summary that is split
    into sections with emoji-prefixed headings and sentence-level bullet
    points. The prompt is sent through call_api with max_tokens=8000,
    temperature=0.35 and top_p=0.95.

    Args:
        title: Video title inserted into the prompt.
        description: Video description inserted into the prompt.
        text: Transcript inserted into the prompt.

    Returns:
        Whatever call_api returns for this prompt.
    """
    # NOTE: the prompt below is runtime text sent to the model; keep it verbatim.
    prompt = f"""
[유튜브 요약 규칙]
1. 너는 유튜브 영상 전문 해설가로서 지침에 맞게 이 글을 작성하라
2. 아래의 제목과 설명은 이 유튜브 영상의 원본 메타데이터이다.
3. 반드시 제목과 설명으로 주제와 문맥을 먼저 파악하고, 아래의 대본을 반드시 지침에 맞게 상세하게 요약하라
4. 반드시 한글로 작성하라
5. 반드시 '이 유튜브 대본은', '이 영상은', '이 유튜브는'등의 소개식 표현은 제외하라
6. 요약문만으로도 영상을 직접 시청한 것과 동일한 수준으로 내용을 이해할 수 있도록 상세히 작성
7. 글을 너무 압축하거나 함축하지 말고, 중요한 내용과 세부사항을 모두 포함
8. 반드시 대본의 흐름과 논리 구조를 유지
9. 대본의 목적이나 의도를 파악하고, 이를 요약에 반드시 반영
10. 반드시 시간 순서나 사건의 전개 과정을 명확하게 반영
11. 등장인물, 장소, 사건 등 중요한 요소를 정확하게 작성
12. 대본에서 전달하는 감정이나 분위기도 포함
13. 반드시 기술적 용어나 전문 용어가 있을 경우, 이를 정확하게 사용

14. 반드시 핵심 섹션(소주제)를 파악하여 섹션에 맞게 글을 요약하라(글의 양을 고려하여 섹션의 개수를 탄력적으로 설정)
15. 각 섹션의 제목(소주제)에는 내용과 어울리는 적절한 이모지로 소주제를 시작하라
16. 각 섹션의 내용은 Bullet Point를 사용하여 가독성을 높여라(문장 단위로 구분)
  [예시]
(변경전)
 - 유튜브를 처음 시작하는 사람들은 구독자 수와 조회수에 큰 관심을 두고 매일 유튜브 스튜디오를 확인하게 된다. 그러나 구독자가 100명, 1,000명에 도달하는 것만으로는 지속적인 성장에 도움이 되지 않는다. 구독자 수가 늘어난 후에도 유튜브 채널 운영에 대한 감을 잡지 못해 포기하는 경우가 많다.
(변경후)
 - 유튜브를 처음 시작하는 사람들은 구독자 수와 조회수에 큰 관심을 두고 매일 유튜브 스튜디오를 확인하게 된다. 
 - 그러나 구독자가 100명, 1,000명에 도달하는 것만으로는 지속적인 성장에 도움이 되지 않는다. 
 - 구독자 수가 늘어난 후에도 유튜브 채널 운영에 대한 감을 잡지 못해 포기하는 경우가 많다.
17. 각 섹션의 내용을 반드시 충실하게 작성

제목: {title}
설명: {description}

대본:
{text}
"""
    return call_api(prompt, max_tokens=8000, temperature=0.35, top_p=0.95)

def split_sentences(text):
    """Split Korean transcript text into display-sized sentence groups.

    The text is cut after common Korean sentence endings (e.g. "니다", "세요")
    that are not immediately followed by a word character. Punctuation
    (".", "?", "!") directly after an ending is kept with that sentence, so a
    punctuated sentence closes its group immediately. Unpunctuated pieces are
    accumulated until adding the next one would exceed 100 characters.

    Args:
        text: Raw transcript text.

    Returns:
        A list of stripped, non-empty strings in their original order.
    """
    # The optional [.?!]* inside the capture group keeps trailing punctuation
    # attached to the ending it follows; without it the punctuation would land
    # at the start of the next piece and the endswith() check below could
    # never fire for a mid-text sentence.
    parts = re.split(
        r"((?:니다|에요|구나|해요|군요|겠어요|시오|해라|예요|아요|데요|대요|세요|어요|게요|구요|고요|나요|하죠)[.?!]*)(?![\w])",
        text,
    )
    combined_sentences = []
    current_sentence = ""

    def flush(pending):
        # Never emit empty or whitespace-only groups.
        cleaned = pending.strip()
        if cleaned:
            combined_sentences.append(cleaned)

    # With one capture group, re.split alternates: text, ending, text, ending, ...
    for i in range(0, len(parts), 2):
        ending = parts[i + 1] if i + 1 < len(parts) else ""
        sentence = parts[i] + ending
        if len(current_sentence) + len(sentence) > 100:  # would exceed 100 characters
            flush(current_sentence)
            current_sentence = sentence.strip()
        else:
            current_sentence += sentence
        if sentence.endswith(('.', '?', '!')):
            flush(current_sentence)
            current_sentence = ""
    flush(current_sentence)
    return combined_sentences

def display_script(title, script):
    """Render the original transcript as a collapsible HTML card.

    The transcript is split with split_sentences and the groups are joined
    with blank lines. Both the title and the transcript come from an external
    source, so they are HTML-escaped before being placed in the markup.

    Args:
        title: Video title shown as the heading inside the card.
        script: Raw transcript text.

    Returns:
        An HTML string containing a <details> element with the transcript.
    """
    script_sentences = split_sentences(script)
    formatted_script = "\n\n".join(script_sentences)
    # Escape after joining so markup characters in the source text are shown
    # literally instead of being interpreted by the browser.
    safe_title = html.escape(title)
    safe_script = html.escape(formatted_script)
    return f"""<div style="background-color: #f0f0f0; padding: 20px; border-radius: 10px;">
<h3>원문 스크립트</h3>
<details>
    <summary>클릭하여 펼치기</summary>
    <h2>{safe_title}</h2>
    <pre style="white-space: pre-wrap;">{safe_script}</pre>
</details>
</div>"""

def display_summary(title, summary):
    """Render the summary as an HTML card.

    Args:
        title: Video title; HTML-escaped before insertion.
        summary: Summary body, inserted verbatim. The caller is responsible
            for passing markup that is already safe to render.

    Returns:
        An HTML string with the heading, the escaped title and the summary.
    """
    return f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
<h3>요약</h3>
<h2>{html.escape(title)}</h2>
{summary}
</div>"""

def analyze(url):
    """Extract and summarize a video, yielding progressive UI updates.

    Yields ``(script_html, summary_html)`` pairs three times: placeholders
    while the transcript is fetched, the rendered transcript with a summary
    placeholder, and finally the transcript with the rendered summary.

    Args:
        url: YouTube URL to process.

    Yields:
        Tuples of two HTML/text strings for the script and summary outputs.
    """
    # Stage 1: transcript extraction
    extracting = "스크립트 추출 중..."
    yield extracting, extracting
    title, description, script = get_youtube_script(url)
    script_content = display_script(title, script)

    # Stage 2: show the transcript while the summary is generated
    yield script_content, "요약 생성 중..."
    summary = summarize_text(title, description, script)

    # Markdown-style heading markers and the tag each one maps to; any other
    # line becomes a paragraph. The markers are mutually exclusive prefixes.
    heading_tags = (("# ", "h1"), ("## ", "h2"), ("### ", "h3"))
    rendered = []
    for raw_line in summary.split('\n'):
        tag, body = "p", raw_line
        for marker, heading in heading_tags:
            if raw_line.startswith(marker):
                tag, body = heading, raw_line[len(marker):]
                break
        rendered.append(f"<{tag}>{html.escape(body)}</{tag}>")

    formatted_summary = '\n'.join(rendered)
    escaped_title = html.escape(title)

    summary_content = f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
    <h3>요약</h3>
    <h2>{escaped_title}</h2>
    {formatted_summary}
    </div>"""

    # Stage 3: final result
    yield script_content, summary_content

# Gradio interface
# Layout: a URL textbox and a button, followed by two HTML panes
# (original transcript and summary).
with gr.Blocks() as demo:
    gr.Markdown("## YouTube 스크립트 추출 및 요약 도구")
    youtube_url_input = gr.Textbox(label="YouTube URL 입력")
    analyze_button = gr.Button("분석하기")
    script_output = gr.HTML(label="원문 스크립트")
    summary_output = gr.HTML(label="요약")

    # analyze is a generator yielding (script_html, summary_html) pairs; each
    # pair maps positionally onto the two outputs listed here.
    # NOTE(review): Gradio is expected to push every yielded pair to the UI as
    # it arrives - confirm for the installed Gradio version.
    analyze_button.click(
        analyze,
        inputs=[youtube_url_input],
        outputs=[script_output, summary_output]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()