File size: 6,983 Bytes
073d3e8
 
 
 
4a45930
073d3e8
876af8e
d3f7e6f
7b2bf17
d3f7e6f
 
 
 
bb03802
073d3e8
 
 
d3f7e6f
 
 
073d3e8
 
 
 
7b2bf17
073d3e8
 
 
 
 
 
 
 
d3f7e6f
 
 
3c311b4
7b2bf17
 
876af8e
7b2bf17
3c311b4
d3f7e6f
 
3c311b4
57d8b30
876af8e
073d3e8
7b2bf17
d3f7e6f
073d3e8
57d8b30
 
 
 
 
 
 
 
 
 
 
 
d3f7e6f
52d4f4d
876af8e
d3f7e6f
e95db1b
 
 
 
 
 
 
 
 
a725404
e95db1b
 
 
 
a725404
 
 
 
 
e95db1b
876af8e
 
 
7b2bf17
 
 
b146cdf
d3f7e6f
a7e5781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730a2fb
 
6abe9bf
730a2fb
aff0317
730a2fb
 
 
 
 
 
 
 
aff0317
730a2fb
6abe9bf
 
0fbd8f6
 
6abe9bf
 
0fbd8f6
 
 
 
 
6abe9bf
616c44c
0fbd8f6
 
 
073d3e8
7b2bf17
073d3e8
4a45930
073d3e8
 
730a2fb
 
073d3e8
 
85efe2c
 
0fbd8f6
073d3e8
 
7b2bf17
85efe2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import html
import json
import logging
import os
import re

import gradio as gr
import openai
from gradio_client import Client

# Logging configuration: DEBUG-and-above records go to a local log file.
logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG, 
                    format='%(asctime)s - %(levelname)s - %(message)s')

# The OpenAI API key is read from the environment (None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")

def parse_api_response(response):
    """Normalize a raw API response into a single dictionary.

    A string is decoded as JSON and a non-empty list is reduced to its first
    element. Whatever remains after those steps must be a dict.

    Args:
        response: Raw value returned by the API client (str, list or dict).

    Returns:
        The response dictionary.

    Raises:
        ValueError: If the response cannot be decoded or does not end up as
            a dict. The underlying error is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            response = json.loads(response)
        if isinstance(response, list) and len(response) > 0:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 응닡 ν˜•μ‹μž…λ‹ˆλ‹€. 받은 데이터 νƒ€μž…: {type(response)}")
        return response
    except Exception as e:
        logging.error(f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}")
        # Chain explicitly so the root cause (e.g. a JSON decode error) stays
        # attached to the ValueError that callers see.
        raise ValueError(f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}") from e

def get_youtube_script(url):
    """Fetch title, description and transcript text for a YouTube URL.

    Calls the ``/predict`` endpoint of the ``whispersound/YT_Ts_R`` app through
    ``gradio_client`` and reads the first entry of the response's ``data`` list.

    Args:
        url: The YouTube video URL, passed to the endpoint as ``youtube_url``.

    Returns:
        A ``(title, description, transcription_text)`` tuple. Title and
        description fall back to placeholder strings when the keys are absent.

    Raises:
        ValueError: If the response has no usable ``data`` entry or the
            transcript text is empty.
        Exception: Any other error from the client or from parsing is logged
            with its traceback and re-raised unchanged.
    """
    logging.info(f"슀크립트 μΆ”μΆœ μ‹œμž‘: URL = {url}")
    try:
        # Build the client inside the try block so a failure to reach the
        # remote app is logged like every other extraction error.
        client = Client("whispersound/YT_Ts_R")
        result = client.predict(youtube_url=url, api_name="/predict")
        parsed_result = parse_api_response(result)

        if 'data' not in parsed_result or not parsed_result['data']:
            raise ValueError("API 응닡에 μœ νš¨ν•œ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.")

        data = parsed_result["data"][0]
        title = data.get("title", "제λͺ© μ—†μŒ")
        description = data.get("description", "μ„€λͺ… μ—†μŒ")
        transcription_text = data.get("transcriptionAsText", "")

        if not transcription_text:
            raise ValueError("μΆ”μΆœλœ μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€.")

        logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
        return title, description, transcription_text
    except Exception:
        # logging.exception records the traceback; the caller still gets the
        # original exception.
        logging.exception("슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ")
        raise

def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn user prompt to the chat model and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message.
        max_tokens: Forwarded to the completion call unchanged.
        temperature: Forwarded to the completion call unchanged.
        top_p: Forwarded to the completion call unchanged.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        Exception: Any error from the call or from reading the response is
            logged with its traceback and re-raised.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version supports it.
    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception:
        logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
        raise

def summarize_text(title, description, text):
    """Build the summarization prompt and return the model's reply.

    The prompt is a fixed, numbered list of summarization rules followed by
    the supplied title, description and transcript, interpolated verbatim.
    It is sent through ``call_api`` with ``max_tokens=8000``,
    ``temperature=0.35`` and ``top_p=0.95``.

    Args:
        title: Video title placed in the prompt.
        description: Video description placed in the prompt.
        text: Transcript text placed at the end of the prompt.

    Returns:
        Whatever ``call_api`` returns for the assembled prompt.
    """
    # NOTE(review): the rule text below appears mis-encoded in this copy of the
    # file - confirm it against the repository original before editing it.
    prompt = f"""
[유튜브 μš”μ•½ κ·œμΉ™]
1. λ„ˆλŠ” 유튜브 μ˜μƒ μ „λ¬Έ ν•΄μ„€κ°€λ‘œμ„œ 지침에 맞게 이 글을 μž‘μ„±ν•˜λΌ
2. μ•„λž˜μ˜ 제λͺ©κ³Ό μ„€λͺ…은 이 유튜브 μ˜μƒμ˜ 원본 메타데이터이닀.
3. λ°˜λ“œμ‹œ 제λͺ©κ³Ό μ„€λͺ…μœΌλ‘œ μ£Όμ œμ™€ λ¬Έλ§₯을 λ¨Όμ € νŒŒμ•…ν•˜κ³ , μ•„λž˜μ˜ λŒ€λ³Έμ„ λ°˜λ“œμ‹œ 지침에 맞게 μƒμ„Έν•˜κ²Œ μš”μ•½ν•˜λΌ
4. λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ μž‘μ„±ν•˜λΌ
5. λ°˜λ“œμ‹œ '이 유튜브 λŒ€λ³Έμ€', '이 μ˜μƒμ€', '이 μœ νŠœλΈŒλŠ”'λ“±μ˜ μ†Œκ°œμ‹ ν‘œν˜„μ€ μ œμ™Έν•˜λΌ
6. μš”μ•½λ¬Έλ§ŒμœΌλ‘œλ„ μ˜μƒμ„ 직접 μ‹œμ²­ν•œ 것과 λ™μΌν•œ μˆ˜μ€€μœΌλ‘œ λ‚΄μš©μ„ 이해할 수 μžˆλ„λ‘ μƒμ„Ένžˆ μž‘μ„±
7. 글을 λ„ˆλ¬΄ μ••μΆ•ν•˜κ±°λ‚˜ ν•¨μΆ•ν•˜μ§€ 말고, μ€‘μš”ν•œ λ‚΄μš©κ³Ό 세뢀사항을 λͺ¨λ‘ 포함
8. λ°˜λ“œμ‹œ λŒ€λ³Έμ˜ 흐름과 논리 ꡬ쑰λ₯Ό μœ μ§€
9. λŒ€λ³Έμ˜ λͺ©μ μ΄λ‚˜ μ˜λ„λ₯Ό νŒŒμ•…ν•˜κ³ , 이λ₯Ό μš”μ•½μ— λ°˜λ“œμ‹œ 반영
10. λ°˜λ“œμ‹œ μ‹œκ°„ μˆœμ„œλ‚˜ μ‚¬κ±΄μ˜ μ „κ°œ 과정을 λͺ…ν™•ν•˜κ²Œ 반영
11. λ“±μž₯인물, μž₯μ†Œ, 사건 λ“± μ€‘μš”ν•œ μš”μ†Œλ₯Ό μ •ν™•ν•˜κ²Œ μž‘μ„±
12. λŒ€λ³Έμ—μ„œ μ „λ‹¬ν•˜λŠ” κ°μ •μ΄λ‚˜ λΆ„μœ„κΈ°λ„ 포함
13. λ°˜λ“œμ‹œ 기술적 μš©μ–΄λ‚˜ μ „λ¬Έ μš©μ–΄κ°€ μžˆμ„ 경우, 이λ₯Ό μ •ν™•ν•˜κ²Œ μ‚¬μš©

14. λ°˜λ“œμ‹œ 핡심 μ„Ήμ…˜(μ†Œμ£Όμ œ)λ₯Ό νŒŒμ•…ν•˜μ—¬ μ„Ήμ…˜μ— 맞게 글을 μš”μ•½ν•˜λΌ(κΈ€μ˜ 양을 κ³ λ €ν•˜μ—¬ μ„Ήμ…˜μ˜ 개수λ₯Ό 탄λ ₯적으둜 μ„€μ •)
15. 각 μ„Ήμ…˜μ˜ λ‚΄μš©μ€ κΈ€λ¨Έλ¦¬κΈ°ν˜Έλ₯Ό μ‚¬μš©ν•˜μ—¬ 가독성을 높여라
16. 각 λ¬Έμž₯을 λͺ…ν™•ν•˜κ²Œ κ΅¬λΆ„ν•˜κ³ , μ μ ˆν•œ 단락 ꡬ뢄을 μ‚¬μš©
17. 각 μ„Ήμ…˜μ˜ 제λͺ©(μ†Œμ£Όμ œ)μ—λŠ” λ‚΄μš©κ³Ό μ–΄μšΈλ¦¬λŠ” μ μ ˆν•œ 이λͺ¨μ§€λ₯Ό μ‚¬μš©ν•˜λΌ

제λͺ©: {title}
μ„€λͺ…: {description}

λŒ€λ³Έ:
{text}
"""
    return call_api(prompt, max_tokens=8000, temperature=0.35, top_p=0.95)

def split_sentences(text, max_length=100):
    """Split a transcript into display chunks of roughly ``max_length`` characters.

    The text is cut after each sentence-ending suffix listed in the pattern
    (only when the suffix is not followed by a word character). Consecutive
    pieces are then merged until adding the next one would exceed
    ``max_length``. A piece that ends with ``.``, ``?`` or ``!`` closes the
    current chunk immediately.

    Args:
        text: The transcript text to split.
        max_length: Maximum combined length before a new chunk is started.
            A single piece longer than this is kept whole.

    Returns:
        A list of stripped, non-empty chunk strings (empty for blank input).
    """
    # The capture group keeps each ending in the result, so re.split yields
    # body, ending, body, ending, ..., body (always an odd number of items).
    pieces = re.split(r"(λ‹ˆλ‹€|μ—μš”|κ΅¬λ‚˜|ν•΄μš”|κ΅°μš”|κ² μ–΄μš”|μ‹œμ˜€|해라|μ˜ˆμš”|μ•„μš”|λ°μš”|λŒ€μš”|μ„Έμš”|μ–΄μš”|κ²Œμš”|κ΅¬μš”|κ³ μš”|λ‚˜μš”|ν•˜μ£ )(?![\w])", text)
    bodies = pieces[0::2]
    endings = pieces[1::2] + [""]  # the final body has no ending

    combined_sentences = []

    def _flush(chunk):
        # Only keep chunks with visible content; this avoids the empty entries
        # produced when the very first piece already exceeds the limit or the
        # input is whitespace-only.
        chunk = chunk.strip()
        if chunk:
            combined_sentences.append(chunk)

    current_sentence = ""
    for body, ending in zip(bodies, endings):
        sentence = body + ending
        if len(current_sentence) + len(sentence) > max_length:
            # Adding this piece would exceed the limit: close the current chunk.
            _flush(current_sentence)
            current_sentence = sentence.strip()
        else:
            current_sentence += sentence
        # NOTE(review): punctuation that follows an ending lands at the start
        # of the next piece, so this check mostly fires on the final piece -
        # confirm whether that is intended.
        if sentence.endswith(('.', '?', '!')):
            _flush(current_sentence)
            current_sentence = ""
    _flush(current_sentence)
    return combined_sentences

def display_script(title, script):
    """Render the transcript as a collapsible HTML block.

    The script is split with ``split_sentences`` and the chunks are joined
    with blank lines inside a ``<pre>`` element. Title and transcript come
    from an external service, so both are HTML-escaped before interpolation.

    Args:
        title: Video title shown as the heading inside the ``<details>`` element.
        script: Raw transcript text.

    Returns:
        An HTML string.
    """
    script_sentences = split_sentences(script)
    # Escape after joining so markup inside the transcript is shown literally
    # instead of being interpreted by the browser.
    formatted_script = html.escape("\n\n".join(script_sentences))
    # str() keeps the previous tolerance for non-string titles (e.g. None).
    safe_title = html.escape(str(title))
    return f"""<div style="background-color: #f0f0f0; padding: 20px; border-radius: 10px;">
<h3>원문 슀크립트</h3>
<details>
    <summary>ν΄λ¦­ν•˜μ—¬ 펼치기</summary>
    <h2>{safe_title}</h2>
    <pre style="white-space: pre-wrap;">{formatted_script}</pre>
</details>
</div>"""

def display_summary(title, summary):
    """Render the summary as an HTML block with the title as its heading.

    Both values are HTML-escaped before interpolation so that markup in the
    title or in the generated summary is displayed literally.

    Args:
        title: Video title shown in the ``<h2>`` heading.
        summary: Summary text shown in the ``<p>`` element.

    Returns:
        An HTML string.
    """
    # str() keeps the previous tolerance for non-string values (e.g. None).
    safe_title = html.escape(str(title))
    safe_summary = html.escape(str(summary))
    return f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
<h3>μš”μ•½</h3>
<h2>{safe_title}</h2>
<p>{safe_summary}</p>
</div>"""

def analyze(url):
    """Drive the two-stage analysis, yielding UI updates after each stage.

    Each yield is a pair of ``gr.update`` values: the first for the script
    output, the second for the summary output.

    Args:
        url: The YouTube URL entered by the user.

    Yields:
        1. An "extracting" notice, with the summary output hidden.
        2. The rendered script, with a "summarizing" notice.
        3. The rendered script, with the finished summary text.
    """
    def _show(content):
        # Make a component visible with the given content.
        return gr.update(value=content, visible=True)

    # Stage 1: announce that transcript extraction is running.
    yield _show("<div>슀크립트 μΆ”μΆœ 쀑...</div>"), gr.update(visible=False)
    video_title, video_description, transcript = get_youtube_script(url)
    script_html = display_script(video_title, transcript)

    # Stage 2: show the original script and announce the summary step.
    yield _show(script_html), _show("<div>μš”μ•½ 생성 쀑...</div>")

    # Generate the summary.
    llm_summary = summarize_text(video_title, video_description, transcript)
    # NOTE(review): this is a Markdown-style heading, while the summary output
    # is wired to an HTML component - confirm it renders as intended.
    summary_text = f"# {video_title}\n\n{llm_summary}"

    # Final result: script plus summary.
    yield _show(script_html), _show(summary_text)

# Gradio interface: one URL input, one button, and two HTML outputs.
with gr.Blocks() as demo:
    gr.Markdown("## YouTube 슀크립트 μΆ”μΆœ 및 μš”μ•½ 도ꡬ")
    youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
    analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
    # Both outputs start hidden; analyze() makes them visible through gr.update.
    script_output = gr.HTML(label="원문 슀크립트", visible=False)
    summary_output = gr.HTML(label="μš”μ•½", visible=False)

    # analyze is a generator, so each yielded pair updates both outputs in turn.
    analyze_button.click(
        analyze,
        inputs=[youtube_url_input],
        outputs=[script_output, summary_output]
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()