File size: 8,327 Bytes
073d3e8
 
 
 
4a45930
073d3e8
876af8e
23b09fc
d3f7e6f
7b2bf17
d3f7e6f
 
 
 
bb03802
073d3e8
 
 
d3f7e6f
 
 
073d3e8
 
 
 
7b2bf17
073d3e8
 
 
 
 
 
 
 
d3f7e6f
 
 
3c311b4
7b2bf17
 
876af8e
7b2bf17
3c311b4
d3f7e6f
 
3c311b4
57d8b30
876af8e
073d3e8
7b2bf17
d3f7e6f
073d3e8
57d8b30
 
 
 
 
 
 
 
 
 
 
 
d3f7e6f
52d4f4d
876af8e
d3f7e6f
e95db1b
 
 
 
 
 
 
 
 
a725404
e95db1b
 
 
 
a725404
 
57336e5
629db0a
 
 
 
 
 
 
 
a366b56
e95db1b
876af8e
 
 
7b2bf17
 
 
b146cdf
d3f7e6f
a7e5781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730a2fb
 
6abe9bf
730a2fb
aff0317
730a2fb
 
 
 
 
 
 
 
f14837f
730a2fb
6abe9bf
 
0fbd8f6
1ed9894
6abe9bf
 
0fbd8f6
 
1ed9894
0fbd8f6
 
6abe9bf
f3279ca
9448ef3
 
 
3c404e0
36c388a
3c404e0
36c388a
3c404e0
36c388a
9448ef3
36c388a
9448ef3
 
 
f3279ca
24e6bef
108f0d2
23b09fc
108f0d2
 
0fbd8f6
 
1ed9894
073d3e8
7b2bf17
073d3e8
4a45930
073d3e8
 
1ed9894
 
073d3e8
 
85efe2c
 
1ed9894
073d3e8
 
7b2bf17
85efe2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import gradio as gr
from gradio_client import Client
import json
import logging
import openai
import os
import re
import html

# Logging configuration: DEBUG-and-above records go to a local log file.
logging.basicConfig(
    filename='youtube_script_extractor.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# The OpenAI API key is read from the environment (None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")

def parse_api_response(response):
    """Normalize a raw API response into a single dict.

    The response may be a JSON string, a dict, or a non-empty list; a string
    is decoded with ``json.loads`` first, and for a non-empty list only the
    first element is kept.

    Args:
        response: JSON text, a dict, or a list whose first item is the payload.

    Returns:
        The payload as a dict.

    Raises:
        ValueError: if decoding fails or the final value is not a dict. The
            underlying error is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            response = json.loads(response)
        # An empty list falls through and is rejected by the dict check below.
        if isinstance(response, list) and response:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(f"μ˜ˆμƒμΉ˜ λͺ»ν•œ 응닡 ν˜•μ‹μž…λ‹ˆλ‹€. 받은 데이터 νƒ€μž…: {type(response)}")
        return response
    except Exception as e:
        # Every failure is reported to callers as ValueError with one message
        # prefix; chaining keeps the original traceback for debugging.
        message = f"API 응닡 νŒŒμ‹± μ‹€νŒ¨: {str(e)}"
        logging.error(message)
        raise ValueError(message) from e

def get_youtube_script(url):
    """Fetch the title, description and transcript text for a video URL.

    Calls the ``/predict`` endpoint of the ``whispersound/YT_Ts_R`` client,
    normalizes the result with ``parse_api_response`` and reads the first
    entry of its ``data`` list.

    Args:
        url: Video URL passed to the endpoint as ``youtube_url``.

    Returns:
        A ``(title, description, transcription_text)`` tuple. Title and
        description fall back to placeholder strings when missing.

    Raises:
        ValueError: if the response carries no data or the transcript is empty.
        Exception: any error raised while calling or parsing is logged with
            its traceback and re-raised unchanged.
    """
    logging.info(f"슀크립트 μΆ”μΆœ μ‹œμž‘: URL = {url}")
    space_client = Client("whispersound/YT_Ts_R")
    try:
        raw_result = space_client.predict(youtube_url=url, api_name="/predict")
        payload = parse_api_response(raw_result)

        # A missing key and an empty/falsy value are treated the same way.
        if not payload.get('data'):
            raise ValueError("API 응닡에 μœ νš¨ν•œ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.")

        entry = payload["data"][0]
        title = entry.get("title", "제λͺ© μ—†μŒ")
        description = entry.get("description", "μ„€λͺ… μ—†μŒ")
        transcription_text = entry.get("transcriptionAsText", "")

        if not transcription_text:
            raise ValueError("μΆ”μΆœλœ μŠ€ν¬λ¦½νŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
    except Exception:
        logging.exception("슀크립트 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ")
        raise
    else:
        logging.info("슀크립트 μΆ”μΆœ μ™„λ£Œ")
        return title, description, transcription_text

def call_api(prompt, max_tokens, temperature, top_p):
    """Send one user message to the chat model and return the reply text.

    Args:
        prompt: Text sent as the single ``user`` message.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        Exception: any error from the request or from reading the response
            is logged with its traceback and re-raised unchanged.
    """
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface — confirm the pinned openai version supports it.
    request = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        completion = openai.ChatCompletion.create(**request)
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception:
        logging.exception("LLM API 호좜 쀑 였λ₯˜ λ°œμƒ")
        raise

def summarize_text(title, description, text):
    """Build the summarization prompt and return the model's summary.

    The prompt is a fixed list of numbered rules followed by the video's
    title, description and transcript; it is sent through ``call_api``.

    Args:
        title: Video title interpolated into the prompt.
        description: Video description interpolated into the prompt.
        text: Transcript interpolated at the end of the prompt.

    Returns:
        Whatever ``call_api`` returns for the assembled prompt.
    """
    generation_settings = {"max_tokens": 8000, "temperature": 0.35, "top_p": 0.95}
    instructions = f"""
[유튜브 μš”μ•½ κ·œμΉ™]
1. λ„ˆλŠ” 유튜브 μ˜μƒ μ „λ¬Έ ν•΄μ„€κ°€λ‘œμ„œ 지침에 맞게 이 글을 μž‘μ„±ν•˜λΌ
2. μ•„λž˜μ˜ 제λͺ©κ³Ό μ„€λͺ…은 이 유튜브 μ˜μƒμ˜ 원본 메타데이터이닀.
3. λ°˜λ“œμ‹œ 제λͺ©κ³Ό μ„€λͺ…μœΌλ‘œ μ£Όμ œμ™€ λ¬Έλ§₯을 λ¨Όμ € νŒŒμ•…ν•˜κ³ , μ•„λž˜μ˜ λŒ€λ³Έμ„ λ°˜λ“œμ‹œ 지침에 맞게 μƒμ„Έν•˜κ²Œ μš”μ•½ν•˜λΌ
4. λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ μž‘μ„±ν•˜λΌ
5. λ°˜λ“œμ‹œ '이 유튜브 λŒ€λ³Έμ€', '이 μ˜μƒμ€', '이 μœ νŠœλΈŒλŠ”'λ“±μ˜ μ†Œκ°œμ‹ ν‘œν˜„μ€ μ œμ™Έν•˜λΌ
6. μš”μ•½λ¬Έλ§ŒμœΌλ‘œλ„ μ˜μƒμ„ 직접 μ‹œμ²­ν•œ 것과 λ™μΌν•œ μˆ˜μ€€μœΌλ‘œ λ‚΄μš©μ„ 이해할 수 μžˆλ„λ‘ μƒμ„Ένžˆ μž‘μ„±
7. 글을 λ„ˆλ¬΄ μ••μΆ•ν•˜κ±°λ‚˜ ν•¨μΆ•ν•˜μ§€ 말고, μ€‘μš”ν•œ λ‚΄μš©κ³Ό 세뢀사항을 λͺ¨λ‘ 포함
8. λ°˜λ“œμ‹œ λŒ€λ³Έμ˜ 흐름과 논리 ꡬ쑰λ₯Ό μœ μ§€
9. λŒ€λ³Έμ˜ λͺ©μ μ΄λ‚˜ μ˜λ„λ₯Ό νŒŒμ•…ν•˜κ³ , 이λ₯Ό μš”μ•½μ— λ°˜λ“œμ‹œ 반영
10. λ°˜λ“œμ‹œ μ‹œκ°„ μˆœμ„œλ‚˜ μ‚¬κ±΄μ˜ μ „κ°œ 과정을 λͺ…ν™•ν•˜κ²Œ 반영
11. λ“±μž₯인물, μž₯μ†Œ, 사건 λ“± μ€‘μš”ν•œ μš”μ†Œλ₯Ό μ •ν™•ν•˜κ²Œ μž‘μ„±
12. λŒ€λ³Έμ—μ„œ μ „λ‹¬ν•˜λŠ” κ°μ •μ΄λ‚˜ λΆ„μœ„κΈ°λ„ 포함
13. λ°˜λ“œμ‹œ 기술적 μš©μ–΄λ‚˜ μ „λ¬Έ μš©μ–΄κ°€ μžˆμ„ 경우, 이λ₯Ό μ •ν™•ν•˜κ²Œ μ‚¬μš©

14. λ°˜λ“œμ‹œ 핡심 μ„Ήμ…˜(μ†Œμ£Όμ œ)λ₯Ό νŒŒμ•…ν•˜μ—¬ μ„Ήμ…˜μ— 맞게 글을 μš”μ•½ν•˜λΌ(κΈ€μ˜ 양을 κ³ λ €ν•˜μ—¬ μ„Ήμ…˜μ˜ 개수λ₯Ό 탄λ ₯적으둜 μ„€μ •)
15. 각 μ„Ήμ…˜μ˜ 제λͺ©(μ†Œμ£Όμ œ)μ—λŠ” λ‚΄μš©κ³Ό μ–΄μšΈλ¦¬λŠ” μ μ ˆν•œ 이λͺ¨μ§€λ‘œ μ†Œμ£Όμ œλ₯Ό μ‹œμž‘ν•˜λΌ
16. 각 μ„Ήμ…˜μ˜ λ‚΄μš©μ€ Bullet Pointλ₯Ό μ‚¬μš©ν•˜μ—¬ 가독성을 높여라(λ¬Έμž₯ λ‹¨μœ„λ‘œ ꡬ뢄)
  [μ˜ˆμ‹œ]
(λ³€κ²½μ „)
 - 유튜브λ₯Ό 처음 μ‹œμž‘ν•˜λŠ” μ‚¬λžŒλ“€μ€ κ΅¬λ…μž μˆ˜μ™€ μ‘°νšŒμˆ˜μ— 큰 관심을 두고 맀일 유튜브 μŠ€νŠœλ””μ˜€λ₯Ό ν™•μΈν•˜κ²Œ λœλ‹€. κ·ΈλŸ¬λ‚˜ κ΅¬λ…μžκ°€ 100λͺ…, 1,000λͺ…에 λ„λ‹¬ν•˜λŠ” κ²ƒλ§ŒμœΌλ‘œλŠ” 지속적인 μ„±μž₯에 도움이 λ˜μ§€ μ•ŠλŠ”λ‹€. κ΅¬λ…μž μˆ˜κ°€ λŠ˜μ–΄λ‚œ 후에도 유튜브 채널 μš΄μ˜μ— λŒ€ν•œ 감을 μž‘μ§€ λͺ»ν•΄ ν¬κΈ°ν•˜λŠ” κ²½μš°κ°€ λ§Žλ‹€.
(λ³€κ²½ν›„)
 - 유튜브λ₯Ό 처음 μ‹œμž‘ν•˜λŠ” μ‚¬λžŒλ“€μ€ κ΅¬λ…μž μˆ˜μ™€ μ‘°νšŒμˆ˜μ— 큰 관심을 두고 맀일 유튜브 μŠ€νŠœλ””μ˜€λ₯Ό ν™•μΈν•˜κ²Œ λœλ‹€. 
 - κ·ΈλŸ¬λ‚˜ κ΅¬λ…μžκ°€ 100λͺ…, 1,000λͺ…에 λ„λ‹¬ν•˜λŠ” κ²ƒλ§ŒμœΌλ‘œλŠ” 지속적인 μ„±μž₯에 도움이 λ˜μ§€ μ•ŠλŠ”λ‹€. 
 - κ΅¬λ…μž μˆ˜κ°€ λŠ˜μ–΄λ‚œ 후에도 유튜브 채널 μš΄μ˜μ— λŒ€ν•œ 감을 μž‘μ§€ λͺ»ν•΄ ν¬κΈ°ν•˜λŠ” κ²½μš°κ°€ λ§Žλ‹€.
17. 각 μ„Ήμ…˜μ˜ λ‚΄μš©μ„ λ°˜λ“œμ‹œ μΆ©μ‹€ν•˜κ²Œ μž‘μ„±

제λͺ©: {title}
μ„€λͺ…: {description}

λŒ€λ³Έ:
{text}
"""
    return call_api(instructions, **generation_settings)

def split_sentences(text):
    """Split text into display chunks of roughly 100 characters.

    The text is cut after any of the listed ending tokens when the token is
    not followed by a word character. The resulting sentences are then
    accumulated into a buffer; the buffer is emitted when adding the next
    sentence would exceed 100 characters, or when the sentence just added
    ends with ``.``, ``?`` or ``!``.

    Empty or whitespace-only chunks are never emitted, so callers can join
    the result without producing blank entries.

    Args:
        text: The text to split.

    Returns:
        A list of stripped, non-empty chunks in their original order.
    """
    # NOTE(review): the alternatives below look like sentence endings whose
    # encoding was garbled — verify against the original file before relying
    # on them to match real input.
    pieces = re.split(r"(λ‹ˆλ‹€|μ—μš”|κ΅¬λ‚˜|ν•΄μš”|κ΅°μš”|κ² μ–΄μš”|μ‹œμ˜€|해라|μ˜ˆμš”|μ•„μš”|λ°μš”|λŒ€μš”|μ„Έμš”|μ–΄μš”|κ²Œμš”|κ΅¬μš”|κ³ μš”|λ‚˜μš”|ν•˜μ£ )(?![\w])", text)

    # With a capturing group, re.split alternates text, ending, text, ...;
    # the final text piece has no ending, so pad the endings with "".
    bodies = pieces[0::2]
    endings = pieces[1::2] + [""]

    chunks = []

    def flush(buffer):
        # Skip empty buffers: the original emitted "" when an over-long
        # sentence arrived while nothing was buffered yet.
        buffer = buffer.strip()
        if buffer:
            chunks.append(buffer)

    current = ""
    for body, ending in zip(bodies, endings):
        sentence = body + ending
        if len(current) + len(sentence) > 100:  # would exceed 100 characters
            flush(current)
            current = sentence.strip()
        else:
            current += sentence
        if sentence.endswith(('.', '?', '!')):
            flush(current)
            current = ""
    flush(current)
    return chunks

def display_script(title, script):
    """Render the transcript as a collapsible HTML block.

    The script is broken into chunks by ``split_sentences`` and the chunks
    are separated by blank lines inside a ``<pre>`` element. Both the title
    and the transcript are HTML-escaped, because they originate from
    external video metadata and must not be interpreted as markup.

    Args:
        title: Video title shown as the heading inside the block.
        script: Raw transcript text.

    Returns:
        An HTML string containing a ``<details>`` element with the script.
    """
    script_sentences = split_sentences(script)
    formatted_script = html.escape("\n\n".join(script_sentences))
    safe_title = html.escape(title)
    return f"""<div style="background-color: #f0f0f0; padding: 20px; border-radius: 10px;">
<h3>원문 슀크립트</h3>
<details>
    <summary>ν΄λ¦­ν•˜μ—¬ 펼치기</summary>
    <h2>{safe_title}</h2>
    <pre style="white-space: pre-wrap;">{formatted_script}</pre>
</details>
</div>"""

def display_summary(title, summary):
    """Wrap a summary in the styled HTML container used for results.

    The title is HTML-escaped so that markup in video metadata is shown as
    text. The summary is inserted verbatim, so the caller is responsible for
    passing markup that is already safe to render.

    Args:
        title: Video title shown as the heading.
        summary: Summary markup placed inside the container as-is.

    Returns:
        An HTML string for the summary panel.
    """
    safe_title = html.escape(title)
    return f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
<h3>μš”μ•½</h3>
<h2>{safe_title}</h2>
{summary}
</div>"""

def analyze(url):
    """Extract a video's transcript, summarize it and stream UI updates.

    This is a generator that yields ``(script_html, summary_html)`` pairs
    three times: progress placeholders, then the rendered transcript with a
    summary placeholder, then the transcript together with the final summary.

    Args:
        url: Video URL handed to ``get_youtube_script``.

    Yields:
        Tuples of two HTML/text strings for the script and summary outputs.
    """
    # Step 1: show progress placeholders while the transcript is fetched.
    extracting_message = "슀크립트 μΆ”μΆœ 쀑..."
    yield extracting_message, extracting_message
    title, description, script = get_youtube_script(url)
    script_content = display_script(title, script)

    # Step 2: show the transcript and announce that the summary is underway.
    yield script_content, "μš”μ•½ 생성 쀑..."

    # Step 3: generate the summary.
    summary = summarize_text(title, description, script)

    # Lines starting with '# ', '## ' or '### ' become h1/h2/h3; every other
    # line becomes a paragraph. Line content is always HTML-escaped.
    heading_tags = (('# ', 'h1'), ('## ', 'h2'), ('### ', 'h3'))
    rendered_lines = []
    for raw_line in summary.split('\n'):
        for prefix, tag in heading_tags:
            if raw_line.startswith(prefix):
                heading_text = html.escape(raw_line[len(prefix):])
                rendered_lines.append(f"<{tag}>{heading_text}</{tag}>")
                break
        else:
            rendered_lines.append(f"<p>{html.escape(raw_line)}</p>")

    formatted_summary = '\n'.join(rendered_lines)

    summary_content = f"""<div style="background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin-top: 20px;">
    <h3>μš”μ•½</h3>
    <h2>{html.escape(title)}</h2>
    {formatted_summary}
    </div>"""

    # Step 4: show the final result.
    yield script_content, summary_content

# Gradio interface: components are created in display order inside the Blocks
# context, then the button is wired to the `analyze` generator.
with gr.Blocks() as demo:
    gr.Markdown("## YouTube 슀크립트 μΆ”μΆœ 및 μš”μ•½ 도ꡬ")
    youtube_url_input = gr.Textbox(label="YouTube URL μž…λ ₯")
    analyze_button = gr.Button("λΆ„μ„ν•˜κΈ°")
    script_output = gr.HTML(label="원문 슀크립트")
    summary_output = gr.HTML(label="μš”μ•½")

    # Each pair yielded by `analyze` updates the two HTML outputs in order.
    analyze_button.click(
        fn=analyze,
        inputs=[youtube_url_input],
        outputs=[script_output, summary_output],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()