Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,36 +12,6 @@ logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG
|
|
12 |
|
13 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
14 |
|
15 |
-
# 문장 구분 함수
|
16 |
-
def split_sentences(text):
    """Split *text* into sentences at the configured sentence-ending suffixes.

    The text is cut after every suffix listed in ``sentence_end`` that is not
    immediately followed by a word character. Each piece is returned together
    with its suffix and stripped of surrounding whitespace. Text that does not
    end in a suffix is accumulated, and the accumulator is flushed before it
    would grow past 100 characters; whatever remains is returned last.

    Args:
        text: The script text to split.

    Returns:
        A list of sentence strings in their original order.
    """
    # Regular expression built from the given split criteria.
    # NOTE(review): the alternatives look mis-encoded in this copy of the
    # file; confirm them against the original source before relying on them.
    # The alternation is deliberately non-capturing: re.split() emits every
    # capturing group, so a nested capture would insert each ending twice and
    # break the (text, ending) pairing assumed by the loop below.
    sentence_end = r'(?:λλ€|μμ|ꡬλ|ν΄μ|κ΅°μ|κ² μ΄μ|μμ€|ν΄λΌ|μμ|μμ|λ°μ|λμ|μΈμ|μ΄μ|κ²μ|ꡬμ|κ³ μ|λμ|νμ£ )(?![\w])'
    segments = re.split(f'({sentence_end})', text)

    combined_sentences = []
    current_sentence = ""

    # segments alternates: text, ending, text, ending, ..., trailing text.
    for i in range(0, len(segments), 2):
        segment = segments[i]
        if i + 1 < len(segments):
            # Re-attach the ending that terminated this piece of text.
            segment += segments[i + 1]

        if len(current_sentence) + len(segment) > 100:
            # Flush what has been accumulated so far before starting over.
            if current_sentence:
                combined_sentences.append(current_sentence.strip())
            current_sentence = segment.strip()
        else:
            current_sentence += (' ' if current_sentence else '') + segment.strip()

        # A piece that carries an ending closes the current sentence.
        if re.search(sentence_end, segment):
            combined_sentences.append(current_sentence.strip())
            current_sentence = ""

    if current_sentence:
        combined_sentences.append(current_sentence.strip())

    return combined_sentences
|
44 |
-
|
45 |
def parse_api_response(response):
|
46 |
try:
|
47 |
if isinstance(response, str):
|
@@ -126,8 +96,8 @@ def analyze(url, progress=gr.Progress()):
|
|
126 |
script_sentences = split_sentences(script)
|
127 |
script_content = f"# {title}\n\n" + "\n".join(script_sentences)
|
128 |
|
129 |
-
# μλ¬Έ μ€ν¬λ¦½νΈ λ¨Όμ
|
130 |
-
yield script_content,
|
131 |
|
132 |
progress(0.5, desc="μμ½ μμ± μ€...")
|
133 |
summary = summarize_text(title, description, "\n".join(script_sentences))
|
@@ -142,6 +112,34 @@ def analyze(url, progress=gr.Progress()):
|
|
142 |
logging.exception(error_msg)
|
143 |
yield error_msg, error_msg
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
# Gradio 인터페이스
|
146 |
with gr.Blocks() as demo:
|
147 |
gr.Markdown("## YouTube μ€ν¬λ¦½νΈ μΆμΆ λ° μμ½ λꡬ")
|
@@ -157,7 +155,8 @@ with gr.Blocks() as demo:
|
|
157 |
analyze_button.click(
|
158 |
analyze,
|
159 |
inputs=[youtube_url_input],
|
160 |
-
outputs=[script_output, summary_output]
|
|
|
161 |
)
|
162 |
|
163 |
if __name__ == "__main__":
|
|
|
12 |
|
13 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def parse_api_response(response):
|
16 |
try:
|
17 |
if isinstance(response, str):
|
|
|
96 |
script_sentences = split_sentences(script)
|
97 |
script_content = f"# {title}\n\n" + "\n".join(script_sentences)
|
98 |
|
99 |
+
# μλ¬Έ μ€ν¬λ¦½νΈ λ¨Όμ μΆλ ₯νκ³ μ§ν
|
100 |
+
yield script_content, "μμ½ μ€λΉ μ€..."
|
101 |
|
102 |
progress(0.5, desc="μμ½ μμ± μ€...")
|
103 |
summary = summarize_text(title, description, "\n".join(script_sentences))
|
|
|
112 |
logging.exception(error_msg)
|
113 |
yield error_msg, error_msg
|
114 |
|
115 |
+
def split_sentences(text):
    """Split *text* into sentences at the configured sentence-ending suffixes.

    The text is cut after every suffix listed in ``sentence_end`` that is not
    immediately followed by a word character. Each piece is returned together
    with its suffix and stripped of surrounding whitespace. Text that does not
    end in a suffix is accumulated, and the accumulator is flushed before it
    would grow past 100 characters; whatever remains is returned last.

    Args:
        text: The script text to split.

    Returns:
        A list of sentence strings in their original order.
    """
    # NOTE(review): the alternatives look mis-encoded in this copy of the
    # file; confirm them against the original source before relying on them.
    # The alternation is deliberately non-capturing: re.split() emits every
    # capturing group, so a nested capture would insert each ending twice and
    # break the (text, ending) pairing assumed by the loop below.
    sentence_end = r'(?:λλ€|μμ|ꡬλ|ν΄μ|κ΅°μ|κ² μ΄μ|μμ€|ν΄λΌ|μμ|μμ|λ°μ|λμ|μΈμ|μ΄μ|κ²μ|ꡬμ|κ³ μ|λμ|νμ£ )(?![\w])'
    segments = re.split(f'({sentence_end})', text)

    combined_sentences = []
    current_sentence = ""

    # segments alternates: text, ending, text, ending, ..., trailing text.
    for i in range(0, len(segments), 2):
        segment = segments[i]
        if i + 1 < len(segments):
            # Re-attach the ending that terminated this piece of text.
            segment += segments[i + 1]

        if len(current_sentence) + len(segment) > 100:
            # Flush what has been accumulated so far before starting over.
            if current_sentence:
                combined_sentences.append(current_sentence.strip())
            current_sentence = segment.strip()
        else:
            current_sentence += (' ' if current_sentence else '') + segment.strip()

        # A piece that carries an ending closes the current sentence.
        if re.search(sentence_end, segment):
            combined_sentences.append(current_sentence.strip())
            current_sentence = ""

    if current_sentence:
        combined_sentences.append(current_sentence.strip())

    return combined_sentences
|
142 |
+
|
143 |
# Gradio 인터페이스
|
144 |
with gr.Blocks() as demo:
|
145 |
gr.Markdown("## YouTube μ€ν¬λ¦½νΈ μΆμΆ λ° μμ½ λꡬ")
|
|
|
155 |
analyze_button.click(
|
156 |
analyze,
|
157 |
inputs=[youtube_url_input],
|
158 |
+
outputs=[script_output, summary_output],
|
159 |
+
show_progress=True
|
160 |
)
|
161 |
|
162 |
if __name__ == "__main__":
|