Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,36 +12,6 @@ logging.basicConfig(filename='youtube_script_extractor.log', level=logging.DEBUG
|
|
| 12 |
|
| 13 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 14 |
|
| 15 |
-
# 문장 구분 함수
|
| 16 |
-
def split_sentences(text):
|
| 17 |
-
# 주어진 구분 기준을 사용한 정규 표현식
|
| 18 |
-
sentence_end = r'(λλ€|μμ|ꡬλ|ν΄μ|κ΅°μ|κ² μ΄μ|μμ€|ν΄λΌ|μμ|μμ|λ°μ|λμ|μΈμ|μ΄μ|κ²μ|ꡬμ|κ³ μ|λμ|νμ£ )(?![\w])'
|
| 19 |
-
segments = re.split(f'({sentence_end})', text)
|
| 20 |
-
|
| 21 |
-
combined_sentences = []
|
| 22 |
-
current_sentence = ""
|
| 23 |
-
|
| 24 |
-
for i in range(0, len(segments), 2):
|
| 25 |
-
segment = segments[i]
|
| 26 |
-
if i + 1 < len(segments):
|
| 27 |
-
segment += segments[i + 1]
|
| 28 |
-
|
| 29 |
-
if len(current_sentence) + len(segment) > 100:
|
| 30 |
-
if current_sentence:
|
| 31 |
-
combined_sentences.append(current_sentence.strip())
|
| 32 |
-
current_sentence = segment.strip()
|
| 33 |
-
else:
|
| 34 |
-
current_sentence += (' ' if current_sentence else '') + segment.strip()
|
| 35 |
-
|
| 36 |
-
if re.search(sentence_end, segment):
|
| 37 |
-
combined_sentences.append(current_sentence.strip())
|
| 38 |
-
current_sentence = ""
|
| 39 |
-
|
| 40 |
-
if current_sentence:
|
| 41 |
-
combined_sentences.append(current_sentence.strip())
|
| 42 |
-
|
| 43 |
-
return combined_sentences
|
| 44 |
-
|
| 45 |
def parse_api_response(response):
|
| 46 |
try:
|
| 47 |
if isinstance(response, str):
|
|
@@ -126,8 +96,8 @@ def analyze(url, progress=gr.Progress()):
|
|
| 126 |
script_sentences = split_sentences(script)
|
| 127 |
script_content = f"# {title}\n\n" + "\n".join(script_sentences)
|
| 128 |
|
| 129 |
-
# 원문 스크립트 먼저
|
| 130 |
-
yield script_content,
|
| 131 |
|
| 132 |
progress(0.5, desc="μμ½ μμ± μ€...")
|
| 133 |
summary = summarize_text(title, description, "\n".join(script_sentences))
|
|
@@ -142,6 +112,34 @@ def analyze(url, progress=gr.Progress()):
|
|
| 142 |
logging.exception(error_msg)
|
| 143 |
yield error_msg, error_msg
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
# Gradio 인터페이스
|
| 146 |
with gr.Blocks() as demo:
|
| 147 |
gr.Markdown("## YouTube μ€ν¬λ¦½νΈ μΆμΆ λ° μμ½ λꡬ")
|
|
@@ -157,7 +155,8 @@ with gr.Blocks() as demo:
|
|
| 157 |
analyze_button.click(
|
| 158 |
analyze,
|
| 159 |
inputs=[youtube_url_input],
|
| 160 |
-
outputs=[script_output, summary_output]
|
|
|
|
| 161 |
)
|
| 162 |
|
| 163 |
if __name__ == "__main__":
|
|
|
|
| 12 |
|
| 13 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def parse_api_response(response):
|
| 16 |
try:
|
| 17 |
if isinstance(response, str):
|
|
|
|
| 96 |
script_sentences = split_sentences(script)
|
| 97 |
script_content = f"# {title}\n\n" + "\n".join(script_sentences)
|
| 98 |
|
| 99 |
+
# 원문 스크립트 먼저 출력하고 진행
|
| 100 |
+
yield script_content, "μμ½ μ€λΉ μ€..."
|
| 101 |
|
| 102 |
progress(0.5, desc="μμ½ μμ± μ€...")
|
| 103 |
summary = summarize_text(title, description, "\n".join(script_sentences))
|
|
|
|
| 112 |
logging.exception(error_msg)
|
| 113 |
yield error_msg, error_msg
|
| 114 |
|
| 115 |
+
def split_sentences(text):
|
| 116 |
+
sentence_end = r'(λλ€|μμ|ꡬλ|ν΄μ|κ΅°μ|κ² μ΄μ|μμ€|ν΄λΌ|μμ|μμ|λ°μ|λμ|μΈμ|μ΄μ|κ²μ|ꡬμ|κ³ μ|λμ|νμ£ )(?![\w])'
|
| 117 |
+
segments = re.split(f'({sentence_end})', text)
|
| 118 |
+
|
| 119 |
+
combined_sentences = []
|
| 120 |
+
current_sentence = ""
|
| 121 |
+
|
| 122 |
+
for i in range(0, len(segments), 2):
|
| 123 |
+
segment = segments[i]
|
| 124 |
+
if i + 1 < len(segments):
|
| 125 |
+
segment += segments[i + 1]
|
| 126 |
+
|
| 127 |
+
if len(current_sentence) + len(segment) > 100:
|
| 128 |
+
if current_sentence:
|
| 129 |
+
combined_sentences.append(current_sentence.strip())
|
| 130 |
+
current_sentence = segment.strip()
|
| 131 |
+
else:
|
| 132 |
+
current_sentence += (' ' if current_sentence else '') + segment.strip()
|
| 133 |
+
|
| 134 |
+
if re.search(sentence_end, segment):
|
| 135 |
+
combined_sentences.append(current_sentence.strip())
|
| 136 |
+
current_sentence = ""
|
| 137 |
+
|
| 138 |
+
if current_sentence:
|
| 139 |
+
combined_sentences.append(current_sentence.strip())
|
| 140 |
+
|
| 141 |
+
return combined_sentences
|
| 142 |
+
|
| 143 |
# Gradio 인터페이스
|
| 144 |
with gr.Blocks() as demo:
|
| 145 |
gr.Markdown("## YouTube μ€ν¬λ¦½νΈ μΆμΆ λ° μμ½ λꡬ")
|
|
|
|
| 155 |
analyze_button.click(
|
| 156 |
analyze,
|
| 157 |
inputs=[youtube_url_input],
|
| 158 |
+
outputs=[script_output, summary_output],
|
| 159 |
+
show_progress=True
|
| 160 |
)
|
| 161 |
|
| 162 |
if __name__ == "__main__":
|