AIRider committed on
Commit
5f0de54
·
verified ·
1 Parent(s): 0300f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -14,24 +14,32 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
14
 
15
  # 문장 구분 함수
16
  def split_sentences(text):
17
- sentences = re.split(r"(λ‹ˆλ‹€|μ—μš”|κ΅¬λ‚˜|ν•΄μš”|κ΅°μš”|κ² μ–΄μš”|μ‹œμ˜€|해라|μ˜ˆμš”|μ•„μš”|λ°μš”|λŒ€μš”|μ„Έμš”|μ–΄μš”|κ²Œμš”|κ΅¬μš”|κ³ μš”|λ‚˜μš”|ν•˜μ£ )(?![\w])", text)
 
 
 
18
  combined_sentences = []
19
  current_sentence = ""
20
- for i in range(0, len(sentences), 2):
21
- if i + 1 < len(sentences):
22
- sentence = sentences[i] + sentences[i + 1]
23
- else:
24
- sentence = sentences[i]
25
- if len(current_sentence) + len(sentence) > 100: # 100자λ₯Ό μ΄ˆκ³Όν•  경우
26
- combined_sentences.append(current_sentence.strip())
27
- current_sentence = sentence.strip()
 
 
28
  else:
29
- current_sentence += sentence
30
- if sentence.endswith(('.', '?', '!')):
 
31
  combined_sentences.append(current_sentence.strip())
32
  current_sentence = ""
 
33
  if current_sentence:
34
  combined_sentences.append(current_sentence.strip())
 
35
  return combined_sentences
36
 
37
  def parse_api_response(response):
@@ -114,17 +122,17 @@ def analyze(url, progress=gr.Progress()):
114
  progress(0, desc="슀크립트 μΆ”μΆœ 쀑...")
115
  title, description, script = get_youtube_script(url)
116
 
117
- progress(33, desc="원문 슀크립트 처리 쀑...")
118
  script_sentences = split_sentences(script)
119
  script_content = f"# {title}\n\n" + "\n".join(script_sentences)
120
 
121
- progress(66, desc="μš”μ•½ 생성 쀑...")
122
  summary = summarize_text(title, description, script)
123
 
124
- progress(88, desc="μš”μ•½ λ‚΄μš© 정리 쀑...")
125
  summary_content = f"# {title}\n\n{summary}"
126
 
127
- progress(100, desc="μ™„λ£Œ")
128
  return script_content, summary_content
129
  except Exception as e:
130
  error_msg = f"처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
 
14
 
15
  # 문장 구분 함수
16
  def split_sentences(text):
17
+ # 주어진 ꡬ뢄 기쀀을 μ‚¬μš©ν•œ μ •κ·œ ν‘œν˜„μ‹
18
+ sentence_end = r'(λ‹ˆλ‹€|μ—μš”|κ΅¬λ‚˜|ν•΄μš”|κ΅°μš”|κ² μ–΄μš”|μ‹œμ˜€|해라|μ˜ˆμš”|μ•„μš”|λ°μš”|λŒ€μš”|μ„Έμš”|μ–΄μš”|κ²Œμš”|κ΅¬μš”|κ³ μš”|λ‚˜μš”|ν•˜μ£ )(?![\w])'
19
+ segments = re.split(f'({sentence_end})', text)
20
+
21
  combined_sentences = []
22
  current_sentence = ""
23
+
24
+ for i in range(0, len(segments), 2):
25
+ segment = segments[i]
26
+ if i + 1 < len(segments):
27
+ segment += segments[i + 1]
28
+
29
+ if len(current_sentence) + len(segment) > 100:
30
+ if current_sentence:
31
+ combined_sentences.append(current_sentence.strip())
32
+ current_sentence = segment.strip()
33
  else:
34
+ current_sentence += (' ' if current_sentence else '') + segment.strip()
35
+
36
+ if re.search(sentence_end, segment):
37
  combined_sentences.append(current_sentence.strip())
38
  current_sentence = ""
39
+
40
  if current_sentence:
41
  combined_sentences.append(current_sentence.strip())
42
+
43
  return combined_sentences
44
 
45
  def parse_api_response(response):
 
122
  progress(0, desc="슀크립트 μΆ”μΆœ 쀑...")
123
  title, description, script = get_youtube_script(url)
124
 
125
+ progress(0.33, desc="원문 슀크립트 처리 쀑...")
126
  script_sentences = split_sentences(script)
127
  script_content = f"# {title}\n\n" + "\n".join(script_sentences)
128
 
129
+ progress(0.66, desc="μš”μ•½ 생성 쀑...")
130
  summary = summarize_text(title, description, script)
131
 
132
+ progress(0.88, desc="μš”μ•½ λ‚΄μš© 정리 쀑...")
133
  summary_content = f"# {title}\n\n{summary}"
134
 
135
+ progress(1.0, desc="μ™„λ£Œ")
136
  return script_content, summary_content
137
  except Exception as e:
138
  error_msg = f"처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"