Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -42,18 +42,35 @@ def generate_wav_header(sample_rate: int, num_channels: int, sample_width: int,
|
|
42 |
|
43 |
def custom_split_text(text: str) -> list:
|
44 |
"""
|
45 |
-
Custom splitting:
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
"""
|
47 |
words = text.split()
|
48 |
chunks = []
|
49 |
-
chunk_size =
|
50 |
start = 0
|
51 |
while start < len(words):
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
return chunks
|
58 |
|
59 |
|
|
|
42 |
|
43 |
def custom_split_text(text: str, *, initial_size: int = 2, step: int = 2) -> list:
    """Split *text* into progressively larger word chunks, breaking at periods.

    The text is split on whitespace and consumed left to right. The first
    chunk targets ``initial_size`` words and every following chunk targets
    ``step`` more words than the one before it. A chunk is cut short when a
    word *before* its last word contains a period: the chunk then ends on
    (and includes) the first such word. A period in the chunk's last word
    changes nothing, because the chunk ends there anyway.

    Any "." inside a word counts, so tokens such as "3.14" or "e.g." also
    trigger a cut. The target size still grows by ``step`` after a chunk
    that was cut short.

    Args:
        text: The text to split. Runs of whitespace are treated as a single
            separator and are not preserved in the output.
        initial_size: Target number of words for the first chunk (>= 1).
        step: Amount added to the target size after each chunk (>= 0).

    Returns:
        A list of chunk strings, each made of the chunk's words joined by a
        single space. Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``initial_size`` is less than 1 or ``step`` is
            negative.
    """
    # A zero-sized window that never grows would loop forever; reject it.
    if initial_size < 1:
        raise ValueError("initial_size must be at least 1")
    if step < 0:
        raise ValueError("step must be non-negative")

    words = text.split()
    chunks = []
    chunk_size = initial_size
    start = 0
    while start < len(words):
        # Slicing clamps at the end of the list, so the final chunk simply
        # takes whatever words remain.
        window = words[start:start + chunk_size]

        # Only words before the last one can shorten the chunk.
        cut = next(
            (i for i, word in enumerate(window[:-1]) if "." in word),
            None,
        )
        if cut is not None:
            window = window[:cut + 1]

        chunks.append(" ".join(window))
        start += len(window)
        # The target grows after every chunk, even one that was cut short.
        chunk_size += step
    return chunks
|
75 |
|
76 |
|