bcci committed on
Commit
65e1914
·
verified ·
1 Parent(s): eb95b12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -7
app.py CHANGED
@@ -42,18 +42,35 @@ def generate_wav_header(sample_rate: int, num_channels: int, sample_width: int,
42
 
43
  def custom_split_text(text: str) -> list:
44
  """
45
- Custom splitting: split text into chunks where each chunk doubles in size.
 
 
 
 
 
 
46
  """
47
  words = text.split()
48
  chunks = []
49
- chunk_size = 1
50
  start = 0
51
  while start < len(words):
52
- end = start + chunk_size
53
- chunk = " ".join(words[start:end])
54
- chunks.append(chunk)
55
- start = end
56
- chunk_size *= 2 # double the chunk size for the next iteration
 
 
 
 
 
 
 
 
 
 
 
57
  return chunks
58
 
59
 
 
42
 
43
  def custom_split_text(text: str) -> list:
44
  """
45
+ Custom splitting:
46
+ - Start with a chunk size of 2 words.
47
+ - For each chunk, if a period (".") is found in any word (except if it’s the very last word),
48
+ then split the chunk at that word (include words up to that word).
49
+ - Otherwise, use the current chunk size.
50
+ - For subsequent chunks, increase the chunk size by 2.
51
+ - If there are fewer than the desired number of words for a full chunk, add all remaining words.
52
  """
53
  words = text.split()
54
  chunks = []
55
+ chunk_size = 2
56
  start = 0
57
  while start < len(words):
58
+ candidate_end = start + chunk_size
59
+ if candidate_end > len(words):
60
+ candidate_end = len(words)
61
+ chunk_words = words[start:candidate_end]
62
+ # Look for a period in any word except the last one.
63
+ split_index = None
64
+ for i in range(len(chunk_words) - 1):
65
+ if '.' in chunk_words[i]:
66
+ split_index = i
67
+ break
68
+ if split_index is not None:
69
+ candidate_end = start + split_index + 1
70
+ chunk_words = words[start:candidate_end]
71
+ chunks.append(" ".join(chunk_words))
72
+ start = candidate_end
73
+ chunk_size += 2 # Increase the chunk size by 2 for the next iteration.
74
  return chunks
75
 
76