Fix transcript formatting: 1-based speaker labels and a blank line after each speaker/timestamp header
Browse files
- transcript.py +11 -7
transcript.py
CHANGED
@@ -50,8 +50,8 @@ def format_transcript(utterances):
|
|
50 |
# Write out the previous section if it exists
|
51 |
if current_text:
|
52 |
timestamp = format_timestamp(current_start)
|
53 |
-
#
|
54 |
-
section = f"Speaker {current_speaker} {timestamp}\n{' '.join(current_text).strip()}"
|
55 |
formatted_sections.append(section)
|
56 |
current_text = []
|
57 |
|
@@ -64,9 +64,7 @@ def format_transcript(utterances):
|
|
64 |
# Add the final section
|
65 |
if current_text:
|
66 |
timestamp = format_timestamp(current_start)
|
67 |
-
section = (
|
68 |
-
f"Speaker {current_speaker} {timestamp}\n{' '.join(current_text).strip()}"
|
69 |
-
)
|
70 |
formatted_sections.append(section)
|
71 |
|
72 |
return "\n\n".join(formatted_sections)
|
@@ -89,19 +87,25 @@ Please:
|
|
89 |
- Maintain natural conversation flow while improving clarity
|
90 |
|
91 |
3. Format the output consistently:
|
92 |
-
- Keep the "Speaker X
|
|
|
93 |
- Use proper punctuation and capitalization
|
94 |
- Add paragraph breaks for topic changes
|
95 |
-
-
|
|
|
96 |
|
97 |
Example input:
|
98 |
Speaker 1 00:01:15
|
|
|
99 |
Um, yeah, so like, what I was thinking was, you know, when we look at the data, the data shows us that, uh, there's this pattern, this pattern that keeps coming up again and again in the results.
|
100 |
|
101 |
Example output:
|
102 |
Speaker 1 00:01:15
|
|
|
103 |
When we look at the data, we see a consistent pattern in the results.
|
104 |
|
|
|
|
|
105 |
Enhance the following transcript, starting directly with the speaker format:
|
106 |
"""
|
107 |
|
|
|
50 |
# Write out the previous section if it exists
|
51 |
if current_text:
|
52 |
timestamp = format_timestamp(current_start)
|
53 |
+
# Add double line break after speaker/timestamp
|
54 |
+
section = f"Speaker {current_speaker + 1} {timestamp}\n\n{' '.join(current_text).strip()}"
|
55 |
formatted_sections.append(section)
|
56 |
current_text = []
|
57 |
|
|
|
64 |
# Add the final section
|
65 |
if current_text:
|
66 |
timestamp = format_timestamp(current_start)
|
67 |
+
section = f"Speaker {current_speaker + 1} {timestamp}\n\n{' '.join(current_text).strip()}"
|
|
|
|
|
68 |
formatted_sections.append(section)
|
69 |
|
70 |
return "\n\n".join(formatted_sections)
|
|
|
87 |
- Maintain natural conversation flow while improving clarity
|
88 |
|
89 |
3. Format the output consistently:
|
90 |
+
- Keep the "Speaker X 00:00:00" format (no brackets, no other formatting)
|
91 |
+
- Add TWO line breaks between speaker/timestamp and the text
|
92 |
- Use proper punctuation and capitalization
|
93 |
- Add paragraph breaks for topic changes
|
94 |
+
- When you add paragraph breaks between the same speaker's remarks, no need to restate the speaker attribution
|
95 |
+
- Preserve distinct speaker turns
|
96 |
|
97 |
Example input:
|
98 |
Speaker 1 00:01:15
|
99 |
+
|
100 |
Um, yeah, so like, what I was thinking was, you know, when we look at the data, the data shows us that, uh, there's this pattern, this pattern that keeps coming up again and again in the results.
|
101 |
|
102 |
Example output:
|
103 |
Speaker 1 00:01:15
|
104 |
+
|
105 |
When we look at the data, we see a consistent pattern in the results.
|
106 |
|
107 |
+
And when we examine the second part of the analysis, it reveals a completely different finding.
|
108 |
+
|
109 |
Enhance the following transcript, starting directly with the speaker format:
|
110 |
"""
|
111 |
|