Commit eb33652 (parent fecd16e): update: transcript processor

app.py CHANGED

@@ -59,51 +59,199 @@ class TranscriptProcessor:

     def _process_transcript(self) -> None:
         results = self.transcript_data["results"]
-        ... (old segmentation logic removed; its content is not preserved in this view)
+        current_words = []
+        current_speaker = None
+        current_start = None
+        current_items = []
+
+        for item in results["items"]:
+            if item["type"] == "pronunciation":
+                speaker = (
+                    item.get("speaker_label", "").replace("spk_", "").replace("spk", "")
+                )
+
+                # Initialize on first pronunciation item
+                if current_speaker is None:
+                    current_speaker = speaker
+                    current_start = float(item["start_time"])
+
+                # Check for speaker change
+                if speaker != current_speaker:
+                    if current_items:
+                        self._create_segment(
+                            current_speaker,
+                            current_start,
+                            float(item["start_time"]),
+                            current_items,
+                        )
+                    current_items = []
+                    current_words = []
+                    current_speaker = speaker
+                    current_start = float(item["start_time"])
+
+                current_items.append(item)
+                current_words.append(item["alternatives"][0]["content"])
+            elif item["type"] == "punctuation":
+                current_items.append(item)
+                # Only check for segment break if we're over 20 words
+                if len(current_words) >= 20:
+                    # Break on this punctuation: the segment ends where the
+                    # next spoken word begins
+                    next_item = next(
+                        (
+                            it
+                            for it in results["items"][
+                                results["items"].index(item) + 1 :
+                            ]
+                            if it["type"] == "pronunciation"
+                        ),
+                        None,
+                    )
+                    if next_item:
+                        self._create_segment(
+                            current_speaker,
+                            current_start,
+                            float(next_item["start_time"]),
+                            current_items,
+                        )
+                        current_items = []
+                        current_words = []
+                        current_start = float(next_item["start_time"])
+
+        # Don't forget the last segment
+        if current_items:
+            last_time = max(
+                float(item["end_time"])
+                for item in current_items
+                if item["type"] == "pronunciation"
+            )
+            self._create_segment(
+                current_speaker, current_start, last_time, current_items
+            )

     def _create_segment(
         self, speaker_id: str, start: float, end: float, items: list
     ) -> None:
-        ... (old body removed; its content is not preserved in this view)
-        if words:
+        segment_content = []
+        for item in items:
+            if item["type"] == "pronunciation":
+                segment_content.append(item["alternatives"][0]["content"])
+            elif item["type"] == "punctuation":
+                # Append punctuation to the last word without a space
+                if segment_content:
+                    segment_content[-1] += item["alternatives"][0]["content"]
+
+        if segment_content:
             self.segments.append(
                 TranscriptSegment(
                     speaker_id=speaker_id,
                     start_time=start,
                     end_time=end,
-                    text=" ".join(
+                    text=" ".join(segment_content),
                 )
             )

+    def correct_speaker_mapping_with_agenda(self, url: str) -> None:
+        """Fetch agenda from a URL and correct the speaker mapping using OpenAI."""
+        try:
+            if not url.startswith("http"):
+                # add https to the url
+                url = "https://" + url
+
+            response = requests.get(url)
+            response.raise_for_status()
+            html_content = response.text
+
+            # Parse the HTML to find the desired description
+            soup = BeautifulSoup(html_content, "html.parser")
+            description_tag = soup.find(
+                "script", {"type": "application/ld+json"}
+            )  # Find the ld+json metadata block
+            agenda = ""
+
+            if description_tag:
+                # Extract the JSON content
+                json_data = json.loads(description_tag.string)
+                if "description" in json_data:
+                    agenda = json_data["description"]
+                else:
+                    print("Agenda description not found in the JSON metadata.")
+            else:
+                print("No structured data (ld+json) found.")
+
+            if not agenda:
+                print("No agenda found in the structured metadata. Trying meta tags.")
+
+                # Fallback: Use meta description if ld+json doesn't have it
+                meta_description = soup.find("meta", {"name": "description"})
+                agenda = meta_description["content"] if meta_description else ""
+
+            if not agenda:
+                print("No agenda found in any description tags.")
+                return
+
+            print(self.speaker_mapping)
+
+            prompt = (
+                f"Given the original speaker mapping {self.speaker_mapping}, agenda:\n{agenda}, and the transcript: {self.formatted_transcript}\n\n"
+                "Some speaker names in the mapping might have spelling errors or be incomplete. "
+                "Remember that the content in the agenda is accurate and the transcript can have errors, so prioritize the spellings and names in the agenda content. "
+                "If a speaker's name and introduction are similar to the agenda, update the speaker name in the mapping. "
+                "Please correct the names based on the agenda. Return the corrected mapping in JSON format as "
+                "{'spk_0': 'Correct Name', 'spk_1': 'Correct Name', ...}. "
+                "Only update a name if it sounds very similar, there is a good spelling overlap, or the speaker's introduction matches the description of the talk from the agenda. If the name is totally unrelated, keep the original name. "
+                "Always include all the speakers from the original mapping, even if you don't update their names, i.e. if there are 4 speakers in the original mapping, the new mapping must always have 4 speakers; ignore all the other speakers in the agenda. I REPEAT: DO NOT ADD OTHER NEW SPEAKERS TO THE MAPPING."
+            )
+
+            client = OpenAI()
+
+            completion = client.chat.completions.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0,
+            )
+
+            response_text = completion.choices[0].message.content.strip()
+            try:
+                corrected_mapping = json.loads(response_text)
+            except Exception:
+                # The model may wrap the JSON in prose; keep only the outermost braces
+                response_text = response_text[
+                    response_text.find("{") : response_text.rfind("}") + 1
+                ]
+                try:
+                    corrected_mapping = json.loads(response_text)
+                except json.JSONDecodeError:
+                    print(
+                        "Error parsing corrected speaker mapping JSON, keeping the original mapping."
+                    )
+                    corrected_mapping = self.speaker_mapping
+            # Update the speaker mapping with corrected names
+            self.speaker_mapping = corrected_mapping
+            print("Corrected Speaker Mapping:", self.speaker_mapping)
+
+            # Update the transcript segments with corrected names
+            for segment in self.segments:
+                spk_id = f"spk_{segment.speaker_id}"
+                segment.speaker_name = self.speaker_mapping.get(spk_id, spk_id)
+
+            # Recreate the formatted transcript with corrected names
+            formatted_segments = []
+            for seg in self.segments:
+                start_time_str = self._format_time(seg.start_time)
+                end_time_str = self._format_time(seg.end_time)
+                formatted_segments.append(
+                    f"time_stamp: {start_time_str}-{end_time_str}\n"
+                    f"{seg.speaker_name}: {seg.text}\n"
+                )
+            self.formatted_transcript = "\n".join(formatted_segments)
+
+        except requests.exceptions.RequestException as e:
+            print(f"Error fetching agenda from URL: {str(e)}")
+        except Exception as e:
+            print(f"Error correcting speaker mapping: {str(e)}")

     def _create_formatted_transcript(self) -> None:
         """Create formatted transcript with default speaker labels."""
         formatted_segments = []
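
For context: `_process_transcript` above consumes Amazon Transcribe-style output, where `results["items"]` mixes `pronunciation` items (which carry `start_time`/`end_time` and a `speaker_label` such as `spk_0`) with `punctuation` items (text only). A minimal, self-contained sketch of that assumed input shape, plus the joining rule `_create_segment` applies (words space-separated, punctuation glued to the previous word). Not part of this commit; values are illustrative:

    # Assumed Amazon Transcribe-style item list (illustrative values)
    results = {
        "items": [
            {"type": "pronunciation", "speaker_label": "spk_0",
             "start_time": "0.0", "end_time": "0.4",
             "alternatives": [{"content": "Hello"}]},
            {"type": "punctuation", "alternatives": [{"content": ","}]},
            {"type": "pronunciation", "speaker_label": "spk_0",
             "start_time": "0.5", "end_time": "0.9",
             "alternatives": [{"content": "everyone"}]},
            {"type": "punctuation", "alternatives": [{"content": "."}]},
        ]
    }

    # The same joining rule _create_segment uses
    words = []
    for item in results["items"]:
        token = item["alternatives"][0]["content"]
        if item["type"] == "pronunciation":
            words.append(token)
        elif words:
            words[-1] += token  # attach punctuation without a space

    print(" ".join(words))  # -> "Hello, everyone."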

@@ -314,13 +462,30 @@ Total takes: 2
     system_prompt = f"""You are analyzing a transcript for Call ID: {cid}, Session ID: {rsid}, Origin: {origin}, and Call Type: {ct}.

 CORE REQUIREMENTS:
-1. ... (rest of the old requirements removed; their content is not preserved in this view)
+1. SPEAKER ISOLATION: When creating a clip, you must:
+   - Include ONLY continuous dialogue from ONE speaker
+   - The speaker must talk continuously without any interruptions
+   - As soon as another speaker starts talking, the clip MUST end
+   - Never combine dialogue across interruptions, even from the same speaker
+
+2. DURATION RULES:
+   - Each clip must be 20-100 seconds of CONTINUOUS speech
+   - If a speaker's dialogue is interrupted before 20 seconds, it cannot be used
+   - Clock starts when speaker begins and must end before any other speaker starts
+
+3. TOPIC COHERENCE:
+   - Each clip must cover one complete topic/thought
+   - Must end before speaker changes topics
+   - Content should be engaging and viral-worthy
+
+4. SPEAKER COVERAGE:
+   - Minimum 2 topics per speaker, aim for 3 if good content exists
+   - Host can be skipped if no compelling content
+
+CRITICAL: When analyzing timestamps, you must verify that:
+1. No other speaker talks during the selected timeframe
+2. The speaker talks continuously for at least 20 seconds
+3. The clip ends BEFORE any interruption or speaker change
 """

     user_prompt = f"""Call Details:
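
These requirements are enforced purely through prompting; the commit adds no programmatic check. If one were wanted, a rough sketch could validate a candidate clip against the segments built by `_process_transcript`. This assumes the `TranscriptSegment` fields used elsewhere in this file, and `clip_is_valid` is a hypothetical helper, not in the app:

    from dataclasses import dataclass

    @dataclass
    class TranscriptSegment:  # stand-in mirroring the fields the diff implies
        speaker_id: str
        start_time: float
        end_time: float
        text: str

    def clip_is_valid(segments, speaker_id, st, et,
                      min_len=20.0, max_len=100.0):
        """Check a candidate clip against the duration and isolation rules."""
        if not (min_len <= et - st <= max_len):
            return False
        # Reject the clip if any other speaker's segment overlaps [st, et)
        for seg in segments:
            overlaps = seg.start_time < et and seg.end_time > st
            if overlaps and seg.speaker_id != speaker_id:
                return False
        return True

    segments = [
        TranscriptSegment("0", 0.0, 25.0, "..."),
        TranscriptSegment("1", 25.0, 40.0, "..."),
    ]
    print(clip_is_valid(segments, "0", 0.0, 24.0))  # True: one speaker, 24 s
    print(clip_is_valid(segments, "0", 0.0, 30.0))  # False: overlaps speaker 1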

@@ -328,40 +493,34 @@ User ID: {uid}
 Call ID: {cid}
 Speakers: {", ".join(speaker_mapping.values())}
 Transcript: {transcript}
-Your task is to analyze speakers' discussions to identify compelling social media clips. For each speaker, identify key topics that mention people, news, events, trends, or sources.

-... (removed lines not preserved in this view)
+Your task is to create social media clips following these strict rules:
+
+1. TIMESTAMP SELECTION:
+   - You must check the transcript line by line
+   - Verify speaker continuity with NO interruptions
+   - End clips immediately before any other speaker starts
+   - If Speaker A talks from 1:00-1:10, then Speaker B talks, then Speaker A resumes at 1:15, these must be separate clips
+   - Never combine timestamps across interruptions
+
+2. CLIP REQUIREMENTS:
+   - Minimum 20 seconds of CONTINUOUS speech
+   - Maximum 100 seconds
+   - Single speaker only
+   - Must end before any interruption
+   - Complete thoughts/topics only
+
+Return Format requirements:
+SPEAKER FORMAT:
 **Speaker Name**
 1. [Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({{link_start}}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{772}}&uid={{uid}})
 2. [Topic title <div id='topic' style="display: inline"> 43s at 14:45 </div>]({{link_start}}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{885}}&et={{928}}&uid={{uid}})
 3. [Topic title <div id='topic' style="display: inline"> 58s at 16:20 </div>]({{link_start}}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{980}}&et={{1038}}&uid={{uid}})
-... (removed heading lines not preserved in this view)
-   - Start time (st): Must begin exactly when speaker starts discussing the specific topic.
-   - End time (et): Must end exactly when either:
-     * The speaker completes their point, or
-     * Before the next speaker begins.
-   - NO OVERLAP: Selected duration must NEVER include dialogue from other speakers.
-   - Duration limits: Minimum 20 seconds, maximum 1 minute 30 seconds.
-   - Time format: "Xs at HH:MM" where X = seconds.
-   - URL parameters: Convert display times to seconds.
-     Example: "25s at 10:13" → st=613&et=638.
-
-3. FORMATTING RULES:
-   - Speaker names: Use markdown bold (**Name**).
-   - Topic titles: First word capitalized, rest lowercase.
-   - Each topic must be a clickable link with correct timestamp.
-   - URL format: {{link_start}}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_time_in_sec}}&et={{end_time_in_sec}}&uid={{uid}}
-
-4. TOPIC SELECTION:
-   - Prioritize engaging, viral-worthy content.
-   - Minimum 2 topics per speaker, aim for 3 if available (SKIP THE HOST if no compelling content).
-   - Topics should be self-contained discussions within the timestamp.
-   - Skip speakers if fewer than 2 compelling topics found.
+**Speaker Name**
+....
 """
-    print(user_prompt)
+    print(user_prompt, speaker_mapping)

     completion = client.chat.completions.create(
         model="gpt-4o",
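
Both the old and the new prompt leave the display-time-to-URL conversion to the model. The arithmetic spelled out by the removed example ("25s at 10:13" maps to st=613&et=638, so the stamp is read as minutes:seconds despite the "HH:MM" label) looks like this as a small hypothetical helper, not in the app:

    def display_to_url_params(duration_s: int, stamp: str) -> tuple[int, int]:
        """'25s at 10:13' -> (613, 638); stamp is minutes:seconds into the call."""
        minutes, seconds = map(int, stamp.split(":"))
        st = minutes * 60 + seconds  # 10 * 60 + 13 = 613
        return st, st + duration_s   # et = 613 + 25 = 638

    print(display_to_url_params(25, "10:13"))  # (613, 638)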

@@ -479,7 +638,6 @@ If the user provides a link to the agenda, use the correct_speaker_name_with_url
 If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
 """
     messages = [{"role": "system", "content": prompt}]
-    print(messages[0]["content"])

     for user_msg, assistant_msg in chat_history:
         if user_msg is not None:  # Skip the initial message where user_msg is None

@@ -522,10 +680,14 @@ If the user provides the correct call type, use the correct_call_type function t
                         corrected_speaker_mapping = (
                             transcript_processor.speaker_mapping
                         )
+                        messages.append(response.choices[0].message)
+
                         function_call_result_message = {
                             "role": "tool",
                             "content": json.dumps(
-                                {
+                                {
+                                    "speaker_mapping": f"Corrected Speaker Mapping... {corrected_speaker_mapping}"
+                                }
                             ),
                             "name": tool_call.function.name,
                             "tool_call_id": tool_call.id,
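
The key fix in this hunk: the Chat Completions API rejects a `role: "tool"` message unless it directly follows the assistant message carrying the matching `tool_calls` entry, which is why `messages.append(response.choices[0].message)` now precedes the tool result. A self-contained sketch of the required sequence, using hypothetical stand-in values (the real code appends the SDK's message object directly):

    import json

    # Hypothetical values standing in for the real tool call on this code path
    tool_call_id = "call_abc123"
    tool_name = "correct_speaker_name_with_url"
    assistant_msg = {
        "role": "assistant",
        "content": None,
        "tool_calls": [{
            "id": tool_call_id,
            "type": "function",
            "function": {"name": tool_name, "arguments": '{"url": "example.com"}'},
        }],
    }

    messages = [{"role": "system", "content": "..."}]
    messages.append(assistant_msg)     # 1) assistant message with the call
    messages.append({                  # 2) matching tool result
        "role": "tool",
        "content": json.dumps({"speaker_mapping": "Corrected Speaker Mapping..."}),
        "name": tool_name,
        "tool_call_id": tool_call_id,  # must match the call id above
    })
    # A follow-up chat.completions.create(model=..., messages=messages) can now
    # see the tool result; without step 1 the API rejects the tool message.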

@@ -534,13 +696,18 @@ If the user provides the correct call type, use the correct_call_type function t

                     # Get final response after tool call
                     final_response = client.chat.completions.create(
-                        model="gpt-4o",
+                        model="gpt-4o",
+                        messages=messages,
+                        stream=True,
                     )

-                    ... (removed line not preserved in this view)
+                    collected_chunk = ""
                     for final_chunk in final_response:
                         if final_chunk.choices[0].delta.content:
-                            ... (removed line not preserved in this view)
+                            collected_chunk += final_chunk.choices[
+                                0
+                            ].delta.content
+                            yield collected_chunk
                     return
                 else:
                     function_call_result_message = {
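
Note that the new streaming loop yields the accumulated text (`collected_chunk`) rather than each delta: the pattern a Gradio-style chat UI expects, since it re-renders the whole message on every yield. A minimal stub illustrating the same pattern without the API:

    # Stub standing in for the OpenAI stream of delta chunks
    def fake_stream():
        for piece in ["Hel", "lo ", "world"]:
            yield piece  # plays the role of final_chunk.choices[0].delta.content

    def respond():
        collected = ""
        for delta in fake_stream():
            collected += delta
            yield collected  # emit the whole message so far, not just the delta

    for partial in respond():
        print(partial)  # Hel / Hello / Hello world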