# Tai Truong — fix readme (commit d202ada)
import assemblyai as aai
from loguru import logger
from langflow.custom import Component
from langflow.io import DataInput, DropdownInput, FloatInput, IntInput, MultilineInput, Output, SecretStrInput
from langflow.schema import Data
class AssemblyAILeMUR(Component):
    """Apply Large Language Models to spoken data using the AssemblyAI LeMUR framework.

    Takes either a transcription result produced by a previous AssemblyAI
    component or a comma-separated list of transcript IDs, fetches the
    completed transcripts, and runs one of the LeMUR endpoints ('task',
    'summary', or 'question-answer') over them. All failure paths return a
    ``Data`` object carrying an ``"error"`` key rather than raising.
    """

    display_name = "AssemblyAI LeMUR"
    description = "Apply Large Language Models to spoken data using the AssemblyAI LeMUR framework"
    documentation = "https://www.assemblyai.com/docs/lemur"
    icon = "AssemblyAI"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Assembly API Key",
            info="Your AssemblyAI API key. You can get one from https://www.assemblyai.com/",
            advanced=False,
        ),
        DataInput(
            name="transcription_result",
            display_name="Transcription Result",
            info="The transcription result from AssemblyAI",
        ),
        MultilineInput(
            name="prompt",
            display_name="Input Prompt",
            info="The text to prompt the model",
        ),
        DropdownInput(
            name="final_model",
            display_name="Final Model",
            options=["claude3_5_sonnet", "claude3_opus", "claude3_haiku", "claude3_sonnet"],
            value="claude3_5_sonnet",
            info="The model that is used for the final prompt after compression is performed",
            advanced=True,
        ),
        FloatInput(
            name="temperature",
            display_name="Temperature",
            advanced=True,
            value=0.0,
            info="The temperature to use for the model",
        ),
        IntInput(
            name="max_output_size",
            # Fixed: the label previously had a stray leading space (" Max Output Size").
            display_name="Max Output Size",
            advanced=True,
            value=2000,
            info="Max output size in tokens, up to 4000",
        ),
        DropdownInput(
            name="endpoint",
            display_name="Endpoint",
            options=["task", "summary", "question-answer"],
            value="task",
            info=(
                "The LeMUR endpoint to use. For 'summary' and 'question-answer',"
                " no prompt input is needed. See https://www.assemblyai.com/docs/api-reference/lemur/ for more info."
            ),
            advanced=True,
        ),
        MultilineInput(
            name="questions",
            display_name="Questions",
            info="Comma-separated list of your questions. Only used if Endpoint is 'question-answer'",
            advanced=True,
        ),
        MultilineInput(
            name="transcript_ids",
            display_name="Transcript IDs",
            info=(
                "Comma-separated list of transcript IDs. LeMUR can perform actions over multiple transcripts."
                " If provided, the Transcription Result is ignored."
            ),
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="LeMUR Response", name="lemur_response", method="run_lemur"),
    ]

    def run_lemur(self) -> Data:
        """Validate the inputs, resolve the transcripts, and run the selected LeMUR endpoint.

        Returns:
            Data: The LeMUR response on success, or a Data object whose
            ``data`` contains an ``"error"`` key describing what went wrong.
        """
        aai.settings.api_key = self.api_key

        # --- Input validation -------------------------------------------------
        if not self.transcription_result and not self.transcript_ids:
            error = "Either a Transcription Result or Transcript IDs must be provided"
            self.status = error
            return Data(data={"error": error})
        if self.transcription_result and self.transcription_result.data.get("error"):
            # Propagate the error message from the previous step unchanged.
            self.status = self.transcription_result.data["error"]
            return self.transcription_result
        if self.endpoint == "task" and not self.prompt:
            self.status = "No prompt specified for the task endpoint"
            return Data(data={"error": "No prompt specified"})
        if self.endpoint == "question-answer" and not self.questions:
            error = "No Questions were provided for the question-answer endpoint"
            self.status = error
            return Data(data={"error": error})

        # --- Resolve transcript IDs ------------------------------------------
        # The Transcript IDs input documents that, when provided, it overrides
        # the wired-in Transcription Result — so it must be checked first.
        # (The previous code checked the Transcription Result first, which
        # contradicted the documented contract.)
        transcript_ids = None
        if self.transcript_ids:
            # Tolerate whitespace and stray/trailing commas in the ID list.
            transcript_ids = [t.strip() for t in self.transcript_ids.split(",") if t.strip()]
        elif self.transcription_result and "id" in self.transcription_result.data:
            transcript_ids = [self.transcription_result.data["id"]]

        if not transcript_ids:
            error = "Either a valid Transcription Result or valid Transcript IDs must be provided"
            self.status = error
            return Data(data={"error": error})

        # --- Fetch transcripts and surface any failures -----------------------
        transcript_group = aai.TranscriptGroup(transcript_ids=transcript_ids)
        transcript_group, failures = transcript_group.wait_for_completion(return_failures=True)
        if failures:
            error = f"Getting transcriptions failed: {failures[0]}"
            self.status = error
            return Data(data={"error": error})
        for t in transcript_group.transcripts:
            if t.status == aai.TranscriptStatus.error:
                self.status = t.error
                return Data(data={"error": t.error})

        # --- Perform the LeMUR action ----------------------------------------
        try:
            response = self.perform_lemur_action(transcript_group, self.endpoint)
        except Exception as e:  # noqa: BLE001
            logger.opt(exception=True).debug("Error running LeMUR")
            error = f"An Error happened: {e}"
            self.status = error
            return Data(data={"error": error})

        result = Data(data=response)
        self.status = result
        return result

    def perform_lemur_action(self, transcript_group: aai.TranscriptGroup, endpoint: str) -> dict:
        """Call the requested LeMUR endpoint on the given transcript group.

        Args:
            transcript_group: Completed transcripts to run LeMUR over.
            endpoint: One of "task", "summary", or "question-answer".

        Returns:
            dict: The LeMUR response serialized to a dictionary.

        Raises:
            ValueError: If the endpoint is not one of the supported values.
        """
        # loguru ignores extra positional args when the message has no "{}"
        # placeholders; the previous print-style call silently dropped the values.
        logger.info("Endpoint: {} ({})", endpoint, type(endpoint))
        if endpoint == "task":
            result = transcript_group.lemur.task(
                prompt=self.prompt,
                final_model=self.get_final_model(self.final_model),
                temperature=self.temperature,
                max_output_size=self.max_output_size,
            )
        elif endpoint == "summary":
            result = transcript_group.lemur.summarize(
                final_model=self.get_final_model(self.final_model),
                temperature=self.temperature,
                max_output_size=self.max_output_size,
            )
        elif endpoint == "question-answer":
            # Strip whitespace around each question and skip empty entries
            # (e.g. a trailing comma), consistent with the transcript-ID parsing.
            questions = [
                aai.LemurQuestion(question=q.strip())
                for q in self.questions.split(",")
                if q.strip()
            ]
            result = transcript_group.lemur.question(
                questions=questions,
                final_model=self.get_final_model(self.final_model),
                temperature=self.temperature,
                max_output_size=self.max_output_size,
            )
        else:
            msg = f"Endpoint not supported: {endpoint}"
            raise ValueError(msg)
        return result.dict()

    def get_final_model(self, model_name: str) -> aai.LemurModel:
        """Map a model-name string from the dropdown to the SDK's LemurModel enum.

        Raises:
            ValueError: If the model name is not one of the supported options.
        """
        models = {
            "claude3_5_sonnet": aai.LemurModel.claude3_5_sonnet,
            "claude3_opus": aai.LemurModel.claude3_opus,
            "claude3_haiku": aai.LemurModel.claude3_haiku,
            "claude3_sonnet": aai.LemurModel.claude3_sonnet,
        }
        try:
            return models[model_name]
        except KeyError:
            msg = f"Model name not supported: {model_name}"
            raise ValueError(msg) from None