from huggingface_hub import InferenceClient

# Chat model used for transcript cleanup.
MODEL_NAME = "meta-llama/Meta-Llama-3-70b-Instruct"


def clean_transcript(prompt, transcript):
    """Stream a cleaned version of *transcript* from the chat model.

    Sends a single user message combining *prompt* (the cleaning
    instructions) and *transcript* (the raw text to clean), prints each
    streamed token as it arrives, and returns the full generated text.

    Args:
        prompt: Instruction text telling the model how to clean the input.
        transcript: The raw transcript text to be cleaned.

    Returns:
        The complete model response as a single string.
    """
    # Bug fix: the original referenced an undefined global `PROMPT` and
    # ignored `transcript` entirely; both parameters are now used.
    messages = [
        {"role": "user", "content": f"{prompt}\n\n{transcript}"},
    ]
    client = InferenceClient(model=MODEL_NAME)
    pieces = []
    for chunk in client.chat_completion(messages, max_tokens=200, stream=True):
        token = chunk.choices[0].delta.content
        # The final streamed chunk may carry a None delta; skip it so we
        # don't print the literal string "None" or collect junk.
        if token is None:
            continue
        print(token, end="")
        pieces.append(token)
    return "".join(pieces)