# Streamlit app: upload an audio file and ask Gemini for a transcript with
# the speakers identified.
#
# Flow: collect API key, optional known-speaker names and an audio file;
# on button press, spool the upload to a temporary file, send it to the
# Gemini Files API, report a token estimate, render the prompt template and
# display the generated transcript. The temporary file is always removed.
import os
import tempfile

import streamlit as st
from google import genai
from jinja2 import Template

# Single source of truth for the model used by both token counting and
# content generation, so the two calls cannot drift apart.
MODEL_NAME = "gemini-2.0-flash"

st.title("Audio Transcription with Speaker Identification")
st.write("Upload an audio file to generate a transcript with speakers identified.")

# API Key Input: a value typed into the widget takes precedence; the
# GEMINI_API_KEY environment variable is the fallback.
api_key_input = st.text_input(
    "Gemini API Key",
    type="password",
    help="You can also set it via GEMINI_API_KEY environment variable.",
)
api_key = api_key_input or os.getenv("GEMINI_API_KEY")

# Speakers Input
speakers_input = st.text_input(
    "Known Speakers (comma-separated)",
    help="List known speaker names. Leave empty if unknown.",
)
# Strip each name and drop blank entries so stray or trailing commas
# ("Alice, , Bob,") do not reach the prompt as empty speaker names.
# An empty input therefore yields an empty list.
speakers = [
    name
    for name in (part.strip() for part in (speakers_input or "").split(","))
    if name
]

# File Upload
audio_file = st.file_uploader(
    "Upload Audio File",
    type=["mp3", "wav", "m4a", "ogg", "mp4"],
)

if st.button("Generate Transcript"):
    if not api_key:
        st.error("Please provide a Gemini API key.")
    elif not audio_file:
        st.error("Please upload an audio file.")
    else:
        # Keep the original extension on the temporary file: the upload call
        # below receives only a path, so the extension is what allows the
        # file's MIME type to be inferred.
        suffix = os.path.splitext(audio_file.name)[1]

        # delete=False because the file is reopened by path after this
        # handle is closed; it is removed explicitly in the finally below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(audio_file.read())
            tmp_file_path = tmp_file.name

        try:
            # Initialize GenAI client
            client = genai.Client(api_key=api_key)

            # Upload audio file
            uploaded_file = client.files.upload(file=tmp_file_path)

            # Token counting is informational only; an SDK without this
            # method must not block the transcription itself.
            try:
                token_info = client.models.count_tokens(
                    model=MODEL_NAME,
                    contents=[uploaded_file],
                )
                st.info(f"File contains approximately {token_info.total_tokens} tokens")
            except AttributeError:
                st.warning("Token counting not available in current API version")

            # Create prompt template
            prompt_template = Template("""[...your existing template here...]""")
            prompt = prompt_template.render(speakers=speakers)

            # Generate content
            response = client.models.generate_content(
                model=MODEL_NAME,
                contents=[prompt, uploaded_file],
            )

            # Display results
            st.subheader("Transcript")
            st.code(response.text, language="text")

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of letting the script crash with a traceback.
            st.error(f"An error occurred: {str(e)}")

        finally:
            # Always remove the local copy of the uploaded audio.
            os.remove(tmp_file_path)

# Credits section in sidebar
st.sidebar.markdown("""
**Credits**
- Transcription powered by [Gemini API](https://ai.google.dev/)
- Heavy inspired by https://github.com/philschmid/gemini-samples/blob/main/examples/gemini-transcribe-with-timestamps.ipynb
""")