# Spaces: Running
import os | |
import tempfile | |
import streamlit as st | |
from google import genai | |
from jinja2 import Template | |
# --- Page header -------------------------------------------------------------
st.title("Audio Transcription with Speaker Identification")
st.write("Upload an audio file to generate a transcript with speakers identified.")

# --- API key -----------------------------------------------------------------
# A key typed into the widget takes precedence; otherwise fall back to the
# GEMINI_API_KEY environment variable.
api_key_input = st.text_input(
    "Gemini API Key",
    type="password",
    help="You can also set it via GEMINI_API_KEY environment variable.",
)
# Strip first: a key pasted with a stray space/newline would otherwise be sent
# verbatim, and a whitespace-only entry would wrongly mask the env fallback.
api_key = (api_key_input or "").strip() or os.getenv("GEMINI_API_KEY")

# --- Known speakers ----------------------------------------------------------
speakers_input = st.text_input(
    "Known Speakers (comma-separated)",
    help="List known speaker names. Leave empty if unknown.",
)
# Split on commas, trim each name, and drop blanks so inputs such as
# "Brady,, Tim," do not produce empty speaker entries.
speakers = [
    name
    for name in (part.strip() for part in (speakers_input or "").split(","))
    if name
]

# --- Audio upload ------------------------------------------------------------
audio_file = st.file_uploader(
    "Upload Audio File",
    type=["mp3", "wav", "m4a", "ogg", "mp4"],
)
# Gemini model used for both the token estimate and the transcription itself.
GEMINI_MODEL = "gemini-2.0-flash"

# Prompt sent alongside the audio. The "Speakers are:" section is emitted only
# when at least one speaker name is known, so the model is never shown an
# empty list. Kept at column 0 so no indentation leaks into the prompt text.
PROMPT_TEMPLATE = Template("""Generate a transcript of the episode. Include timestamps and identify speakers.
{% if speakers %}Speakers are:
{% for speaker in speakers %}- {{ speaker }}
{% endfor %}{% endif %}eg:
[00:00] Brady: Hello there.
[00:02] Tim: Hi Brady.
It is important to include the correct speaker names. Use the names you identified earlier. If you really don't know the speaker's name, identify them with a letter of the alphabet, eg there may be an unknown speaker 'A' and another unknown speaker 'B'.
If there is music or a short jingle playing, signify like so:
[01:02] [MUSIC] or [01:02] [JINGLE]
If you can identify the name of the music or jingle playing then use that instead, eg:
[01:02] [Firework by Katy Perry] or [01:02] [The Sofa Shop jingle]
If there is some other sound playing try to identify the sound, eg:
[01:02] [Bell ringing]
Each individual caption should be quite short, a few short sentences at most.
Signify the end of the episode with [END].
Don't use any markdown formatting, like bolding or italics.
Only use characters from the English alphabet, unless you genuinely believe foreign characters are correct.
It is important that you use the correct words and spell everything correctly. Use the context of the podcast to help.
If the hosts discuss something like a movie, book or celebrity, make sure the movie, book, or celebrity name is spelled correctly.""")


def _build_prompt(speaker_names):
    """Render the transcription prompt for the given speaker names.

    Blank names are ignored so they cannot show up as empty bullets.
    """
    known_names = [name for name in speaker_names if name]
    return PROMPT_TEMPLATE.render(speakers=known_names)


def _report_token_count(client, uploaded_file):
    """Display an approximate token count for the uploaded file.

    The count is purely informational, so any failure is downgraded to a
    warning instead of aborting the transcription.
    """
    try:
        token_info = client.models.count_tokens(
            model=GEMINI_MODEL,
            contents=[uploaded_file],
        )
        st.info(f"File contains approximately {token_info.total_tokens} tokens")
    except AttributeError:
        st.warning("Token counting not available in current API version")
    except Exception as count_error:
        st.warning(f"Could not count tokens: {count_error}")


if st.button("Generate Transcript"):
    if not api_key:
        st.error("Please provide a Gemini API key.")
    elif not audio_file:
        st.error("Please upload an audio file.")
    else:
        tmp_file_path = None
        client = None
        uploaded_file = None
        try:
            # Keep the original extension on the temp copy -- presumably so
            # the upload can infer the media type from the file name.
            original_extension = os.path.splitext(audio_file.name)[1]
            with tempfile.NamedTemporaryFile(
                delete=False,
                suffix=original_extension,
            ) as tmp_file:
                # Record the path before writing so the file is cleaned up
                # even if the write itself fails.
                tmp_file_path = tmp_file.name
                # getvalue() returns the full payload regardless of the
                # current read position of the uploaded buffer.
                tmp_file.write(audio_file.getvalue())

            client = genai.Client(api_key=api_key)
            uploaded_file = client.files.upload(file=tmp_file_path)

            _report_token_count(client, uploaded_file)

            response = client.models.generate_content(
                model=GEMINI_MODEL,
                contents=[_build_prompt(speakers), uploaded_file],
            )

            transcript = response.text
            if transcript:
                st.subheader("Transcript")
                st.code(transcript, language="text")
            else:
                # response.text can be empty/None when the model yields no
                # text parts; say so rather than rendering "None".
                st.warning("The model returned no transcript text.")
        except Exception as e:
            st.error(f"An error occurred: {e}")
        finally:
            # Local cleanup: remove the temporary copy of the audio.
            if tmp_file_path is not None and os.path.exists(tmp_file_path):
                os.remove(tmp_file_path)
            # Remote cleanup: best-effort removal of the file uploaded to
            # Gemini so user audio does not linger server-side.
            if client is not None and uploaded_file is not None:
                try:
                    client.files.delete(name=uploaded_file.name)
                except Exception as cleanup_error:
                    st.warning(
                        f"Could not delete the uploaded file from Gemini: {cleanup_error}"
                    )
# Sidebar: usage note and attributions.
# Blank lines separate the bold headings from the lists; without them Markdown
# treats "**Credits**" as a continuation of the preceding bullet and renders
# it inside that list item.
st.sidebar.markdown("""
**Info**

- It works without a key, but has limits. Use your own to upload bigger files.

**Credits**

- Transcription powered by [Gemini API](https://ai.google.dev/)
- Heavily inspired by https://github.com/philschmid/gemini-samples/blob/main/examples/gemini-transcribe-with-timestamps.ipynb
""")