Spaces:
Running
Running
File size: 2,723 Bytes
835bf99 f91380b 835bf99 f91380b 835bf99 f91380b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import os
import tempfile
import streamlit as st
from google import genai
from jinja2 import Template
# Page header: title plus a one-line description of what the app does.
st.title("Audio Transcription with Speaker Identification")
st.write("Upload an audio file to generate a transcript with speakers identified.")

# API key input. A key typed into the password field takes precedence; the
# GEMINI_API_KEY environment variable is the fallback when the field is empty.
api_key_input = st.text_input(
    "Gemini API Key",
    type="password",
    help="You can also set it via GEMINI_API_KEY environment variable.",
)
# Strip surrounding whitespace (easy to pick up when pasting a key) so a
# whitespace-only entry does not shadow the environment variable.
api_key = (api_key_input or "").strip() or os.getenv("GEMINI_API_KEY")

# Known speakers input: a comma-separated list of names, optional.
speakers_input = st.text_input(
    "Known Speakers (comma-separated)",
    help="List known speaker names. Leave empty if unknown.",
)
# Trim each name and drop blanks, so input such as "Alice, ,Bob," yields
# ["Alice", "Bob"] instead of passing empty names on to the prompt template.
speakers = [
    name.strip()
    for name in (speakers_input or "").split(",")
    if name.strip()
]

# Audio file upload, restricted to the listed file extensions.
audio_file = st.file_uploader(
    "Upload Audio File",
    type=["mp3", "wav", "m4a", "ogg", "mp4"],
)
# Transcription runs only on the script rerun triggered by the button click.
if st.button("Generate Transcript"):
    # Validate the two required inputs before doing any work.
    if not api_key:
        st.error("Please provide a Gemini API key.")
    elif not audio_file:
        st.error("Please upload an audio file.")
    else:
        # Single source of truth for the model used by both API calls below.
        model_name = "gemini-2.0-flash"

        # Keep the original extension on the temp file. The upload call below
        # is given only a path, and the SDK has to infer the MIME type from
        # the file name; an extension-less temp name gives it nothing to use.
        file_suffix = os.path.splitext(audio_file.name)[1]

        tmp_file_path = None
        try:
            # Spool the upload to disk. The path is recorded before writing so
            # the `finally` clause can clean up even if the write itself fails.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=file_suffix
            ) as tmp_file:
                tmp_file_path = tmp_file.name
                # getvalue() returns the whole buffer regardless of the
                # current read position of the uploaded file object.
                tmp_file.write(audio_file.getvalue())

            # Initialize GenAI client
            client = genai.Client(api_key=api_key)

            # Upload audio file
            uploaded_file = client.files.upload(file=tmp_file_path)

            # Token counting is informational only; if the installed client
            # lacks the method, warn and carry on with the transcription.
            try:
                token_info = client.models.count_tokens(
                    model=model_name,
                    contents=[uploaded_file],
                )
                st.info(f"File contains approximately {token_info.total_tokens} tokens")
            except AttributeError:
                st.warning("Token counting not available in current API version")

            # Create prompt template and render it with the known speakers.
            prompt_template = Template("""[...your existing template here...]""")
            prompt = prompt_template.render(speakers=speakers)

            # Generate content
            response = client.models.generate_content(
                model=model_name,
                contents=[prompt, uploaded_file],
            )

            # Display results
            st.subheader("Transcript")
            transcript = response.text
            if transcript:
                st.code(transcript, language="text")
            else:
                # The response carried no text part; say so explicitly rather
                # than rendering an empty or "None" code block.
                st.warning("The model returned no transcript text.")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of letting the script crash with a traceback.
            st.error(f"An error occurred: {e}")
        finally:
            # Always remove the temp file (it was created with delete=False).
            if tmp_file_path is not None and os.path.exists(tmp_file_path):
                os.remove(tmp_file_path)
# Credits section in sidebar: a static Markdown block naming the API used and
# the notebook this app is based on.
st.sidebar.markdown("""
**Credits**
- Transcription powered by [Gemini API](https://ai.google.dev/)
- Heavy inspired by https://github.com/philschmid/gemini-samples/blob/main/examples/gemini-transcribe-with-timestamps.ipynb
""")