# transcriber / app.py
# Author: RealSanjay — commit "Update app.py" (167c91e, verified)
import streamlit as st
from faster_whisper import WhisperModel
from transformers import pipeline
from pydub import AudioSegment
import numpy as np
@st.cache_resource
def initialize_model():
    """Load and cache the transcription and AI-detection models.

    Returns:
        tuple: (faster-whisper ``WhisperModel`` for speech-to-text, a
        Hugging Face text-classification pipeline for AI-text detection).

    Cached with ``st.cache_resource`` so the models are loaded only once
    per Streamlit session instead of on every rerun.
    """
    whisper = WhisperModel("medium", device="cpu", compute_type="int8")
    detector = pipeline("text-classification", model="roberta-base-openai-detector")
    return whisper, detector
def preprocess_audio(uploaded_file):
    """Convert an uploaded audio file into normalized float32 samples.

    Resamples to 16 kHz mono and volume-normalizes via pydub, then scales
    the integer samples by 1/32768 into roughly the [-1.0, 1.0) range
    (assumes 16-bit sample width — TODO confirm for all inputs).

    Args:
        uploaded_file: file-like object from Streamlit's uploader.

    Returns:
        np.ndarray: 1-D float32 sample array suitable for Whisper.
    """
    segment = AudioSegment.from_file(uploaded_file)
    segment = segment.set_frame_rate(16000).set_channels(1).normalize()
    raw_samples = segment.get_array_of_samples()
    return np.array(raw_samples, dtype=np.float32) / 32768.0
def transcribe_audio(samples, model):
    """Transcribe audio samples and return the text of each segment.

    Args:
        samples: 1-D float audio samples (16 kHz mono expected by Whisper).
        model: a faster-whisper ``WhisperModel`` (or compatible) instance.

    Returns:
        list[str]: transcribed text, one entry per detected speech segment.
    """
    # VAD filtering skips silence; beam_size=3 trades accuracy for speed.
    segments, _info = model.transcribe(
        samples, language="en", vad_filter=True, beam_size=3
    )
    texts = []
    for seg in segments:
        texts.append(seg.text)
    return texts
def combine_sentences(transcriptions, group_size=3):
    """Join consecutive sentences into space-separated chunks.

    Args:
        transcriptions: list of sentence strings, in order.
        group_size: sentences per chunk; the final chunk may be shorter
            when ``len(transcriptions)`` is not a multiple of it.

    Returns:
        list[str]: chunked text, preserving the original order.
    """
    return [
        " ".join(transcriptions[start:start + group_size])
        for start in range(0, len(transcriptions), group_size)
    ]
def ai_detection(text, ai_detector):
    """Classify a text chunk as human- or AI-written.

    Texts shorter than five words are reported as "Insufficient Data"
    rather than classified, since the detector is unreliable on tiny inputs.

    Args:
        text: the text chunk to classify.
        ai_detector: a text-classification pipeline; called as
            ``ai_detector(text)`` returning ``[{"label": ..., "score": ...}]``.

    Returns:
        dict: keys "classification" ("Human"/"AI"/"Insufficient Data"),
        "probability" (detector score for its predicted label), and
        "confidence" ("High"/"Medium"/"Low" by score thresholds).
    """
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}
    prediction = ai_detector(text)[0]
    score = prediction["score"]
    # The detector labels human-written text "Real"; everything else maps to AI.
    if prediction["label"] == "Real":
        verdict = "Human"
    else:
        verdict = "AI"
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"
    return {"classification": verdict, "probability": score, "confidence": confidence}
def run_app():
    """Render the Streamlit UI: upload audio, transcribe it, and show AI-detection results."""
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    st.markdown("""
    This app uses the Whisper model for speech-to-text transcription and AI detection to classify the text.
    Supported audio formats: **.wav**, **.mp3**.
    """)

    # Cached by st.cache_resource, so cheap after the first run.
    model, ai_detector = initialize_model()

    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if not uploaded_file:
        return

    st.info("Processing audio... Please wait.")
    try:
        # Preprocess, transcribe, then group sentences for more stable detection.
        sentences = transcribe_audio(preprocess_audio(uploaded_file), model)
        chunks = combine_sentences(sentences, group_size=3)
        st.text_area("Transcription", value="\n".join(chunks), height=300)

        st.subheader("AI Detection Results")
        for chunk in chunks:
            verdict = ai_detection(chunk, ai_detector)
            st.write(f"**Text:** {chunk}")
            st.write(f"- **Classification:** {verdict['classification']}")
            st.write(f"- **Probability:** {verdict['probability']:.2f}")
            st.write(f"- **Confidence:** {verdict['confidence']}")
            st.markdown("---")
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of crashing.
        st.error(f"Error processing audio: {str(e)}")


if __name__ == "__main__":
    run_app()