import numpy as np
import streamlit as st
from faster_whisper import WhisperModel
from pydub import AudioSegment
from transformers import pipeline


@st.cache_resource
def initialize_model():
    """Initialize the Whisper model and the AI-text detection pipeline."""
    model = WhisperModel("medium", device="cpu", compute_type="int8")
    ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
    return model, ai_detector


def preprocess_audio(uploaded_file):
    """Convert an uploaded audio file to 16 kHz mono float32 samples."""
    audio = AudioSegment.from_file(uploaded_file)
    audio = audio.set_frame_rate(16000).set_channels(1).normalize()
    # Scale integer PCM samples to [-1.0, 1.0] based on the actual sample
    # width, rather than assuming 16-bit audio (32768 only holds for 16-bit).
    max_value = float(1 << (8 * audio.sample_width - 1))
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / max_value
    return samples


def transcribe_audio(samples, model):
    """Transcribe audio samples with Whisper, returning one string per segment."""
    segments, _ = model.transcribe(samples, language="en", vad_filter=True, beam_size=3)
    # Segments often carry leading/trailing whitespace; strip it here.
    return [segment.text.strip() for segment in segments]


def combine_sentences(transcriptions, group_size=3):
    """Join up to `group_size` consecutive segments into a single chunk."""
    return [
        " ".join(transcriptions[i:i + group_size])
        for i in range(0, len(transcriptions), group_size)
    ]


def ai_detection(text, ai_detector):
    """Classify a chunk of text as human- or AI-written."""
    # Very short chunks give the classifier too little signal to be meaningful.
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}
    # Truncate to the model's maximum input length so long chunks don't error out.
    result = ai_detector(text, truncation=True)[0]
    # The detector labels text as "Real" (human-written) or "Fake" (AI-generated).
    label = "Human" if result["label"] == "Real" else "AI"
    return {
        "classification": label,
        "probability": result["score"],
        "confidence": "High" if result["score"] > 0.7 else "Medium" if result["score"] > 0.5 else "Low",
    }


def run_app():
    """Main Streamlit app."""
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    st.markdown("""
    This app uses the Whisper model for speech-to-text transcription and AI detection
    to classify the text. Supported audio formats: **.wav**, **.mp3**.
    """)

    # Load models (cached across reruns by @st.cache_resource)
    model, ai_detector = initialize_model()

    # File uploader
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])

    if uploaded_file:
        st.info("Processing audio... Please wait.")
        try:
            # Preprocess and transcribe
            samples = preprocess_audio(uploaded_file)
            transcription = transcribe_audio(samples, model)

            # Group segments into chunks and show the full transcript
            combined_transcription = combine_sentences(transcription, group_size=3)
            full_transcript = "\n".join(combined_transcription)
            st.text_area("Transcription", value=full_transcript, height=300)

            # Run AI detection on each chunk
            st.subheader("AI Detection Results")
            for text in combined_transcription:
                detection_result = ai_detection(text, ai_detector)
                st.write(f"**Text:** {text}")
                st.write(f"- **Classification:** {detection_result['classification']}")
                st.write(f"- **Probability:** {detection_result['probability']:.2f}")
                st.write(f"- **Confidence:** {detection_result['confidence']}")
                st.markdown("---")
        except Exception as e:
            st.error(f"Error processing audio: {e}")


if __name__ == "__main__":
    run_app()
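
# Usage sketch (the filename app.py and the package list below are assumptions
# for illustration, not prescribed by the script itself):
#
#   pip install streamlit faster-whisper transformers torch pydub numpy
#   streamlit run app.py
#
# Note: pydub delegates decoding to ffmpeg, so ffmpeg must be installed and on
# PATH for .mp3 uploads to load.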