# AI Speech Detector — Streamlit app (Hugging Face Spaces page residue removed)
import streamlit as st
from faster_whisper import WhisperModel
from transformers import pipeline
from pydub import AudioSegment
import numpy as np
@st.cache_resource
def initialize_model():
    """Load and cache the Whisper model and the AI-text detection pipeline.

    Returns:
        tuple: (WhisperModel, transformers pipeline). Cached with
        st.cache_resource so the heavy models are loaded once per server
        process instead of on every Streamlit rerun (each widget
        interaction re-executes the whole script).
    """
    # int8 quantization keeps the "medium" model usable on CPU-only hosts.
    model = WhisperModel("medium", device="cpu", compute_type="int8")
    # roberta-base-openai-detector emits "Real" (human) / "Fake" (AI) labels.
    ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
    return model, ai_detector
def preprocess_audio(uploaded_file):
    """Convert an uploaded audio file into a mono 16 kHz float32 sample array."""
    segment = AudioSegment.from_file(uploaded_file)
    # Whisper expects 16 kHz mono input; normalize levels before conversion.
    segment = segment.set_frame_rate(16000).set_channels(1).normalize()
    # Scale 16-bit PCM integers into the [-1.0, 1.0) float range.
    raw = np.array(segment.get_array_of_samples(), dtype=np.float32)
    return raw / 32768.0
def transcribe_audio(samples, model):
    """Run Whisper over the sample array and return each segment's text."""
    # vad_filter skips silent stretches; beam_size=3 trades accuracy for speed.
    segments, _info = model.transcribe(
        samples, language="en", vad_filter=True, beam_size=3
    )
    texts = []
    for seg in segments:
        texts.append(seg.text)
    return texts
def combine_sentences(transcriptions, group_size=3):
    """Join consecutive transcribed sentences into chunks of group_size.

    The final chunk may hold fewer than group_size sentences.
    """
    return [
        " ".join(transcriptions[start:start + group_size])
        for start in range(0, len(transcriptions), group_size)
    ]
def ai_detection(text, ai_detector):
    """Classify a text chunk as Human- or AI-written via the detector pipeline.

    Returns a dict with keys "classification", "probability", "confidence".
    """
    # Very short chunks give meaningless classifier scores; bail out early.
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}
    prediction = ai_detector(text)[0]
    score = prediction["score"]
    # The detector labels human-written text "Real"; anything else counts as AI.
    is_human = prediction["label"] == "Real"
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"
    return {
        "classification": "Human" if is_human else "AI",
        "probability": score,
        "confidence": confidence,
    }
def run_app():
    """Main Streamlit app: upload audio, transcribe, and show AI-detection results."""
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    st.markdown("""
    This app uses the Whisper model for speech-to-text transcription and AI detection to classify the text.
    Supported audio formats: **.wav**, **.mp3**.
    """)

    # Model objects are created up front, before any upload arrives.
    model, ai_detector = initialize_model()

    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if not uploaded_file:
        return

    st.info("Processing audio... Please wait.")
    try:
        # Speech-to-text, then regroup sentences into detector-sized chunks.
        samples = preprocess_audio(uploaded_file)
        sentences = transcribe_audio(samples, model)
        chunks = combine_sentences(sentences, group_size=3)
        st.text_area("Transcription", value="\n".join(chunks), height=300)

        st.subheader("AI Detection Results")
        for chunk in chunks:
            verdict = ai_detection(chunk, ai_detector)
            st.write(f"**Text:** {chunk}")
            st.write(f"- **Classification:** {verdict['classification']}")
            st.write(f"- **Probability:** {verdict['probability']:.2f}")
            st.write(f"- **Confidence:** {verdict['confidence']}")
            st.markdown("---")
    except Exception as e:
        # Surface decode/transcription failures in the UI instead of crashing.
        st.error(f"Error processing audio: {str(e)}")
# Entry point: keeps importing this module side-effect free.
if __name__ == "__main__":
    run_app()