# NOTE(review): the lines below are scrape residue from the hosting page
# (Hugging Face Spaces header: status, file size, commit hashes, and a
# line-number gutter). Commented out so the module parses as Python.
# Spaces: Running / Running
# File size: 3,491 Bytes
# Commits: 35d2acb 9dd67b8 167c91e c9bd25f (gutter 1..87 removed)
import streamlit as st
from faster_whisper import WhisperModel
from transformers import pipeline
from pydub import AudioSegment
import numpy as np
@st.cache_resource
def initialize_model():
    """Load and cache the transcription and AI-detection models.

    Returns:
        tuple: (faster-whisper ``WhisperModel`` for speech-to-text,
        transformers text-classification pipeline for AI detection).
        ``st.cache_resource`` ensures the heavy model downloads happen
        only once per server process.
    """
    whisper_model = WhisperModel("medium", device="cpu", compute_type="int8")
    detector = pipeline("text-classification", model="roberta-base-openai-detector")
    return whisper_model, detector
def preprocess_audio(uploaded_file):
    """Decode an uploaded audio file into mono 16 kHz float32 samples.

    Args:
        uploaded_file: file-like object (e.g. a Streamlit UploadedFile)
            in any format pydub/ffmpeg can decode (.wav, .mp3, ...).

    Returns:
        np.ndarray: float32 samples scaled into [-1.0, 1.0], the format
        faster-whisper's ``transcribe()`` accepts directly.
    """
    audio = AudioSegment.from_file(uploaded_file)
    # Whisper expects 16 kHz mono; normalize() evens out loudness first.
    audio = audio.set_frame_rate(16000).set_channels(1).normalize()
    # Force 16-bit samples so the 32768 scaling below is always correct.
    # The original divided by 32768.0 unconditionally, which mis-scales
    # 8-bit sources and overflows the [-1, 1] range for 24/32-bit ones.
    audio = audio.set_sample_width(2)
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / 32768.0
    return samples
def transcribe_audio(samples, model):
    """Transcribe audio samples with Whisper and collect segment texts.

    Args:
        samples: mono 16 kHz float32 audio samples.
        model: a faster-whisper ``WhisperModel`` instance.

    Returns:
        list[str]: the text of each transcribed segment, in order.
    """
    segments, _info = model.transcribe(
        samples, language="en", vad_filter=True, beam_size=3
    )
    texts = []
    for segment in segments:
        texts.append(segment.text)
    return texts
def combine_sentences(transcriptions, group_size=3):
    """Merge consecutive transcription segments into larger chunks.

    Args:
        transcriptions: list of segment strings.
        group_size: number of segments joined per chunk (default 3).

    Returns:
        list[str]: space-joined chunks; the final chunk may hold fewer
        than ``group_size`` segments.
    """
    chunk_starts = range(0, len(transcriptions), group_size)
    return [" ".join(transcriptions[start:start + group_size]) for start in chunk_starts]
def ai_detection(text, ai_detector):
    """Classify a chunk of text as human- or AI-written.

    Args:
        text: the text chunk to classify.
        ai_detector: transformers text-classification pipeline whose
            labels are "Real" (human-written) or "Fake" (AI-written).

    Returns:
        dict: "classification" ("Human" / "AI" / "Insufficient Data"),
        "probability" (model score, 0.0 when skipped), and
        "confidence" ("High" / "Medium" / "Low").
    """
    # Very short snippets give the detector nothing to work with.
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}
    prediction = ai_detector(text)[0]
    score = prediction["score"]
    # Map the model's "Real"/"Fake" labels onto user-facing names.
    if prediction["label"] == "Real":
        classification = "Human"
    else:
        classification = "AI"
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"
    return {
        "classification": classification,
        "probability": score,
        "confidence": confidence,
    }
def run_app():
    """Main Streamlit app: upload audio, transcribe it, and run AI detection.

    Renders the page header, loads the cached models, and — once a file
    is uploaded — preprocesses, transcribes, chunks the transcript, and
    shows a per-chunk AI-detection verdict.
    """
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    st.markdown("""
    This app uses the Whisper model for speech-to-text transcription and AI detection to classify the text.
    Supported audio formats: **.wav**, **.mp3**.
    """)
    # Load models (cached via st.cache_resource, so only slow on first run)
    model, ai_detector = initialize_model()
    # File uploader
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if uploaded_file:
        st.info("Processing audio... Please wait.")
        try:
            # Preprocess and transcribe
            samples = preprocess_audio(uploaded_file)
            transcription = transcribe_audio(samples, model)
            # Combine sentences into chunks so the detector sees enough context
            combined_transcription = combine_sentences(transcription, group_size=3)
            full_transcript = "\n".join(combined_transcription)
            st.text_area("Transcription", value=full_transcript, height=300)
            # AI Detection on combined sentences, one verdict per chunk
            st.subheader("AI Detection Results")
            for text in combined_transcription:
                detection_result = ai_detection(text, ai_detector)
                st.write(f"**Text:** {text}")
                st.write(f"- **Classification:** {detection_result['classification']}")
                st.write(f"- **Probability:** {detection_result['probability']:.2f}")
                st.write(f"- **Confidence:** {detection_result['confidence']}")
                st.markdown("---")
        except Exception as e:
            # Broad catch at the UI boundary: surface any decode/model
            # failure to the user instead of crashing the app.
            st.error(f"Error processing audio: {str(e)}")
if __name__ == "__main__":
    # Entry point when run as a script (the original line carried a stray
    # " |" scrape artifact that made it a syntax error — removed).
    run_app()