# AI Speech Detector — Streamlit app that transcribes uploaded audio with
# faster-whisper and classifies each transcribed segment with an AI-text
# detector. (Removed: Hugging Face Spaces page chrome left over from a
# copy-paste — status badges, file size, commit hashes, line-number gutter.)
import streamlit as st
from faster_whisper import WhisperModel
from transformers import pipeline
import numpy as np
from pydub import AudioSegment
from textblob import TextBlob
def initialize_model():
    """Load the Whisper model and the AI-text detector once per session.

    Both objects are heavyweight, so they are cached in
    ``st.session_state`` and reused across Streamlit reruns instead of
    being rebuilt on every interaction.
    """
    # Detector first, model second — the two guards are independent.
    if "ai_detector" not in st.session_state:
        st.session_state["ai_detector"] = pipeline("text-classification", model="roberta-base-openai-detector")
    if "model" not in st.session_state:
        st.session_state["model"] = WhisperModel("small", device="cpu", compute_type="int8")
def process_uploaded_audio(uploaded_file, model):
    """Decode an uploaded audio file and transcribe it with Whisper.

    The clip is resampled to 16 kHz mono and its 16-bit samples are
    scaled to float32 in [-1, 1) before transcription.

    Returns:
        list[str]: the text of each transcribed segment, in order.
    """
    # Normalize the upload to the sample rate/channel layout Whisper expects.
    clip = AudioSegment.from_file(uploaded_file).set_frame_rate(16000).set_channels(1)
    # int16 PCM -> float32 waveform; 32768 = 2**15 rescales to unit range.
    waveform = np.array(clip.get_array_of_samples(), dtype=np.float32) / 32768.0
    transcribed_segments, _ = model.transcribe(waveform, language="en", vad_filter=True)
    texts = []
    for seg in transcribed_segments:
        texts.append(seg.text)
    return texts
def advanced_ai_detection(text, ai_detector):
    """Classify *text* as AI- or human-written via *ai_detector*.

    Args:
        text: the candidate text; inputs under five words are rejected
            because the detector is unreliable on very short snippets.
        ai_detector: a callable (e.g. a transformers pipeline) returning
            a list whose first element has "label" and "score" keys.

    Returns:
        dict with "classification", "probability" and "confidence"
        ("High" above 0.7, "Medium" above 0.5, otherwise "Low").
    """
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}

    prediction = ai_detector(text)[0]
    score = prediction["score"]
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"
    return {
        "classification": prediction["label"],
        "probability": score,
        "confidence": confidence,
    }
def run_app():
    """Render the Streamlit UI: upload an audio file, show its transcription,
    and report per-segment AI-detection results."""
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    # NOTE: the markdown body lines stay at column 0 so the rendered string
    # is unchanged (indenting them would alter the markdown content).
    st.markdown("""
This app uses the Whisper model for speech-to-text transcription and AI detection to classify the text.
Supported audio formats: **.wav**, **.mp3**.
""")

    # Cache the heavy models in session_state before any interaction.
    initialize_model()

    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if not uploaded_file:
        return  # nothing uploaded yet — render the idle page only

    st.info("Processing audio... Please wait.")
    try:
        segments = process_uploaded_audio(uploaded_file, st.session_state.model)
        st.text_area("Transcription", value="\n".join(segments), height=300)

        st.subheader("AI Detection Results")
        for segment_text in segments:
            verdict = advanced_ai_detection(segment_text, st.session_state.ai_detector)
            st.write(f"**Text:** {segment_text}")
            st.write(f"- **Classification:** {verdict['classification']}")
            st.write(f"- **Probability:** {verdict['probability']:.2f}")
            st.write(f"- **Confidence:** {verdict['confidence']}")
            st.markdown("---")
    except Exception as e:
        # Surface decoding/transcription failures to the user instead of crashing.
        st.error(f"Error processing audio: {str(e)}")


if __name__ == "__main__":
    run_app()