transcriber / app.py
RealSanjay's picture
Update app.py
9dd67b8 verified
raw
history blame
3.02 kB
import streamlit as st
from faster_whisper import WhisperModel
from transformers import pipeline
import numpy as np
from pydub import AudioSegment
from textblob import TextBlob
def initialize_model():
    """Load the Whisper model and the AI-text detector once per session.

    Both models are cached in ``st.session_state`` so that Streamlit
    script reruns do not reload them from disk.
    """
    state = st.session_state
    if "model" not in state:
        # int8 on CPU keeps the memory footprint small.
        state.model = WhisperModel("small", device="cpu", compute_type="int8")
    if "ai_detector" not in state:
        state.ai_detector = pipeline(
            "text-classification", model="roberta-base-openai-detector"
        )
def process_uploaded_audio(uploaded_file, model):
    """Transcribe an uploaded audio file and return the segment texts.

    The clip is resampled to 16 kHz mono (what Whisper expects) and its
    16-bit PCM samples are scaled into the [-1, 1] float range before
    being handed to ``model.transcribe``.
    """
    clip = AudioSegment.from_file(uploaded_file)
    clip = clip.set_frame_rate(16000).set_channels(1)
    # 32768.0 == 2**15, the magnitude of a signed 16-bit sample.
    waveform = np.asarray(clip.get_array_of_samples(), dtype=np.float32) / 32768.0
    segments, _ = model.transcribe(waveform, language="en", vad_filter=True)
    return [seg.text for seg in segments]
def advanced_ai_detection(text, ai_detector):
    """Classify *text* as AI- or human-written via *ai_detector*.

    Texts shorter than five words are not scored and come back as
    "Insufficient Data". Otherwise returns the detector's label and
    score, plus a coarse confidence bucket derived from the score.
    """
    if len(text.split()) < 5:
        return {
            "classification": "Insufficient Data",
            "probability": 0.0,
            "confidence": "Low",
        }
    prediction = ai_detector(text)[0]
    score = prediction["score"]
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"
    return {
        "classification": prediction["label"],
        "probability": score,
        "confidence": confidence,
    }
def run_app():
    """Streamlit entry point: upload audio, show transcription and AI analysis."""
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    st.markdown("""
    This app uses the Whisper model for speech-to-text transcription and AI detection to classify the text.
    Supported audio formats: **.wav**, **.mp3**.
    """)

    # Loads (or reuses) the cached models in st.session_state.
    initialize_model()

    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if not uploaded_file:
        # Nothing to do until the user supplies a file.
        return

    st.info("Processing audio... Please wait.")
    try:
        segment_texts = process_uploaded_audio(uploaded_file, st.session_state.model)
        st.text_area("Transcription", value="\n".join(segment_texts), height=300)

        st.subheader("AI Detection Results")
        for segment_text in segment_texts:
            result = advanced_ai_detection(segment_text, st.session_state.ai_detector)
            st.write(f"**Text:** {segment_text}")
            st.write(f"- **Classification:** {result['classification']}")
            st.write(f"- **Probability:** {result['probability']:.2f}")
            st.write(f"- **Confidence:** {result['confidence']}")
            st.markdown("---")
    except Exception as e:
        st.error(f"Error processing audio: {str(e)}")
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    run_app()