Spaces:
Sleeping
Sleeping
from gradio_client import Client, handle_file | |
import pandas as pd | |
import gradio as gr | |
from vosk import Model, KaldiRecognizer | |
import json | |
import wave | |
# Identifiers of the remote Gradio apps that do the actual classification
# (owner/name form, as accepted by gradio_client.Client).
_ENGLISH_TEXT_APP = "dj-dawgs-ipd/IPD-Text-English-Finetune"
_HINGLISH_TEXT_APP = "dj-dawgs-ipd/IPD-Text-Hinglish"
_AUDIO_APP = "dj-dawgs-ipd/IPD_Audio_HuBERT"

# One client per remote classifier; each is created once at import time and
# reused for every request.
clientEngText = Client(_ENGLISH_TEXT_APP)
clientHingText = Client(_HINGLISH_TEXT_APP)
clientAud = Client(_AUDIO_APP)

# Word list used for the direct profanity lookup; only the 'profanity_hn'
# column of the CSV is consulted.
profanity_df = pd.read_csv('Hinglish_Profanity_List.csv', encoding='utf-8')
profanity_hn = profanity_df['profanity_hn']

# Speech-to-text model shared by every transcription call.
vosk_model = Model(lang="en-us")
# Alternative Whisper-based transcription, currently disabled:
# import whisper
# def stt_whisper(file_path):
#     model = whisper.load_model("base")
#     try:
#         result = model.transcribe(file_path)
#         return result["text"]
#     except Exception as e:
#         print(e)
#         return ""
def stt_vosk(file_path):
    """Transcribe a WAV file to text with the module-level Vosk model.

    The audio is streamed to a ``KaldiRecognizer`` in 4000-frame chunks and
    the text of the recognizer's final result is returned.

    Args:
        file_path: Path to a file that the ``wave`` module can open.

    Returns:
        The recognized text, or ``""`` if the file cannot be opened or any
        step of the recognition fails (best-effort transcription).
    """
    try:
        # The context manager closes the file on every path, including when
        # the recognizer raises part-way through the stream.
        with wave.open(file_path, "rb") as wf:
            rec = KaldiRecognizer(vosk_model, wf.getframerate())
            rec.SetWords(True)
            rec.SetPartialWords(True)
            while True:
                data = wf.readframes(4000)
                if not data:
                    break
                rec.AcceptWaveform(data)
            result = json.loads(rec.FinalResult())
        return result["text"]
    except Exception as exc:
        # Still best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate, and the failure is
        # reported instead of vanishing silently.
        print(f"stt_vosk: could not transcribe {file_path!r}: {exc}")
        return ""
def extract_text(audio_path):
    """Return the lower-cased Vosk transcript of the file at *audio_path*."""
    transcript = stt_vosk(audio_path)
    return transcript.lower()
def _hate_confidence(flagged, score):
    """Return *score* when the model flagged hate, otherwise ``1 - score``."""
    return score if flagged else 1 - score


def predict_hate_speech(audio_path):
    """Classify an audio file as hate / not hate using audio and text models.

    The file is sent to the remote audio classifier, transcribed locally, and
    the first 200 characters of the transcript are sent to the English and
    Hinglish text classifiers. The transcript is also checked word-by-word
    against the ``profanity_hn`` list.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        ``["hate", "Result: Profanity Found", "Text: ..."]`` when a listed
        profanity occurs in the transcript;
        ``["hate", "Confidence: ...", "Text: ..."]`` when any classifier
        flags hate with a score above the threshold;
        ``["not_hate", "No hate found, yay!"]`` otherwise.
    """
    audResult = clientAud.predict(
        audio_path=handle_file(audio_path),
        api_name="/predict"
    )
    # NOTE(review): presumably the audio app returns a dict rendered with
    # single quotes; the quote swap breaks if a value ever contains an
    # apostrophe -- confirm the response format.
    audResult = json.loads(audResult.replace("'", '"'))

    stt_text = extract_text(audio_path)
    snippet = stt_text[:200]
    engResult = clientEngText.predict(text=snippet, api_name="/predict")
    hingResult = clientHingText.predict(text=snippet, api_name="/predict")

    # Split the transcript once instead of once per profanity entry.
    spoken_words = set(stt_text.split())
    profanityFound = any(word in spoken_words for word in profanity_hn)

    threshold = 0.6
    # Each model has its own "benign" label; the audio label is compared with
    # its trailing newline exactly as the classifier reports it.
    engFlagged = engResult[0] != "NEITHER"
    hingFlagged = hingResult[0] != "NAG"
    audFlagged = audResult['Classification'] == 'Hate Speech\n'

    isHate = (
        (engFlagged and engResult[1] > threshold)
        or (hingFlagged and hingResult[1] > threshold)
        or (audFlagged and audResult['Confidence'] > threshold)
    )

    # Average the three per-model confidences. The Hinglish one uses that
    # model's own benign label ("NAG"); it was previously checked against
    # "NEITHER", so a benign Hinglish score was never complemented.
    engConf = _hate_confidence(engFlagged, engResult[1])
    hingConf = _hate_confidence(hingFlagged, hingResult[1])
    audConf = _hate_confidence(audFlagged, audResult['Confidence'])
    confidence = (engConf + hingConf + audConf) / 3

    if profanityFound:
        return ["hate", "Result: Profanity Found", f"Text: {stt_text}"]
    if isHate:
        return ["hate", f"Confidence: {confidence}", f"Text: {stt_text}"]
    return ["not_hate", "No hate found, yay!"]
# UI components: a single audio upload (handed to the callback as a file
# path) and a single text box for the analysis result.
_audio_input = gr.Audio(type="filepath", label="Upload Audio")
_analysis_output = gr.Textbox(label="Hate Speech Analysis")

# Gradio interface wiring the upload to predict_hate_speech.
iface = gr.Interface(
    fn=predict_hate_speech,
    inputs=_audio_input,
    outputs=_analysis_output,
    title="Hate Speech Audio Pipeline",
    description="Upload an audio file to detect potential hate speech content.",
    examples=[["hate_video_3_3_snippet2.wav"]],
    allow_flagging="manual",
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()