mojad121 committed on
Commit
57bb8a9
·
verified ·
1 Parent(s): ce2d087

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +39 -28
src/streamlit_app.py CHANGED
@@ -10,10 +10,18 @@ os.environ["HF_HOME"] = "/app/.cache/huggingface"
10
  os.environ["TORCH_HOME"] = "/app/.cache/torch"
11
  hf_token = os.getenv("HateSpeechMujtabatoken")
12
 
13
- whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny", token=hf_token)
14
- whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny", token=hf_token)
15
- text_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/bert-base-uncased-hatexplain", token=hf_token)
16
- tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/bert-base-uncased-hatexplain", token=hf_token)
 
 
 
 
 
 
 
 
17
 
18
  def transcribe(audio_path):
19
  waveform, sample_rate = torchaudio.load(audio_path)
@@ -23,31 +31,34 @@ def transcribe(audio_path):
23
  return transcription
24
 
25
  def extract_text_features(text):
26
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
27
  outputs = text_model(**inputs)
28
- predicted_class = outputs.logits.argmax(dim=1).item()
29
- return "Hate Speech" if predicted_class == 1 else "Not Hate Speech"
30
-
31
- def predict(audio_file, text_input):
32
- if not audio_file and not text_input:
33
- return "Please provide either an audio file or some text."
34
- if audio_file:
35
- audio_path = "temp_audio.wav"
36
- with open(audio_path, "wb") as f:
37
- f.write(audio_file.read())
38
- transcribed_text = transcribe(audio_path)
39
- prediction = extract_text_features(text_input or transcribed_text)
40
- if text_input:
41
- return f"Predicted: {prediction}"
42
- else:
43
- return f"Predicted: {prediction} \n\n(Transcribed: {transcribed_text})"
44
  else:
45
- prediction = extract_text_features(text_input)
46
- return f"Predicted: {prediction}"
47
 
48
- st.title("Hate Speech Detector")
49
- uploaded_audio = st.file_uploader("Upload Audio File (.mp3, .wav, .ogg, .flac, .opus)", type=["mp3", "wav", "ogg", "flac", "opus"])
50
- text_input = st.text_input("Or enter text:")
 
 
 
51
  if st.button("Predict"):
52
- result = predict(uploaded_audio, text_input)
53
- st.success(result)
 
 
 
 
 
 
 
 
 
10
  os.environ["TORCH_HOME"] = "/app/.cache/torch"
11
  hf_token = os.getenv("HateSpeechMujtabatoken")
12
 
13
+ import torch
14
+ import torchaudio
15
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
16
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
17
+ import streamlit as st
18
+
19
+ whisper_processor = WhisperProcessor.from_pretrained("Hate-speech-CNERG/bert-base-uncased-hatexplain")
20
+ whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
21
+ text_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/bert-base-uncased-hatexplain")
22
+ tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/bert-base-uncased-hatexplain")
23
+
24
+ label_map = {0: "Not Hate Speech", 1: "Hate Speech"}
25
 
26
  def transcribe(audio_path):
27
  waveform, sample_rate = torchaudio.load(audio_path)
 
31
  return transcription
32
 
33
  def extract_text_features(text):
34
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
35
  outputs = text_model(**inputs)
36
+ pred_label = outputs.logits.argmax(dim=1).item()
37
+ return label_map.get(pred_label, "Unknown")
38
+
39
+ def predict_hate_speech(audio_path=None, text=None):
40
+ if audio_path:
41
+ transcription = transcribe(audio_path)
42
+ text_input = text if text else transcription
43
+ elif text:
44
+ text_input = text
 
 
 
 
 
 
 
45
  else:
46
+ return "No input provided"
 
47
 
48
+ prediction = extract_text_features(text_input)
49
+ return prediction
50
+
51
+ st.title("Hate Speech Detector with Audio and Text")
52
+ audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "flac", "ogg", "opus"])
53
+ text_input = st.text_input("Optional text input")
54
  if st.button("Predict"):
55
+ if audio_file is not None:
56
+ with open("temp_audio.wav", "wb") as f:
57
+ f.write(audio_file.read())
58
+ prediction = predict_hate_speech("temp_audio.wav", text_input)
59
+ st.success(prediction)
60
+ elif text_input:
61
+ prediction = predict_hate_speech(text=text_input)
62
+ st.success(prediction)
63
+ else:
64
+ st.warning("Please upload an audio file or enter text.")