mojad121 committed
Commit 31f1ac4 · verified · 1 Parent(s): bdcdb1f

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +6 -23
src/streamlit_app.py CHANGED
@@ -1,9 +1,6 @@
 import torch
 import os
 import streamlit as st
-from pydub import AudioSegment
-import numpy as np
-from transformers import WhisperProcessor, WhisperForConditionalGeneration
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"
@@ -11,20 +8,9 @@ os.environ["HF_HOME"] = "/app/.cache/huggingface"
 os.environ["TORCH_HOME"] = "/app/.cache/torch"
 hf_token = os.getenv("HateSpeechMujtabatoken")
 
-whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny", token=hf_token)
-whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny", token=hf_token)
 text_model = AutoModelForSequenceClassification.from_pretrained("GroNLP/hateBERT", token=hf_token)
 tokenizer = AutoTokenizer.from_pretrained("GroNLP/hateBERT", token=hf_token)
 
-def transcribe(audio_path):
-    audio = AudioSegment.from_file(audio_path, format="opus")
-    audio = audio.set_channels(1).set_frame_rate(16000)
-    samples = np.array(audio.get_array_of_samples()).astype(np.float32) / (2**15)
-    input_features = whisper_processor(samples, sampling_rate=16000, return_tensors="pt").input_features
-    predicted_ids = whisper_model.generate(input_features)
-    transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-    return transcription
-
 def extract_text_features(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     outputs = text_model(**inputs)
@@ -32,16 +18,13 @@ def extract_text_features(text):
     return "Hate Speech" if predicted_class >= 1 else "Not Hate Speech"
 
 def predict(text_input):
-    audio_path = "input.opus"
-    transcribed_text = transcribe(audio_path)
-    prediction = extract_text_features(text_input or transcribed_text)
-    if text_input:
-        return f"Predicted: {prediction}"
-    else:
-        return f"Predicted: {prediction} \n\n(Transcribed: {transcribed_text})"
+    if not text_input:
+        return "Please enter some text."
+    prediction = extract_text_features(text_input)
+    return f"Predicted: {prediction}"
 
 st.title("Hate Speech Detector")
-text_input = st.text_input("Enter text (optional):")
-if st.button("Run Prediction"):
+text_input = st.text_input("Enter text:")
+if st.button("Predict"):
     result = predict(text_input)
     st.success(result)
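For reference, a minimal sketch of how src/streamlit_app.py reads after this commit, reconstructed from the diff above. The line that computes predicted_class is not shown in any hunk, so the argmax over outputs.logits is an assumption, as is the placement of the unchanged HF_HOME line between the cache settings.

# src/streamlit_app.py after this commit (reconstructed from the diff above;
# the predicted_class line is assumed, not shown in the hunks).
import torch
import os
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification

os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"
os.environ["HF_HOME"] = "/app/.cache/huggingface"
os.environ["TORCH_HOME"] = "/app/.cache/torch"
hf_token = os.getenv("HateSpeechMujtabatoken")

text_model = AutoModelForSequenceClassification.from_pretrained("GroNLP/hateBERT", token=hf_token)
tokenizer = AutoTokenizer.from_pretrained("GroNLP/hateBERT", token=hf_token)

def extract_text_features(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = text_model(**inputs)
    # Assumed: take the highest-scoring class from the classifier logits.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
    return "Hate Speech" if predicted_class >= 1 else "Not Hate Speech"

def predict(text_input):
    if not text_input:
        return "Please enter some text."
    prediction = extract_text_features(text_input)
    return f"Predicted: {prediction}"

st.title("Hate Speech Detector")
text_input = st.text_input("Enter text:")
if st.button("Predict"):
    result = predict(text_input)
    st.success(result)

The token is read from the HateSpeechMujtabatoken environment variable (a Space secret); GroNLP/hateBERT is a public checkpoint, so loading should still work when that variable is unset and hf_token is None. The app is launched as usual with streamlit run src/streamlit_app.py.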