Spaces:

zerostratos
/

toxic_classification_model

Sleeping

App Files Files Community

zerostratos committed on Dec 15, 2024

Commit

fcb32b6

verified ·

1 Parent(s): f1a52ff

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -37

app.py CHANGED Viewed

@@ -8,10 +8,6 @@ import torch.nn as nn
 import torch.nn.functional as F
 import pandas as pd
 import re
-from sacremoses import MosesTokenizer, MosesDetokenizer
-teencode_df = pd.read_csv('teencode.txt',names=['teencode','map'],sep='\t',)
-teencode_list = teencode_df['teencode'].to_list()
-map_list = teencode_df['map'].to_list()
 class BCNN(nn.Module):
     def __init__(self, embedding_dim, output_dim,
                  dropout,bidirectional_units,conv_filters):
@@ -100,38 +96,6 @@ class TextClassificationApp:
             words.append(re.sub(r'([A-Z])\1+', lambda m: m.group(1), word, flags = re.IGNORECASE))
         return ' '.join(words)
-    def searchTeencode(self,word):
-        try:
-            global teencode_count
-            index = teencode_list.index(word)
-            map_word = map_list[index]
-            teencode_count += 1
-            return map_word
-        except:
-            pass
-    def deTeencode(self, sentence):
-        lenn = 0
-        sentence = str(sentence)
-        # Tokenize
-        nestList_tokens = sentence.split()
-        for tokens_idx, text_tokens in enumerate(nestList_tokens):
-            # Teencode
-            lenn += len(text_tokens)
-            for idx, word in enumerate(text_tokens):
-                deteencoded = self.searchTeencode(word)
-                if deteencoded is not None:
-                    text_tokens[idx] = deteencoded
-            nestList_tokens[tokens_idx] = text_tokens
-        flat_list = [item for sublist in nestList_tokens for item in sublist]
-        # Detokenize
-        detokens = MosesDetokenizer().detokenize(flat_list, return_str=True)
-        return detokens
     def preprocess_text(self, text):
         """
         Preprocess input text for model prediction
@@ -144,7 +108,6 @@ class TextClassificationApp:
         """
         # Tokenize and encode the text
         text = self.remove_dub_char(text)
-        text = self.deTeencode(text)
         input_ids = []
         attention_masks = []
         encoded = self.tokenizer.encode_plus(

 import torch.nn.functional as F
 import pandas as pd
 import re
 class BCNN(nn.Module):
     def __init__(self, embedding_dim, output_dim,
                  dropout,bidirectional_units,conv_filters):
             words.append(re.sub(r'([A-Z])\1+', lambda m: m.group(1), word, flags = re.IGNORECASE))
         return ' '.join(words)
     def preprocess_text(self, text):
         """
         Preprocess input text for model prediction
         """
         # Tokenize and encode the text
         text = self.remove_dub_char(text)
         input_ids = []
         attention_masks = []
         encoded = self.tokenizer.encode_plus(