zerostratos committed on
Commit
fcb32b6
·
verified ·
1 Parent(s): f1a52ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -37
app.py CHANGED
@@ -8,10 +8,6 @@ import torch.nn as nn
8
  import torch.nn.functional as F
9
  import pandas as pd
10
  import re
11
- from sacremoses import MosesTokenizer, MosesDetokenizer
12
- teencode_df = pd.read_csv('teencode.txt',names=['teencode','map'],sep='\t',)
13
- teencode_list = teencode_df['teencode'].to_list()
14
- map_list = teencode_df['map'].to_list()
15
  class BCNN(nn.Module):
16
  def __init__(self, embedding_dim, output_dim,
17
  dropout,bidirectional_units,conv_filters):
@@ -100,38 +96,6 @@ class TextClassificationApp:
100
  words.append(re.sub(r'([A-Z])\1+', lambda m: m.group(1), word, flags = re.IGNORECASE))
101
  return ' '.join(words)
102
 
103
- def searchTeencode(self,word):
104
- try:
105
- global teencode_count
106
- index = teencode_list.index(word)
107
- map_word = map_list[index]
108
- teencode_count += 1
109
- return map_word
110
- except:
111
- pass
112
-
113
- def deTeencode(self, sentence):
114
- lenn = 0
115
- sentence = str(sentence)
116
- # Tokenize
117
- nestList_tokens = sentence.split()
118
- for tokens_idx, text_tokens in enumerate(nestList_tokens):
119
- # Teencode
120
- lenn += len(text_tokens)
121
- for idx, word in enumerate(text_tokens):
122
- deteencoded = self.searchTeencode(word)
123
- if deteencoded is not None:
124
- text_tokens[idx] = deteencoded
125
- nestList_tokens[tokens_idx] = text_tokens
126
-
127
- flat_list = [item for sublist in nestList_tokens for item in sublist]
128
-
129
- # Detokenize
130
- detokens = MosesDetokenizer().detokenize(flat_list, return_str=True)
131
-
132
- return detokens
133
-
134
-
135
  def preprocess_text(self, text):
136
  """
137
  Preprocess input text for model prediction
@@ -144,7 +108,6 @@ class TextClassificationApp:
144
  """
145
  # Tokenize and encode the text
146
  text = self.remove_dub_char(text)
147
- text = self.deTeencode(text)
148
  input_ids = []
149
  attention_masks = []
150
  encoded = self.tokenizer.encode_plus(
 
8
  import torch.nn.functional as F
9
  import pandas as pd
10
  import re
 
 
 
 
11
  class BCNN(nn.Module):
12
  def __init__(self, embedding_dim, output_dim,
13
  dropout,bidirectional_units,conv_filters):
 
96
  words.append(re.sub(r'([A-Z])\1+', lambda m: m.group(1), word, flags = re.IGNORECASE))
97
  return ' '.join(words)
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def preprocess_text(self, text):
100
  """
101
  Preprocess input text for model prediction
 
108
  """
109
  # Tokenize and encode the text
110
  text = self.remove_dub_char(text)
 
111
  input_ids = []
112
  attention_masks = []
113
  encoded = self.tokenizer.encode_plus(