hamdie committed
Commit 8c9b5a5 · verified · 1 Parent(s): d073cf6

Update app.py

Files changed (1)
  1. app.py +0 -26
app.py CHANGED
@@ -6,8 +6,6 @@ model = tf.saved_model.load('arabert_pretrained')
 
 
 
-import pandas as pd
-df = pd.read_csv('put\data_cleaned1.csv')
 
 
 from transformers import TFAutoModel, AutoTokenizer
@@ -17,32 +15,8 @@ arabert_tokenizer = AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabert')
 
 import pandas as pd
 
-# Assuming your DataFrame is named 'df'
-# Split the DataFrame into two parts: label=1 and label=0
-label_1_df = df[df['data_labels'] == 1]
-label_0_df = df[df['data_labels'] == 0]
-
-# Sample an equal number of rows from each label
-sample_size = min(len(label_1_df), len(label_0_df))
-sample_label_1 = label_1_df.sample(n=sample_size, random_state=42)
-sample_label_0 = label_0_df.sample(n=sample_size, random_state=42)
-
-# Concatenate the two samples to get the final balanced sample
-balanced_sample = pd.concat([sample_label_1, sample_label_0])
-
-# Shuffle the rows in the balanced sample
-balanced_sample = balanced_sample.sample(frac=1, random_state=42)
-
-
-balanced_sample.reset_index(inplace=True,drop=True)
-
-
-from sklearn.model_selection import train_test_split
 
-tweets = balanced_sample['cleaned_text']
-labels = balanced_sample['data_labels']
 
-X_train, X_test, y_train, y_test = train_test_split(tweets, labels,stratify=labels, test_size=0.15, random_state=1)
 def preprocess_input_data(texts, tokenizer, max_len=120):
     """Tokenize and preprocess the input data for Arabert model.
 
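For reference, the deleted lines loaded a cleaned CSV with pandas, balanced the two classes by downsampling the larger one, shuffled the result, and produced a stratified 85/15 train/test split. A consolidated, runnable sketch of that removed pipeline (column names and the CSV path come from the diff; writing the path with a forward slash is a small adjustment to avoid backslash escapes):

import pandas as pd
from sklearn.model_selection import train_test_split

# Load the cleaned dataset (path taken from the removed line)
df = pd.read_csv('put/data_cleaned1.csv')

# Split the DataFrame into the two label groups
label_1_df = df[df['data_labels'] == 1]
label_0_df = df[df['data_labels'] == 0]

# Downsample so both classes contribute the same number of rows
sample_size = min(len(label_1_df), len(label_0_df))
balanced_sample = pd.concat([
    label_1_df.sample(n=sample_size, random_state=42),
    label_0_df.sample(n=sample_size, random_state=42),
])

# Shuffle the balanced sample and reset the index
balanced_sample = balanced_sample.sample(frac=1, random_state=42).reset_index(drop=True)

# Stratified 85/15 train/test split on the tweet text and labels
X_train, X_test, y_train, y_test = train_test_split(
    balanced_sample['cleaned_text'],
    balanced_sample['data_labels'],
    stratify=balanced_sample['data_labels'],
    test_size=0.15,
    random_state=1,
)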
 
 
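Only the signature and the first docstring line of the retained preprocess_input_data helper are visible in this hunk. Based on that signature and the AutoTokenizer referenced in the hunk header, a hypothetical sketch of such a helper might look like the following (the tokenizer call arguments and the return format are assumptions, not the actual app.py body):

from transformers import AutoTokenizer

def preprocess_input_data(texts, tokenizer, max_len=120):
    """Tokenize and preprocess the input data for Arabert model."""
    # Tokenize, pad/truncate to max_len, and return TensorFlow tensors
    # suitable for feeding the loaded SavedModel.
    encodings = tokenizer(
        list(texts),
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_tensors='tf',
    )
    return encodings['input_ids'], encodings['attention_mask']

# Hypothetical usage:
# arabert_tokenizer = AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabert')
# input_ids, attention_mask = preprocess_input_data(['نص تجريبي'], arabert_tokenizer)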