aimlnerd committed on
Commit
09366c9
·
1 Parent(s): 3a6061e
requirements.txt CHANGED
@@ -1,11 +1,12 @@
1
  transformers[torch]==4.36.2
2
  numpy==1.26.3
3
  #scikit-learn==1.3.2
4
- #matplotlib==3.8.2
5
  datasets==2.16.1
6
  evaluate==0.4.1
7
  accelerate==0.25.0
8
  seqeval==1.2.2
9
  pandas==2.1.4
10
  gradio==4.13.0
11
- pydantic_settings==2.1.0
 
 
1
  transformers[torch]==4.36.2
2
  numpy==1.26.3
3
  #scikit-learn==1.3.2
4
+ matplotlib==3.8.2
5
  datasets==2.16.1
6
  evaluate==0.4.1
7
  accelerate==0.25.0
8
  seqeval==1.2.2
9
  pandas==2.1.4
10
  gradio==4.13.0
11
+ pydantic_settings==2.1.0
12
+ sentencepiece==0.1.99
source/services/predicting_effective_arguments/train/02_classification copy.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from datasets import load_dataset
5
+ from transformers import AutoTokenizer
6
+
7
+
8
+ TARGET = 'discourse_effectiveness'
9
+ TEXT = "discourse_text"
10
+ train_df = pd.read_csv("data/raw_data/train.csv")
11
+ test_df = pd.read_csv("data/raw_data/test.csv")
12
+
13
+
14
+ """
15
+ train_df[TARGET].value_counts(ascending=True).plot.barh()
16
+ plt.title("Frequency of Classes")
17
+ plt.show()
18
+
19
+ train_df['discourse_type'].value_counts(ascending=True).plot.barh()
20
+ plt.title("Frequency of discourse_type")
21
+ plt.show()
22
+
23
+ train_df["Words Per text"] = train_df[TEXT].str.split().apply(len)
24
+ train_df.boxplot("Words Per text", by=TARGET, grid=False, showfliers=False,
25
+ color="black")
26
+ plt.suptitle("")
27
+ plt.xlabel("")
28
+ plt.show()
29
+ """
30
+
31
+
32
+ model_ckpt = "distilbert-base-uncased"
33
+ tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
34
+ tokenizer.model_max_length
35
+ pass