Spaces:
Runtime error
Runtime error
add
Browse files
requirements.txt
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
transformers[torch]==4.36.2
|
2 |
numpy==1.26.3
|
3 |
#scikit-learn==1.3.2
|
4 |
-
|
5 |
datasets==2.16.1
|
6 |
evaluate==0.4.1
|
7 |
accelerate==0.25.0
|
8 |
seqeval==1.2.2
|
9 |
pandas==2.1.4
|
10 |
gradio==4.13.0
|
11 |
-
pydantic_settings==2.1.0
|
|
|
|
1 |
transformers[torch]==4.36.2
|
2 |
numpy==1.26.3
|
3 |
#scikit-learn==1.3.2
|
4 |
+
matplotlib==3.8.2
|
5 |
datasets==2.16.1
|
6 |
evaluate==0.4.1
|
7 |
accelerate==0.25.0
|
8 |
seqeval==1.2.2
|
9 |
pandas==2.1.4
|
10 |
gradio==4.13.0
|
11 |
+
pydantic_settings==2.1.0
|
12 |
+
sentencepiece==0.1.99
|
source/services/predicting_effective_arguments/train/02_classification copy.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Exploratory setup for the discourse-effectiveness classification task:
# loads the raw train/test CSV files, defines optional exploratory plots,
# and loads the tokenizer to report its maximum sequence length.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoTokenizer


# Column holding the class label and column holding the text to classify.
TARGET = 'discourse_effectiveness'
TEXT = "discourse_text"

# NOTE: both paths are relative, so the script must be run from the
# directory that contains `data/raw_data/`.
train_df = pd.read_csv("data/raw_data/train.csv")
test_df = pd.read_csv("data/raw_data/test.csv")


def plot_eda(df):
    """Show exploratory plots for a frame with the training columns.

    Draws, in order: a horizontal bar chart of ``TARGET`` class counts, a
    horizontal bar chart of ``discourse_type`` counts, and a box plot of the
    number of whitespace-separated words in ``TEXT`` grouped by ``TARGET``.

    This code used to live at module level inside a string literal, i.e. it
    was disabled. It is kept as a helper that is NOT called automatically, so
    running the script still produces no plots unless ``plot_eda(train_df)``
    is invoked explicitly.

    Args:
        df: DataFrame containing the ``TARGET``, ``TEXT`` and
            ``discourse_type`` columns.
    """
    df[TARGET].value_counts(ascending=True).plot.barh()
    plt.title("Frequency of Classes")
    plt.show()

    df['discourse_type'].value_counts(ascending=True).plot.barh()
    plt.title("Frequency of discourse_type")
    plt.show()

    # Work on a derived frame so the caller's DataFrame is not mutated.
    # `.str.len()` counts the tokens of each split list and, unlike
    # `.apply(len)`, does not raise on missing (NaN) text.
    with_counts = df.assign(**{"Words Per text": df[TEXT].str.split().str.len()})
    with_counts.boxplot("Words Per text", by=TARGET, grid=False,
                        showfliers=False, color="black")
    # Drop the automatic "grouped by" super-title and x-axis label.
    plt.suptitle("")
    plt.xlabel("")
    plt.show()


model_ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
# A bare `tokenizer.model_max_length` expression only displays a value in a
# notebook/REPL; print it so the script actually reports the limit.
print(tokenizer.model_max_length)
|