File size: 2,302 Bytes
8b414b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from pathlib import Path
from typing import Union

import pandas as pd
from sklearn.model_selection import train_test_split

from src.data_reader import load_train_test_df
from src.metrics import MSEMetric
from src.solutions.base_solution import BaseSolution


class ConstantPredictorSolution(BaseSolution):
    """Baseline solution that predicts the same constant for every target.

    The only "weight" of the model is ``const``; it is written to and read
    from a plain-text ``weights.ckpt`` file by :meth:`save` and :meth:`load`.
    """

    # Score columns emitted by ``predict`` (in output order, after ``text_id``).
    TARGET_COLUMNS = (
        'cohesion',
        'syntax',
        'vocabulary',
        'phraseology',
        'grammar',
        'conventions',
    )

    def __init__(self, const=3.0):
        """Create the predictor.

        Args:
            const: Value predicted for every target column of every row.
        """
        super().__init__()
        self.const = const

    def fit(self, X: pd.DataFrame, y: pd.DataFrame, **kwargs) -> None:
        """Do nothing: a constant predictor has nothing to learn."""
        pass

    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return a constant prediction for every row of ``X``.

        Args:
            X: Frame with a ``text_id`` column.

        Returns:
            A frame with a fresh 0..n-1 index holding ``text_id`` followed by
            one column per entry of ``TARGET_COLUMNS``, each filled with
            ``self.const``. The columns are present even when ``X`` is empty.
        """
        # Build the frame column-wise instead of row by row with ``iterrows``;
        # ``to_numpy`` drops the index of ``X`` so the result is re-indexed.
        predictions = pd.DataFrame({'text_id': X['text_id'].to_numpy()})
        return predictions.assign(**dict.fromkeys(self.TARGET_COLUMNS, self.const))

    def save(self, directory: Union[str, Path]) -> None:
        """Write ``const`` as text to ``<directory>/weights.ckpt``.

        Args:
            directory: Target directory; created (with parents) if missing.
        """
        directory = Path(directory)
        # ``exist_ok`` avoids the race between an existence check and mkdir.
        directory.mkdir(parents=True, exist_ok=True)

        path = directory / "weights.ckpt"
        with open(path, 'w') as file:
            file.write(str(self.const))

    def load(self, directory: Union[str, Path]) -> None:
        """Read ``const`` back from ``<directory>/weights.ckpt``.

        Args:
            directory: Directory previously passed to :meth:`save`.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist.
            ValueError: If the file content is not a valid float.
        """
        # Loading must not create anything on disk: a missing directory simply
        # surfaces as FileNotFoundError from ``open``.
        path = Path(directory) / "weights.ckpt"
        with open(path, 'r') as file:
            self.const = float(file.read())

    def to(self, device: str) -> 'BaseSolution':
        """Return ``self`` unchanged; ``device`` is ignored."""
        return self


def main():
    """Score the constant baseline on a hold-out split and write a submission.

    Loads the train/test frames, evaluates ``ConstantPredictorSolution`` on a
    random 20% split of the training data, prints the metric, and saves the
    predictions for the test frame to ``submission.csv``.
    """
    train_frame, test_frame = load_train_test_df()
    model = ConstantPredictorSolution()

    # Only the hold-out part of the split is needed; the model is never fitted.
    holdout = train_test_split(train_frame, test_size=0.2)[1]
    predicted = model.predict(holdout)

    label_columns = [
        'text_id',
        'cohesion',
        'syntax',
        'vocabulary',
        'phraseology',
        'grammar',
        'conventions',
    ]
    expected = holdout[label_columns]
    scorer = MSEMetric()

    # NOTE(review): the same metric is evaluated and printed twice; presumably
    # a copy-paste leftover where a second metric was intended -- confirm.
    for _ in range(2):
        print(f"Calculation class metric: {scorer.evaluate_class_rmse(predicted, expected)}")

    model.predict(test_frame).to_csv("submission.csv", index=False)


# Run the evaluation/submission script only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()