File size: 2,302 Bytes
8b414b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from pathlib import Path
from typing import Union
import pandas as pd
from sklearn.model_selection import train_test_split
from src.data_reader import load_train_test_df
from src.metrics import MSEMetric
from src.solutions.base_solution import BaseSolution
class ConstantPredictorSolution(BaseSolution):
    """Baseline solution that predicts one constant for every target column.

    The only "weight" of the model is ``const``; it is emitted unchanged for
    each of the six target columns of every input row.
    """

    # Target columns filled with ``self.const``, in output order.
    _TARGET_COLUMNS = (
        'cohesion',
        'syntax',
        'vocabulary',
        'phraseology',
        'grammar',
        'conventions',
    )

    def __init__(self, const=3.0):
        """Store the constant that ``predict`` emits for every target."""
        super().__init__()
        self.const = const

    def fit(self, X: pd.DataFrame, y: pd.DataFrame, **kwargs) -> None:
        """Do nothing: the constant is set at construction time or by ``load``."""

    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return a frame with ``text_id`` plus one constant column per target.

        Args:
            X: Input frame; only its ``text_id`` column is read.

        Returns:
            A frame with a fresh 0..n-1 index, the ``text_id`` values of ``X``
            in their original order, and every target column set to
            ``self.const``. An empty ``X`` yields an empty frame that still
            carries all columns.

        Raises:
            KeyError: If ``X`` has no ``text_id`` column.
        """
        # Built column-wise instead of via iterrows(): iterrows() is slow and
        # converts each row to a single-dtype Series, which can upcast text_id.
        columns = {'text_id': X['text_id'].to_numpy()}
        columns.update({target: self.const for target in self._TARGET_COLUMNS})
        # An explicit index lets the scalar columns broadcast even for empty X.
        return pd.DataFrame(columns, index=pd.RangeIndex(len(X)))

    def save(self, directory: Union[str, Path]) -> None:
        """Write the constant to ``<directory>/weights.ckpt`` as text.

        The directory (including missing parents) is created when needed.
        """
        directory = Path(directory)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        directory.mkdir(parents=True, exist_ok=True)
        path = directory / "weights.ckpt"
        with open(path, 'w') as file:
            file.write(str(self.const))

    def load(self, directory: Union[str, Path]) -> None:
        """Read the constant back from ``<directory>/weights.ckpt``.

        Loading never creates directories: a missing checkpoint surfaces as
        ``FileNotFoundError`` without leaving an empty directory behind.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist.
            ValueError: If the file content cannot be parsed as a float.
        """
        path = Path(directory) / "weights.ckpt"
        with open(path, 'r') as file:
            self.const = float(file.read())

    def to(self, device: str) -> 'BaseSolution':
        """Return ``self`` unchanged; ``device`` is ignored."""
        return self
def main():
    """Score the constant baseline on a hold-out split and write a submission.

    Loads the train/test frames, holds out 20% of the training frame, prints
    the class RMSE of the constant predictions on that hold-out, and writes
    predictions for the test frame to ``submission.csv``.
    """
    train_df, test_df = load_train_test_df()
    predictor = ConstantPredictorSolution()

    # Only the held-out part is used: the predictor is never fitted here.
    _, test_data = train_test_split(train_df, test_size=0.2)

    y_pred = predictor.predict(test_data)
    y_true = test_data[['text_id', 'cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']]

    metric = MSEMetric()
    # The original evaluated and printed this exact metric twice in a row;
    # a single evaluation gives the same information.
    print(f"Calculation class metric: {metric.evaluate_class_rmse(y_pred, y_true)}")

    submission_df = predictor.predict(test_df)
    submission_df.to_csv("submission.csv", index=False)
# Run the baseline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|