Spaces:
Runtime error
Runtime error
| #!/usr/local/bin/python3 | |
| # avenir-python: Machine Learning | |
| # Author: Pranab Ghosh | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); you | |
| # may not use this file except in compliance with the License. You may | |
| # obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |
| # implied. See the License for the specific language governing | |
| # permissions and limitations under the License. | |
| # Package imports | |
| import os | |
| import sys | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import sklearn as sk | |
| import matplotlib | |
| import random | |
| import jprops | |
| from io import StringIO | |
| from sklearn.model_selection import cross_val_score | |
| import joblib | |
| from random import randint | |
| from io import StringIO | |
| from sklearn.linear_model import LinearRegression | |
| sys.path.append(os.path.abspath("../lib")) | |
| from util import * | |
| from mlutil import * | |
| from pasearch import * | |
class BaseRegressor(object):
    """
    base regression class

    Holds the configuration, the logger and the train / validate / predict
    workflow shared by the regressors. This class never defines buildModel();
    sub classes must provide it and have it assign the estimator to
    self.regressor.
    """

    def __init__(self, configFile, defValues):
        """
        initializer

        Parameters
            configFile : configuration file path, handed to Configuration
            defValues : dictionary keyed by parameter name; the common and
            training data entries below are added to it in place. Each value
            is a 2 tuple, presumably (default value, error message raised
            when the parameter is mandatory) -- confirm against Configuration
        """
        defValues["common.mode"] = ("train", None)
        defValues["common.model.directory"] = ("model", None)
        defValues["common.model.file"] = (None, None)
        defValues["common.scale.file.path"] = (None, "missing scale file path")
        defValues["common.preprocessing"] = (None, None)
        defValues["common.verbose"] = (False, None)
        defValues["train.data.file"] = (None, "missing training data file")
        defValues["train.data.fields"] = (None, "missing training data field ordinals")
        defValues["train.data.feature.fields"] = (None, "missing training data feature field ordinals")
        defValues["train.data.out.field"] = (None, "missing out field ordinal")
        self.config = Configuration(configFile, defValues)

        self.featData = None
        self.outData = None
        self.regressor = None
        self.verbose = self.config.getBooleanConfig("common.verbose")[0]
        # the mode is a string such as "train", so it is read as a string
        # and not as a boolean
        self.mode = self.config.getStringConfig("common.mode")[0]

        logFilePath = self.config.getStringConfig("common.logging.file")[0]
        logLevName = self.config.getStringConfig("common.logging.level")[0]
        self.logger = createLogger(__name__, logFilePath, logLevName)
        self.logger.info("********* starting session")

    def initConfig(self, configFile, defValues):
        """
        initialize config, replacing the current configuration object

        Parameters
            configFile : configuration file path
            defValues : dictionary of default values
        """
        self.config = Configuration(configFile, defValues)

    def getConfig(self):
        """
        get config object
        """
        return self.config

    def setConfigParam(self, name, value):
        """
        set config param

        Parameters
            name : parameter name
            value : parameter value
        """
        self.config.setParam(name, value)

    def getMode(self):
        """
        get mode
        """
        return self.mode

    def getModelFilePath(self):
        """
        get the path of the saved model file

        Joins the values of common.model.directory and common.model.file.
        Raises ValueError when no model file name is configured.
        """
        modelDirectory = self.config.getStringConfig("common.model.directory")[0]
        modelFile = self.config.getStringConfig("common.model.file")[0]
        if modelFile is None:
            raise ValueError("missing model file name")
        if modelDirectory is None:
            return modelFile
        return os.path.join(modelDirectory, modelFile)

    def train(self):
        """
        train model

        Builds the model through buildModel(), fits it on the training data
        (loaded once and cached on the instance) and optionally saves it.
        Returns the tuple (score on the training data, intercept, coefficients),
        so the regressor must expose coef_ and intercept_.
        """
        # build model
        self.buildModel()

        # training data, loaded on first use only
        if self.featData is None:
            (featData, outData) = self.prepData("train")
            (self.featData, self.outData) = (featData, outData)
        else:
            (featData, outData) = (self.featData, self.outData)

        # parameters
        modelSave = self.config.getBooleanConfig("train.model.save")[0]

        # train
        self.logger.info("...training model")
        self.regressor.fit(featData, outData)
        rsqScore = self.regressor.score(featData, outData)
        coef = self.regressor.coef_
        intc = self.regressor.intercept_
        result = (rsqScore, intc, coef)

        if modelSave:
            self.logger.info("...saving model")
            modelFilePath = self.getModelFilePath()
            joblib.dump(self.regressor, modelFilePath)
        return result

    def validate(self):
        """
        validate model

        Returns the tuple (predicted output, score on the validation data).
        """
        # create model
        self.prepModel()

        # prepare test data
        (featData, outDataActual) = self.prepData("validate")

        # predict
        self.logger.info("...predicting")
        outDataPred = self.regressor.predict(featData)

        # error
        rsqScore = self.regressor.score(featData, outDataActual)
        result = (outDataPred, rsqScore)
        return result

    def predict(self):
        """
        predict using trained model

        Returns the output predicted for the data configured for "predict".
        """
        # create model
        self.prepModel()

        # prepare test data
        featData = self.prepData("predict")[0]

        # predict
        self.logger.info("...predicting")
        outData = self.regressor.predict(featData)
        return outData

    def prepData(self, mode):
        """
        loads and prepares data for training, validation and prediction

        Parameters
            mode : "train", "validate" or "predict"; used as the prefix of the
            configuration keys that are read

        Returns the tuple (feature data, output data). The output data is None
        in "predict" mode.
        """
        isPredict = mode == "predict"

        # parameters
        dataFile = self.config.getStringConfig(mode + ".data.file")[0]

        fieldIndices = self.config.getStringConfig(mode + ".data.fields")[0]
        if fieldIndices is not None:
            fieldIndices = strToIntArray(fieldIndices, ",")

        featFieldIndices = self.config.getStringConfig(mode + ".data.feature.fields")[0]
        if featFieldIndices is not None:
            featFieldIndices = strToIntArray(featFieldIndices, ",")

        # there is no output column to read when predicting
        outFieldIndex = None
        if not isPredict:
            outFieldIndex = self.config.getIntConfig(mode + ".data.out.field")[0]

        # load data
        (data, featData) = loadDataFile(dataFile, ",", fieldIndices, featFieldIndices)
        if self.config.getStringConfig("common.preprocessing")[0] == "scale":
            featData = sk.preprocessing.scale(featData)

        outData = None
        if not isPredict:
            outData = extrColumns(data, outFieldIndex)
        return (featData, outData)

    def prepModel(self):
        """
        load saved model or train model

        When predict.use.saved.model is set, the saved model is loaded once and
        then reused on later calls. Otherwise the model is trained.
        """
        useSavedModel = self.config.getBooleanConfig("predict.use.saved.model")[0]
        if useSavedModel:
            # an already loaded model is reused as is; it is never retrained
            if self.regressor is None:
                self.logger.info("...loading saved model")
                modelFilePath = self.getModelFilePath()
                self.regressor = joblib.load(modelFilePath)
        else:
            # train model
            self.train()
class LinearRegressor(BaseRegressor):
    """
    linear regression
    """

    def __init__(self, configFile):
        """
        initializer

        Parameters
            configFile : configuration file path
        """
        defValues = {}
        defValues["train.normalize"] = (False, None)
        super(LinearRegressor, self).__init__(configFile, defValues)

    def buildModel(self):
        """
        builds model object and assigns it to self.regressor
        """
        self.logger.info("...building linear regression model")
        normalize = self.config.getBooleanConfig("train.normalize")[0]

        # scikit-learn deprecated the "normalize" argument in 1.0 and removed it
        # in 1.2. It is forwarded only when enabled, so the default
        # configuration works on every release; where the argument still
        # exists, leaving it out is the same as passing False.
        params = {}
        if normalize:
            params["normalize"] = True
        self.regressor = LinearRegression(**params)
class ElasticNetRegressor(BaseRegressor):
    """
    elastic net regression
    """

    def __init__(self, configFile):
        """
        initializer

        Parameters
            configFile : configuration file path
        """
        defValues = {}
        defValues["train.alpha"] = (1.0, None)
        defValues["train.loneratio"] = (0.5, None)
        defValues["train.normalize"] = (False, None)
        defValues["train.precompute"] = (False, None)
        defValues["train.max.iter"] = (1000, None)
        defValues["train.tol"] = (0.0001, None)
        defValues["train.random.state"] = (None, None)
        defValues["train.selection"] = ("cyclic", None)
        super(ElasticNetRegressor, self).__init__(configFile, defValues)

    def buildModel(self):
        """
        builds model object and assigns it to self.regressor
        """
        # ElasticNet is not imported explicitly at file level, so it is
        # imported here to guarantee the name resolves
        from sklearn.linear_model import ElasticNet

        self.logger.info("...building elastic net regression model")
        alpha = self.config.getFloatConfig("train.alpha")[0]
        loneratio = self.config.getFloatConfig("train.loneratio")[0]
        normalize = self.config.getBooleanConfig("train.normalize")[0]
        precompute = self.config.getBooleanConfig("train.precompute")[0]
        maxIter = self.config.getIntConfig("train.max.iter")[0]
        tol = self.config.getFloatConfig("train.tol")[0]
        randState = self.config.getIntConfig("train.random.state")[0]
        # selection is a string ("cyclic" by default), so it is read as a string
        selection = self.config.getStringConfig("train.selection")[0]

        params = {
            "alpha": alpha,
            "l1_ratio": loneratio,
            "precompute": precompute,
            "max_iter": maxIter,
            "tol": tol,
            "random_state": randState,
            "selection": selection,
        }
        # scikit-learn deprecated the "normalize" argument in 1.0 and removed it
        # in 1.2. It is forwarded only when enabled, so the default
        # configuration works on every release.
        if normalize:
            params["normalize"] = True
        self.regressor = ElasticNet(**params)