Spaces:
Sleeping
Sleeping
package nn | |
import ( | |
"math" | |
"math/rand" | |
"github.com/go-gota/gota/dataframe" | |
"gonum.org/v1/gonum/mat" | |
) | |
func (nn *NN) TrainTestSplit() { | |
// now we split the data into training | |
// and testing based on user specified | |
// nn.TestSize. | |
nRows := nn.Df.Nrow() | |
testRows := int(math.Floor(float64(nRows) * nn.TestSize)) | |
// subset the testing data | |
// randomly select trainRows number of rows | |
randStrt := rand.Intn(int(math.Floor(float64(nRows) * nn.TestSize))) | |
test := nn.Df.Subset([]int{randStrt, randStrt + testRows}) | |
// use what is left for training | |
allIndices := make([]int, nRows) | |
for i := range allIndices { | |
allIndices[i] = i | |
} | |
// Remove the test indices using slice append and variadic parameter | |
trainIndices := append(allIndices[:randStrt], allIndices[randStrt+testRows:]...) | |
// Create the train DataFrame using the trainIndices | |
train := nn.Df.Subset(trainIndices) | |
XTrain := train.Select(nn.Features) | |
YTrain := train.Select(nn.Target) | |
XTest := test.Select(nn.Features) | |
YTest := test.Select(nn.Target) | |
// to make linear algebra easier & faster, | |
// we convert these dataframes that we are | |
// performing potentially expensive computations | |
// on into gonum matrices since we no longer need the | |
// column names. | |
nn.XTrain = df2mat(&XTrain) | |
nn.YTrain = df2mat(&YTrain) | |
nn.XTest = df2mat(&XTest) | |
nn.YTest = df2mat(&YTest) | |
} | |
// df2mat -> converts gota dataframe into gonum matrix | |
func df2mat(df *dataframe.DataFrame) *mat.Dense { | |
m := mat.NewDense(df.Nrow(), df.Ncol(), nil) | |
for i := 0; i < df.Nrow(); i++ { | |
for j := 0; j < df.Ncol(); j++ { | |
m.Set(i, j, df.Elem(i, j).Float()) | |
} | |
} | |
return m | |
} | |