Spaces:
Sleeping
Sleeping
File size: 1,636 Bytes
dfd3544 554811e 0987346 6b47160 dfd3544 0a04cd7 554811e 59ae052 6b47160 0987346 0a04cd7 0987346 554811e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
package nn
import (
"math"
"math/rand"
"github.com/go-gota/gota/dataframe"
"gonum.org/v1/gonum/mat"
)
// TrainTestSplit partitions nn.Df into a testing set and a training set and
// stores the feature/target matrices on the receiver.
//
// A contiguous window of floor(nRows * nn.TestSize) rows, starting at a
// random offset, is held out for testing; every remaining row is used for
// training. The nn.Features and nn.Target columns of each part are then
// converted to gonum matrices and written to nn.XTrain, nn.YTrain, nn.XTest
// and nn.YTest.
//
// The test row count is clamped to [0, nRows], so an out-of-range
// nn.TestSize can no longer produce out-of-bounds indices.
// NOTE(review): when either part ends up with zero rows, df2mat is still
// called on an empty frame; gonum's NewDense is expected to panic on a zero
// dimension — confirm and validate TestSize upstream if that matters.
func (nn *NN) TrainTestSplit() {
	nRows := nn.Df.Nrow()

	// Number of rows held out for testing, clamped so the window below
	// always fits inside the frame.
	testRows := int(math.Floor(float64(nRows) * nn.TestSize))
	if testRows < 0 {
		testRows = 0
	}
	if testRows > nRows {
		testRows = nRows
	}

	// Random start of the test window. The argument to Intn is always >= 1,
	// so it cannot panic, and start+testRows never exceeds nRows.
	start := rand.Intn(nRows - testRows + 1)
	end := start + testRows

	// Build explicit row-index lists: Subset takes the indices of the rows
	// to keep, not a [from, to] pair.
	testIndices := make([]int, 0, testRows)
	trainIndices := make([]int, 0, nRows-testRows)
	for i := 0; i < nRows; i++ {
		if i >= start && i < end {
			testIndices = append(testIndices, i)
		} else {
			trainIndices = append(trainIndices, i)
		}
	}

	test := nn.Df.Subset(testIndices)
	train := nn.Df.Subset(trainIndices)

	XTrain := train.Select(nn.Features)
	YTrain := train.Select(nn.Target)
	XTest := test.Select(nn.Features)
	YTest := test.Select(nn.Target)

	// Column names are no longer needed from here on, so convert the frames
	// to gonum matrices for the linear algebra that follows.
	nn.XTrain = df2mat(&XTrain)
	nn.YTrain = df2mat(&YTrain)
	nn.XTest = df2mat(&XTest)
	nn.YTest = df2mat(&YTest)
}
// df2mat copies every element of a gota DataFrame, read as float64, into a
// newly allocated gonum dense matrix with the same number of rows and
// columns, and returns that matrix.
func df2mat(df *dataframe.DataFrame) *mat.Dense {
	rows, cols := df.Nrow(), df.Ncol()
	out := mat.NewDense(rows, cols, nil)

	// Fill the matrix one row at a time through a reusable buffer.
	buf := make([]float64, cols)
	for r := 0; r < rows; r++ {
		for c := range buf {
			buf[c] = df.Elem(r, c).Float()
		}
		out.SetRow(r, buf)
	}
	return out
}
|