File size: 1,636 Bytes
dfd3544
 
554811e
 
 
0987346
 
 
6b47160
dfd3544
0a04cd7
554811e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59ae052
 
 
 
6b47160
0987346
 
 
 
 
 
 
 
 
 
0a04cd7
0987346
 
 
 
 
 
 
 
 
554811e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package nn

import (
	"math"
	"math/rand"

	"github.com/go-gota/gota/dataframe"
	"gonum.org/v1/gonum/mat"
)

// TrainTestSplit partitions nn.Df into a test set and a training set and
// stores the nn.Features and nn.Target columns of each as gonum matrices in
// nn.XTrain, nn.YTrain, nn.XTest and nn.YTest.
//
// The test set is a contiguous window of floor(nRows * nn.TestSize) rows that
// starts at a randomly chosen offset; every other row goes to the training
// set, so the two sets never share a row. Rows are not shuffled.
func (nn *NN) TrainTestSplit() {
	nRows := nn.Df.Nrow()

	// Clamp the window length so that an out-of-range nn.TestSize can neither
	// yield a negative count nor ask for more rows than exist.
	testRows := int(math.Floor(float64(nRows) * nn.TestSize))
	if testRows < 0 {
		testRows = 0
	}
	if testRows > nRows {
		testRows = nRows
	}

	// Valid window starts are 0..nRows-testRows inclusive. After clamping the
	// argument is always >= 1, so rand.Intn cannot panic here.
	start := rand.Intn(nRows - testRows + 1)
	end := start + testRows

	// Subset expects an explicit list of row indices (not a [from, to] pair),
	// so enumerate every row and route it to exactly one of the two sets.
	testIndices := make([]int, 0, testRows)
	trainIndices := make([]int, 0, nRows-testRows)
	for i := 0; i < nRows; i++ {
		if i >= start && i < end {
			testIndices = append(testIndices, i)
		} else {
			trainIndices = append(trainIndices, i)
		}
	}

	test := nn.Df.Subset(testIndices)
	train := nn.Df.Subset(trainIndices)

	XTrain := train.Select(nn.Features)
	YTrain := train.Select(nn.Target)
	XTest := test.Select(nn.Features)
	YTest := test.Select(nn.Target)

	// Column names are no longer needed from here on, so convert the frames
	// to gonum matrices for the linear algebra that follows.
	nn.XTrain = df2mat(&XTrain)
	nn.YTrain = df2mat(&YTrain)
	nn.XTest = df2mat(&XTest)
	nn.YTest = df2mat(&YTest)
}

// df2mat copies every cell of a gota DataFrame into a newly allocated gonum
// dense matrix with the same number of rows and columns. Each cell is read
// with Elem and converted through its Float method.
func df2mat(df *dataframe.DataFrame) *mat.Dense {
	rows, cols := df.Nrow(), df.Ncol()
	out := mat.NewDense(rows, cols, nil)

	// Walk the cells in row-major order using one flat counter.
	for idx := 0; idx < rows*cols; idx++ {
		r, c := idx/cols, idx%cols
		out.Set(r, c, df.Elem(r, c).Float())
	}
	return out
}