32 Evaluation of three regression models

  • Dataset: iris.csv
  • Algorithms:
    • Simple Linear Regression
    • Multiple Regression
    • Neural Networks

32.2 Explore the Data

  1. Load the iris data
  2. Plot a scatterplot matrix
  3. Plot a correlogram
# load the built-in iris data set and save a raw CSV copy
# (data_raw_dir is assumed to be defined earlier in the project setup)
data(iris)
write.csv(iris, file.path(data_raw_dir, "iris.csv"))

Create a scatterplot matrix, a correlogram, and a correlation matrix for the four numeric columns

plot(iris[1:4])
library(corrgram)
#> Registered S3 method overwritten by 'seriation':
#>   method         from 
#>   reorder.hclust gclus
corrgram(iris[1:4])
cor(iris[1:4])
#>              Sepal.Length Sepal.Width Petal.Length Petal.Width
#> Sepal.Length        1.000      -0.118        0.872       0.818
#> Sepal.Width        -0.118       1.000       -0.428      -0.366
#> Petal.Length        0.872      -0.428        1.000       0.963
#> Petal.Width         0.818      -0.366        0.963       1.000
cor(
  x = iris$Petal.Length, 
  y = iris$Petal.Width)
#> [1] 0.963
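Petal.Width is most strongly correlated with Petal.Length (r = 0.963), so the simple regression below uses Petal.Length as its single predictor.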
plot(
  x = iris$Petal.Length, 
  y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))

32.3 Create Training and Test Sets

# randomly select 100 of the 150 rows for training; the other 50 become the test set
indexes <- sample(
  x = 1:150, 
  size = 100)
train <- iris[indexes, ]
test <- iris[-indexes, ]
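Note that sample() draws a different random split each time, so the RMSE values reported below will vary from run to run. A minimal sketch of a reproducible split, assuming the arbitrary seed 42 (not part of the original code):

set.seed(42)  # fix the random number generator so the same rows are drawn every run
indexes <- sample(
  x = 1:150, 
  size = 100)
train <- iris[indexes, ]
test <- iris[-indexes, ]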

32.4 Predict with Simple Linear Regression

# regress Petal.Width on Petal.Length using only the training rows
simpleModel <- lm(
  formula = Petal.Width ~ Petal.Length,
  data = train)
plot(
  x = iris$Petal.Length, 
  y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))
  
lines(
  x = train$Petal.Length,
  y = simpleModel$fitted, 
  col = "red",
  lwd = 3)
summary(simpleModel)
#> 
#> Call:
#> lm(formula = Petal.Width ~ Petal.Length, data = train)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -0.5684 -0.1279 -0.0307  0.1280  0.6385 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   -0.3486     0.0476   -7.33  6.7e-11 ***
#> Petal.Length   0.4137     0.0119   34.80  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.209 on 98 degrees of freedom
#> Multiple R-squared:  0.925,  Adjusted R-squared:  0.924 
#> F-statistic: 1.21e+03 on 1 and 98 DF,  p-value: <2e-16
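On the training data the fitted line is approximately Petal.Width = -0.349 + 0.414 * Petal.Length, and the model explains about 92% of the training variance (multiple R-squared 0.925).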
simplePredictions <- predict(
  object = simpleModel,
  newdata = test)
plot(
  x = iris$Petal.Length, 
  y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))
  
points(
  x = test$Petal.Length,
  y = simplePredictions,
  col = "blue",
  pch = 4,
  lwd = 2)

points(
  x = test$Petal.Length,
  y = test$Petal.Width,
  col = "red",
  pch = 16)
simpleRMSE <- sqrt(mean((test$Petal.Width - simplePredictions)^2))
print(simpleRMSE)
#> [1] 0.201
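The same RMSE formula is reused for every model in this chapter, so it can be handy to wrap it in a small helper. A minimal sketch (the rmse() helper is not part of the original code):

rmse <- function(actual, predicted) {
  # root-mean-square error: typical prediction error in the units of the target
  sqrt(mean((actual - predicted)^2))
}
# equivalent to the expression above
simpleRMSE <- rmse(test$Petal.Width, simplePredictions)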

32.5 Predict with Multiple Regression

# regress Petal.Width on all remaining columns, including the Species factor
multipleModel <- lm(
  formula = Petal.Width ~ .,
  data = train)
summary(multipleModel)
#> 
#> Call:
#> lm(formula = Petal.Width ~ ., data = train)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -0.5769 -0.0843 -0.0066  0.0978  0.4731 
#> 
#> Coefficients:
#>                   Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)        -0.5088     0.2277   -2.23  0.02779 *  
#> Sepal.Length       -0.0486     0.0593   -0.82  0.41435    
#> Sepal.Width         0.2032     0.0594    3.42  0.00092 ***
#> Petal.Length        0.2103     0.0641    3.28  0.00146 ** 
#> Speciesversicolor   0.6769     0.1583    4.28  4.5e-05 ***
#> Speciesvirginica    1.0762     0.2126    5.06  2.1e-06 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.176 on 94 degrees of freedom
#> Multiple R-squared:  0.949,  Adjusted R-squared:  0.947 
#> F-statistic:  352 on 5 and 94 DF,  p-value: <2e-16
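With every other column included as a predictor, Sepal.Width, Petal.Length, and the Species indicator variables are all significant, while Sepal.Length is not (p = 0.41); the additional predictors raise the training R-squared to about 0.95.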
multiplePredictions <- predict(
  object = multipleModel,
  newdata = test)
plot(
  x = iris$Petal.Length, 
  y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))
  
points(
  x = test$Petal.Length,
  y = multiplePredictions,
  col = "blue",
  pch = 4,
  lwd = 2)

points(
  x = test$Petal.Length,
  y = test$Petal.Width,
  col = "red",
  pch = 16)
multipleRMSE <- sqrt(mean((test$Petal.Width - multiplePredictions)^2))
print(multipleRMSE)
#> [1] 0.15

32.6 Predict with Neural Network Regression

# rescale a numeric vector to the range [-0.5, 0.5]
normalize <- function(x) {
  (x - min(x)) / (max(x) - min(x)) - 0.5
}
# map normalized values x back to the original scale of the reference vector y
denormalize <- function(x, y) {
  ((x + 0.5) * (max(y) - min(y))) + min(y)
}
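As a quick sanity check, denormalize() should undo normalize() when both are given the same reference vector; a minimal sketch:

x <- iris$Petal.Width
all.equal(denormalize(normalize(x), x), x)  # should be TRUE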
# normalized copies of the four numeric columns; Species is kept as a factor
scaledIris <- data.frame(
  Sepal.Length = normalize(iris$Sepal.Length),
  Sepal.Width = normalize(iris$Sepal.Width),
  Petal.Length = normalize(iris$Petal.Length),
  Petal.Width = normalize(iris$Petal.Width),
  Species = iris$Species)
scaledTrain <- scaledIris[indexes, ]
scaledTest <- scaledIris[-indexes, ]
library(nnet)

# single-hidden-layer network: 4 hidden units, linear output for regression,
# skip-layer connections, and a small weight-decay penalty
neuralRegressor <- nnet(
  formula = Petal.Width ~ .,
  data = scaledTrain,
  linout = TRUE,
  skip = TRUE,
  size = 4,
  decay = 0.0001,
  maxit = 500)
#> # weights:  34
#> initial  value 64.175158 
#> iter  10 value 0.498340
#> iter  20 value 0.439307
#> iter  30 value 0.419373
#> iter  40 value 0.415119
#> iter  50 value 0.412305
#> iter  60 value 0.410862
#> iter  70 value 0.404854
#> iter  80 value 0.402606
#> iter  90 value 0.397903
#> iter 100 value 0.396295
#> iter 110 value 0.394292
#> iter 120 value 0.392628
#> iter 130 value 0.390306
#> iter 140 value 0.389577
#> iter 150 value 0.388916
#> iter 160 value 0.387607
#> iter 170 value 0.382857
#> iter 180 value 0.377332
#> iter 190 value 0.371974
#> iter 200 value 0.366019
#> iter 210 value 0.357405
#> iter 220 value 0.351831
#> iter 230 value 0.347613
#> iter 240 value 0.344466
#> iter 250 value 0.341515
#> iter 260 value 0.340828
#> iter 270 value 0.340236
#> iter 280 value 0.338736
#> iter 290 value 0.337991
#> iter 300 value 0.336182
#> iter 310 value 0.333793
#> iter 320 value 0.331206
#> iter 330 value 0.330171
#> iter 340 value 0.329803
#> iter 350 value 0.329587
#> iter 360 value 0.329343
#> iter 370 value 0.328909
#> iter 380 value 0.327579
#> iter 390 value 0.326227
#> iter 400 value 0.323911
#> iter 410 value 0.322154
#> iter 420 value 0.320878
#> iter 430 value 0.320122
#> iter 440 value 0.319153
#> iter 450 value 0.318239
#> iter 460 value 0.316869
#> iter 470 value 0.315668
#> iter 480 value 0.314685
#> iter 490 value 0.314604
#> iter 500 value 0.314257
#> final  value 0.314257 
#> stopped after 500 iterations
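The final line reports that the optimizer stopped after 500 iterations, i.e. it hit maxit before fully converging. A possible refit with a larger iteration limit, shown here only as a sketch (the results below use the original 500-iteration fit):

neuralRegressor2 <- nnet(  # hypothetical refit; not used in the predictions below
  formula = Petal.Width ~ .,
  data = scaledTrain,
  linout = TRUE,
  skip = TRUE,
  size = 4,
  decay = 0.0001,
  maxit = 2000)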
library(NeuralNetTools)

plotnet(neuralRegressor)
# predict on the scaled test set, then convert back to the original Petal.Width scale
scaledPredictions <- predict(
  object = neuralRegressor, 
  newdata = scaledTest)
neuralPredictions <- denormalize(
  x = scaledPredictions, 
  y = iris$Petal.Width)
plot(
  x = iris$Petal.Length, 
  y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))
  
points(
  x = test$Petal.Length,
  y = neuralPredictions,
  col = "blue",
  pch = 4,
  lwd = 2)

points(
  x = test$Petal.Length,
  y = test$Petal.Width,
  col = "red",
  pch = 16)
neuralRMSE <- sqrt(mean((test$Petal.Width - neuralPredictions)^2))
print(neuralRMSE)
#> [1] 0.188

32.7 Evaluate All the Regression Models

print(simpleRMSE)
#> [1] 0.201
print(multipleRMSE)
#> [1] 0.15
print(neuralRMSE)
#> [1] 0.188
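A compact way to compare the three models is to collect the test errors in one data frame; a minimal sketch using the objects created above:

results <- data.frame(
  model = c("Simple", "Multiple", "Neural network"),
  RMSE = c(simpleRMSE, multipleRMSE, neuralRMSE))
print(results)

On this particular split, multiple regression gives the lowest test RMSE (0.15), followed by the neural network (0.188) and the simple regression (0.201).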