32 Evaluation of three linear regression models

• Dataset: iris.csv
• Algorithms:
• Simple Linear Regression
• Multiple Regression
• Neural Networks

32.2 Explore the Data

1. Load the iris data
2. Plot scatterplot
3. Plot correlogram
# Load the built-in iris data frame and save a raw CSV copy of it.
# NOTE(review): `data_raw_dir` is not defined in this section; it is assumed
# to be set up earlier in the project. Confirm before running standalone.
data("iris")
write.csv(x = iris, file = file.path(data_raw_dir, "iris.csv"))

Create scatterplot matrix

# Pairwise scatterplots of the first four (numeric) columns.
plot(iris[, 1:4])

# Correlogram of the same four columns (provided by the corrgram package).
library(corrgram)
#> Registered S3 method overwritten by 'seriation':
#>   method         from
#>   reorder.hclust gclus
corrgram(iris[, 1:4])

# Correlation matrix of the four measurements.
cor(iris[, 1:4])
#>              Sepal.Length Sepal.Width Petal.Length Petal.Width
#> Sepal.Length        1.000      -0.118        0.872       0.818
#> Sepal.Width        -0.118       1.000       -0.428      -0.366
#> Petal.Length        0.872      -0.428        1.000       0.963
#> Petal.Width         0.818      -0.366        0.963       1.000

# Correlation between the two petal measurements on their own.
cor(iris$Petal.Length, iris$Petal.Width)
#> [1] 0.963

# Petal width against petal length, with fixed axis limits.
plot(
  iris$Petal.Length, iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5)
)

32.3 Create Training and Test Sets

# Reproducible train/test split: draw 100 distinct row numbers for training
# and keep every remaining row for testing.
set.seed(42)
indexes <- sample(
  # Derive the candidate rows from the data instead of hard-coding 150, so
  # the split stays valid if the number of rows changes.
  x = seq_len(nrow(iris)),
  size = 100)
train <- iris[indexes, ]
# Negative indexing drops the training rows, leaving the held-out rows.
test <- iris[-indexes, ]

32.4 Predict with Simple Linear Regression

# Fit petal width as a linear function of petal length on the training rows.
simpleModel <- lm(
  formula = Petal.Width ~ Petal.Length,
  data = train)

# Scatter of the full data set, used as the backdrop for the fitted line.
plot(
  x = iris$Petal.Length, y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))

# Overlay the fitted values at the training x positions. fitted() is used
# instead of `simpleModel$fitted`, which only worked through `$` partial
# matching of the `fitted.values` component.
lines(
  x = train$Petal.Length, y = fitted(simpleModel),
  col = "red",
  lwd = 3)
summary(simpleModel)
#>
#> Call:
#> lm(formula = Petal.Width ~ Petal.Length, data = train)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -0.5684 -0.1279 -0.0307  0.1280  0.6385
#>
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   -0.3486     0.0476   -7.33  6.7e-11 ***
#> Petal.Length   0.4137     0.0119   34.80  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.209 on 98 degrees of freedom
#> Multiple R-squared:  0.925,  Adjusted R-squared:  0.924
#> F-statistic: 1.21e+03 on 1 and 98 DF,  p-value: <2e-16
# Predict petal width for the held-out rows with the simple model.
simplePredictions <- predict(
  object = simpleModel,
  newdata = test)

# Backdrop: all observations.
plot(
  x = iris$Petal.Length, y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))

# Blue crosses mark the predicted widths for the test rows.
points(
  x = test$Petal.Length, y = simplePredictions,
  col = "blue", pch = 4, lwd = 2)
# Filled red dots mark the observed widths for the same rows.
points(
  x = test$Petal.Length, y = test$Petal.Width,
  col = "red", pch = 16)

# Root mean squared error of the predictions on the test set.
simpleRMSE <- sqrt(mean((test$Petal.Width - simplePredictions)^2))
print(simpleRMSE)
#> [1] 0.201

32.5 Predict with Multiple Regression

# Regress petal width on every other column of the training set; the `.` on
# the right-hand side stands for all remaining columns of `train`.
multipleModel <- lm(Petal.Width ~ ., data = train)
summary(multipleModel)
#>
#> Call:
#> lm(formula = Petal.Width ~ ., data = train)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -0.5769 -0.0843 -0.0066  0.0978  0.4731
#>
#> Coefficients:
#>                   Estimate Std. Error t value Pr(>|t|)
#> (Intercept)        -0.5088     0.2277   -2.23  0.02779 *
#> Sepal.Length       -0.0486     0.0593   -0.82  0.41435
#> Sepal.Width         0.2032     0.0594    3.42  0.00092 ***
#> Petal.Length        0.2103     0.0641    3.28  0.00146 **
#> Speciesversicolor   0.6769     0.1583    4.28  4.5e-05 ***
#> Speciesvirginica    1.0762     0.2126    5.06  2.1e-06 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.176 on 94 degrees of freedom
#> Multiple R-squared:  0.949,  Adjusted R-squared:  0.947
#> F-statistic:  352 on 5 and 94 DF,  p-value: <2e-16
# Predict petal width for the held-out rows with the multiple-regression model.
multiplePredictions <- predict(
  object = multipleModel,
  newdata = test)

# Backdrop: all observations.
plot(
  x = iris$Petal.Length, y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))

# Blue crosses mark the predicted widths for the test rows.
points(
  x = test$Petal.Length, y = multiplePredictions,
  col = "blue", pch = 4, lwd = 2)
# Filled red dots mark the observed widths for the same rows.
points(
  x = test$Petal.Length, y = test$Petal.Width,
  col = "red", pch = 16)

# Root mean squared error of the predictions on the test set.
multipleRMSE <- sqrt(mean((test$Petal.Width - multiplePredictions)^2))
print(multipleRMSE)
#> [1] 0.15

32.6 Predict with Neural Network Regression

# Linearly rescale `x` so that its minimum maps to -0.5 and its maximum
# maps to 0.5.
normalize <- function(x) {
  bounds <- range(x)
  (x - bounds[1]) / (bounds[2] - bounds[1]) - 0.5
}
# Map values `x` from the -0.5..0.5 scale back onto the range spanned by the
# reference vector `y` (the inverse of the rescaling done by normalize()).
denormalize <- function(x, y) {
  lower <- min(y)
  width <- max(y) - lower
  (x + 0.5) * width + lower
}
# Rescale the four numeric columns with normalize(); Species is carried over
# unchanged.
scaledIris <- data.frame(
  Sepal.Length = normalize(iris$Sepal.Length),
  Sepal.Width = normalize(iris$Sepal.Width),
  Petal.Length = normalize(iris$Petal.Length),
  Petal.Width = normalize(iris$Petal.Width),
  Species = iris$Species)

# Reuse the row numbers in `indexes` so the scaled split contains the same
# rows as the unscaled train/test split.
scaledTrain <- scaledIris[indexes, ]
scaledTest <- scaledIris[-indexes, ]

library(nnet)
# Single-hidden-layer network with 4 hidden units, a linear output unit
# (linout) and skip-layer connections (skip), trained for at most 500
# iterations with a small weight decay.
neuralRegressor <- nnet(
  formula = Petal.Width ~ .,
  data = scaledTrain,
  linout = TRUE,
  skip = TRUE,
  size = 4,
  decay = 0.0001,
  maxit = 500)
#> # weights: 34
#> initial value 64.175158
#> iter 10 value 0.498340
#> iter 20 value 0.439307
#> iter 30 value 0.419373
#> iter 40 value 0.415119
#> iter 50 value 0.412305
#> iter 60 value 0.410862
#> iter 70 value 0.404854
#> iter 80 value 0.402606
#> iter 90 value 0.397903
#> iter 100 value 0.396295
#> iter 110 value 0.394292
#> iter 120 value 0.392628
#> iter 130 value 0.390306
#> iter 140 value 0.389577
#> iter 150 value 0.388916
#> iter 160 value 0.387607
#> iter 170 value 0.382857
#> iter 180 value 0.377332
#> iter 190 value 0.371974
#> iter 200 value 0.366019
#> iter 210 value 0.357405
#> iter 220 value 0.351831
#> iter 230 value 0.347613
#> iter 240 value 0.344466
#> iter 250 value 0.341515
#> iter 260 value 0.340828
#> iter 270 value 0.340236
#> iter 280 value 0.338736
#> iter 290 value 0.337991
#> iter 300 value 0.336182
#> iter 310 value 0.333793
#> iter 320 value 0.331206
#> iter 330 value 0.330171
#> iter 340 value 0.329803
#> iter 350 value 0.329587
#> iter 360 value 0.329343
#> iter 370 value 0.328909
#> iter 380 value 0.327579
#> iter 390 value 0.326227
#> iter 400 value 0.323911
#> iter 410 value 0.322154
#> iter 420 value 0.320878
#> iter 430 value 0.320122
#> iter 440 value 0.319153
#> iter 450 value 0.318239
#> iter 460 value 0.316869
#> iter 470 value 0.315668
#> iter 480 value 0.314685
#> iter 490 value 0.314604
#> iter 500 value 0.314257
#> final value 0.314257
#> stopped after 500 iterations

# Diagram of the fitted network.
library(NeuralNetTools)
plotnet(neuralRegressor)

# Predict on the scaled test rows, then convert the predictions back to the
# original Petal.Width scale so they are comparable with `test$Petal.Width`.
scaledPredictions <- predict(
  object = neuralRegressor,
  newdata = scaledTest)
neuralPredictions <- denormalize(
  x = scaledPredictions,
  y = iris$Petal.Width)
# Backdrop: all observations.
plot(
  x = iris$Petal.Length, y = iris$Petal.Width,
  xlim = c(0.25, 7),
  ylim = c(0.25, 2.5))

# Blue crosses mark the network's predicted widths for the test rows.
points(
  x = test$Petal.Length, y = neuralPredictions,
  col = "blue", pch = 4, lwd = 2)
# Filled red dots mark the observed widths for the same rows.
points(
  x = test$Petal.Length, y = test$Petal.Width,
  col = "red", pch = 16)

# Root mean squared error of the predictions on the test set.
neuralRMSE <- sqrt(mean((test$Petal.Width - neuralPredictions)^2))
print(neuralRMSE)
#> [1] 0.188

32.7 Evaluate All the Regression Models

# Print the three test-set RMSE values in order: simple linear regression,
# multiple regression, then the neural network.
invisible(lapply(list(simpleRMSE, multipleRMSE, neuralRMSE), print))
#> [1] 0.201
#> [1] 0.15
#> [1] 0.188