Chapter 14 Working with a data.frame
Last update: Thu Nov 19 14:24:08 2020 -0600 (ca4f8b4a0)
14.1 Load PyTorch libraries
14.2 Load dataset
# locations of the training and testing image folders
train_data_path <- './mnist_png_full/training/'
test_data_path <- './mnist_png_full/testing/'
# build the training dataset; ToTensor() is the only transform applied
# (no normalization step)
train_dataset <- torchvision$datasets$ImageFolder(
  root = train_data_path,
  transform = torchvision$transforms$ToTensor()
)
print(train_dataset)
#> Dataset ImageFolder
#> Number of datapoints: 60000
#> Root location: ./mnist_png_full/training/
#> StandardTransform
#> Transform: ToTensor()
14.3 Summary statistics for tensors
14.3.1 Using data.frame
# tictoc supplies the tic()/toc() pair used to time the code that follows
library(tictoc)
# start the timer; the matching toc() is not part of this listing
# (presumably it produces the "sec elapsed" line shown further down)
tic()
# Statistic name -> chain of tensor methods to call, in order.
# `size` and `numel` are single-method chains; every other statistic calls
# its method and then "item" on the result.
fun_list <- c(
  list(size = "size", numel = "numel"),
  lapply(
    c(sum = "sum", mean = "mean", std = "std",
      med = "median", max = "max", min = "min"),
    function(method) c(method, "item")
  )
)
# sample indices to process: 0 through 599, i.e. 600 samples
idx <- 0L:599L
# Fetch item `x` from train_dataset and return the element at position 0 of
# that item (presumably the image tensor of an (image, label) pair -- confirm).
fun_get_tensor <- function(x) {
  item <- py_get_item(train_dataset, x)
  item[[0]]
}
# Compute one statistic for every sample index in the global `idx`.
#
# Arguments:
#   x        Ignored. It is shadowed by the per-sample index inside the loop
#            and is kept only so existing calls such as stat_fun(1, y) work.
#   str_fun  Method chain to call on the tensor, written without the final
#            "()", e.g. "size" or "sum()$item".
#
# Returns the sapply() result over `idx`: numeric results are kept as they
# are, anything else (e.g. the value returned by size) is converted with
# as.character().
#
# NOTE(review): str_fun is spliced into code that is parsed and evaluated, so
# it must only ever come from trusted, program-defined values.
stat_fun <- function(x, str_fun) {
  # Parse the call text once; the `x` inside it is resolved at eval() time to
  # the argument of the anonymous function below, i.e. the sample index.
  fun_call <- parse(text = paste0("fun_get_tensor(x)", "$", str_fun, "()"))
  sapply(idx, function(x) {
    # Evaluate exactly once per sample: each evaluation re-runs
    # fun_get_tensor(), so testing and then re-evaluating doubles the work.
    value <- eval(fun_call)
    if (is.numeric(value)) {
      value                 # numeric statistics are returned unchanged
    } else {
      as.character(value)   # size returns a non-numeric object
    }
  })
}
# Assemble the summary table: one row per sample, one column per statistic.
# local() keeps the intermediate values out of the global environment.
df <- local({
  # collapse each method vector into a chain, e.g. c("sum", "item") -> "sum()$item"
  chains <- vapply(fun_list, paste, character(1), collapse = "()$")
  # one column per statistic; the first argument of stat_fun is a placeholder
  stat_columns <- lapply(chains, function(chain) stat_fun(1, chain))
  data.frame(
    ridx = idx + 1, # index number for the sample
    do.call(data.frame, stat_columns)
  )
})
Summary statistics:
#> ridx size numel sum mean std med max min
#> 1 1 torch.Size([3, 28, 28]) 2352 366 0.156 0.329 0 1.000 0
#> 2 2 torch.Size([3, 28, 28]) 2352 284 0.121 0.297 0 1.000 0
#> 3 3 torch.Size([3, 28, 28]) 2352 645 0.274 0.420 0 1.000 0
#> 4 4 torch.Size([3, 28, 28]) 2352 410 0.174 0.355 0 1.000 0
#> 5 5 torch.Size([3, 28, 28]) 2352 321 0.137 0.312 0 1.000 0
#> 6 6 torch.Size([3, 28, 28]) 2352 654 0.278 0.421 0 1.000 0
#> 7 7 torch.Size([3, 28, 28]) 2352 496 0.211 0.374 0 1.000 0
#> 8 8 torch.Size([3, 28, 28]) 2352 549 0.233 0.399 0 1.000 0
#> 9 9 torch.Size([3, 28, 28]) 2352 449 0.191 0.365 0 1.000 0
#> 10 10 torch.Size([3, 28, 28]) 2352 465 0.198 0.367 0 1.000 0
#> 11 11 torch.Size([3, 28, 28]) 2352 383 0.163 0.338 0 1.000 0
#> 12 12 torch.Size([3, 28, 28]) 2352 499 0.212 0.378 0 1.000 0
#> 13 13 torch.Size([3, 28, 28]) 2352 313 0.133 0.309 0 0.996 0
#> 14 14 torch.Size([3, 28, 28]) 2352 360 0.153 0.325 0 1.000 0
#> 15 15 torch.Size([3, 28, 28]) 2352 435 0.185 0.358 0 0.996 0
#> 16 16 torch.Size([3, 28, 28]) 2352 429 0.182 0.358 0 1.000 0
#> 17 17 torch.Size([3, 28, 28]) 2352 596 0.254 0.408 0 1.000 0
#> 18 18 torch.Size([3, 28, 28]) 2352 527 0.224 0.392 0 1.000 0
#> 19 19 torch.Size([3, 28, 28]) 2352 303 0.129 0.301 0 1.000 0
#> 20 20 torch.Size([3, 28, 28]) 2352 458 0.195 0.364 0 1.000 0
Elapsed time per size of sample:
#> 17.327 sec elapsed