Chapter 3 “Working with data.table”
Last update: Thu Oct 22 16:46:28 2020 -0500 (54a46ea04)
3.1 Load PyTorch libraries
library(rTorch)

# Import the Python modules used throughout this chapter via reticulate.
# (The extraction had shifted each assignment's name onto the next line;
# restored here so every module binds to its intended R name.)
torch       <- import("torch")                  # core PyTorch
torchvision <- import("torchvision")            # vision datasets + transforms
nn          <- import("torch.nn")               # neural-network building blocks
transforms  <- import("torchvision.transforms") # image transforms (ToTensor, ...)
dsets       <- import("torchvision.datasets")   # dataset classes (ImageFolder, ...)
builtins    <- import_builtins()                # Python built-ins (len, enumerate, ...)
np          <- import("numpy")
3.2 Load dataset
## Dataset iteration batch settings
# Folders where the MNIST PNG images are located (ImageFolder layout:
# one sub-folder per class under each root).
train_data_path <- "./mnist_png_full/training/"
test_data_path  <- "./mnist_png_full/testing/"
3.3 Datasets without normalization
# Load the training images without normalization: ToTensor() only converts
# each PNG to a float tensor with pixel values scaled to [0, 1].
train_dataset <- torchvision$datasets$ImageFolder(
  root      = train_data_path,
  transform = torchvision$transforms$ToTensor()
)

print(train_dataset)
#> Dataset ImageFolder
#>     Number of datapoints: 60000
#>     Root location: ./mnist_png_full/training/
#>     StandardTransform
#> Transform: ToTensor()
3.4 Using data.table
library(data.table)
library(tictoc)   # simple tic()/toc() wall-clock timers
# Start timing the summary-statistics computation (stopped by toc() below).
tic()
# Named list of torch tensor method chains to compute per image.
# Each element is the sequence of method calls to apply; e.g.
# c("sum", "item") becomes tensor$sum()$item() (item() converts the
# 0-dim result tensor to an R scalar). numel needs no item() because
# it already returns a plain integer.
fun_list <- list(
  numel = c("numel"),
  sum   = c("sum", "item"),
  mean  = c("mean", "item"),
  std   = c("std", "item"),
  med   = c("median", "item"),
  max   = c("max", "item"),
  min   = c("min", "item")
)
# Zero-based sample indices (Python indexing into the dataset):
# the first 600 images of the training set.
idx <- seq(0L, 599L)
# Fetch the image tensor for sample x from the Python dataset.
# py_get_item() indexes the dataset with Python (zero-based) semantics and
# returns the (image, label) pair; [[0]] on the Python object extracts the
# image tensor (zero-based because the object is a Python tuple, not an
# R list — NOTE(review): relies on reticulate's Python-style `[[`; confirm
# the pair is returned unconverted).
fun_get_tensor <- function(x) py_get_item(train_dataset, x)[[0]]
# Evaluate one statistic (a method chain such as "sum()$item") on every
# sample in `idx`, returning a vector with one entry per sample.
# Numeric results are kept as numbers; anything non-numeric (e.g. a
# torch.Size from size()) is coerced to character.
# NOTE(review): eval(parse(...)) is an anti-pattern kept here for fidelity
# with the book; a safer design would chain the method calls directly.
stat_fun <- function(x, str_fun) {
  # Build the call text once, e.g. "fun_get_tensor(x)$sum()$item()".
  fun_var <- paste0("fun_get_tensor(x)", "$", str_fun, "()")
  expr <- parse(text = fun_var)
  sapply(idx, function(x) {
    # Evaluate once per sample (the original re-evaluated the expression
    # up to three times inside a scalar ifelse(), tripling the work).
    val <- eval(expr)
    if (is.numeric(val)) val else as.character(val)
  })
}
# Assemble one row per image: ridx (1-based row index) plus one column
# per statistic in fun_list.
dt <- data.table(
  ridx = idx + 1,  # shift zero-based Python index to 1-based R row index
  do.call(
    data.table,
    lapply(
      # Collapse each method chain into call text, e.g. c("sum", "item")
      # -> "sum()$item"; stat_fun() appends the final "()".
      sapply(fun_list, function(x) paste(x, collapse = "()$")),
      function(y) stat_fun(1, y)
    )
  )
)
Summary statistics:
# First six rows of the per-image summary statistics table.
head(dt)
#> ridx numel sum mean std med max min
#> 1: 1 2352 366 0.156 0.329 0 1 0
#> 2: 2 2352 284 0.121 0.297 0 1 0
#> 3: 3 2352 645 0.274 0.420 0 1 0
#> 4: 4 2352 410 0.174 0.355 0 1 0
#> 5: 5 2352 321 0.137 0.312 0 1 0
#> 6: 6 2352 654 0.278 0.421 0 1 0
Elapsed time by sample size:
# Stop the timer started by tic() above. The timings below were recorded
# for increasing sample sizes and scale roughly linearly with the number
# of images processed.
toc()
# 60     ->   1.266 sec elapsed
# 600    ->  11.798 sec elapsed
# 6000   -> 119.256 sec elapsed
# 60000  -> 1117.619 sec elapsed
#> 8.93 sec elapsed