vignettes/linear_algebra_with_torch.Rmd
linear_algebra_with_torch.Rmd
# Row vector: a matrix with one row and ten columns
mr <- matrix(1:10, nrow = 1)
mr
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 1 2 3 4 5 6 7 8 9 10
torch$as_tensor(mr)
#> tensor([[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=torch.int32)
torch$as_tensor(mr)$shape
#> torch.Size([1, 10])
# Column vector: a matrix with ten rows and one column
mc <- matrix(1:10, ncol = 1)
mc
#> [,1]
#> [1,] 1
#> [2,] 2
#> [3,] 3
#> [4,] 4
#> [5,] 5
#> [6,] 6
#> [7,] 7
#> [8,] 8
#> [9,] 9
#> [10,] 10
torch$as_tensor(mc)
#> tensor([[ 1],
#> [ 2],
#> [ 3],
#> [ 4],
#> [ 5],
#> [ 6],
#> [ 7],
#> [ 8],
#> [ 9],
#> [10]], dtype=torch.int32)
torch$as_tensor(mc)$shape
#> torch.Size([10, 1])
# 3 x 8 integer matrix, filled row by row
m1 <- matrix(1:24, nrow = 3, byrow = TRUE)
m1
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
#> [1,] 1 2 3 4 5 6 7 8
#> [2,] 9 10 11 12 13 14 15 16
#> [3,] 17 18 19 20 21 22 23 24
# Convert once and reuse the tensor for the shape queries below
t1 <- torch$as_tensor(m1)
t1
#> tensor([[ 1, 2, 3, 4, 5, 6, 7, 8],
#> [ 9, 10, 11, 12, 13, 14, 15, 16],
#> [17, 18, 19, 20, 21, 22, 23, 24]], dtype=torch.int32)
t1$shape
#> torch.Size([3, 8])
t1$size()
#> torch.Size([3, 8])
dim(t1)
#> [1] 3 8
length(t1)
#> [1] 24
# 10 x 10 matrix holding 0..99, filled column by column
m2 <- matrix(0:99, ncol = 10)
m2
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 0 10 20 30 40 50 60 70 80 90
#> [2,] 1 11 21 31 41 51 61 71 81 91
#> [3,] 2 12 22 32 42 52 62 72 82 92
#> [4,] 3 13 23 33 43 53 63 73 83 93
#> [5,] 4 14 24 34 44 54 64 74 84 94
#> [6,] 5 15 25 35 45 55 65 75 85 95
#> [7,] 6 16 26 36 46 56 66 76 86 96
#> [8,] 7 17 27 37 47 57 67 77 87 97
#> [9,] 8 18 28 38 48 58 68 78 88 98
#> [10,] 9 19 29 39 49 59 69 79 89 99
# The tensor keeps the same 10 x 10 layout as the R matrix
t2 <- torch$as_tensor(m2)
t2
#> tensor([[ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
#> [ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91],
#> [ 2, 12, 22, 32, 42, 52, 62, 72, 82, 92],
#> [ 3, 13, 23, 33, 43, 53, 63, 73, 83, 93],
#> [ 4, 14, 24, 34, 44, 54, 64, 74, 84, 94],
#> [ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95],
#> [ 6, 16, 26, 36, 46, 56, 66, 76, 86, 96],
#> [ 7, 17, 27, 37, 47, 57, 67, 77, 87, 97],
#> [ 8, 18, 28, 38, 48, 58, 68, 78, 88, 98],
#> [ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99]], dtype=torch.int32)
t2$shape
#> torch.Size([10, 10])
dim(t2)
#> [1] 10 10
# An RGB color image has three axes: here a 3 x 28 x 28 tensor of random values
img <- torch$rand(3L, 28L, 28L)
img
#> tensor([[[0.4920, 0.0248, 0.9115, ..., 0.9847, 0.6634, 0.8705],
#> [0.1208, 0.2527, 0.8676, ..., 0.2648, 0.1660, 0.0724],
#> [0.9072, 0.4989, 0.6459, ..., 0.5450, 0.6089, 0.2706],
#> ...,
#> [0.3260, 0.1801, 0.7411, ..., 0.0582, 0.4318, 0.6760],
#> [0.1173, 0.9366, 0.7650, ..., 0.9617, 0.7982, 0.7168],
#> [0.8768, 0.9510, 0.7686, ..., 0.1089, 0.6308, 0.8282]],
#>
#> [[0.6514, 0.3039, 0.4493, ..., 0.0246, 0.3952, 0.8266],
#> [0.6649, 0.6602, 0.8841, ..., 0.4218, 0.7038, 0.9010],
#> [0.4447, 0.8759, 0.0993, ..., 0.9169, 0.1128, 0.7452],
#> ...,
#> [0.3098, 0.4675, 0.5892, ..., 0.6671, 0.5273, 0.5508],
#> [0.4038, 0.9282, 0.7265, ..., 0.6994, 0.3793, 0.0596],
#> [0.2527, 0.5610, 0.9893, ..., 0.0437, 0.0890, 0.8702]],
#>
#> [[0.9598, 0.9510, 0.7040, ..., 0.0072, 0.5999, 0.8662],
#> [0.6223, 0.7581, 0.9945, ..., 0.9419, 0.7821, 0.0715],
#> [0.2308, 0.2813, 0.5536, ..., 0.0103, 0.8173, 0.0481],
#> ...,
#> [0.9602, 0.0967, 0.0508, ..., 0.9151, 0.6835, 0.5957],
#> [0.5006, 0.7613, 0.5489, ..., 0.5614, 0.1959, 0.7073],
#> [0.1265, 0.1147, 0.3188, ..., 0.2434, 0.5904, 0.7173]]])
img$shape
#> torch.Size([3, 28, 28])
# 5 x 5 matrix used to compare transposition in R and in PyTorch
m3 <- matrix(1:25, ncol = 5)
m3
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 1 6 11 16 21
#> [2,] 2 7 12 17 22
#> [3,] 3 8 13 18 23
#> [4,] 4 9 14 19 24
#> [5,] 5 10 15 20 25
# Transpose in base R
(tm3 <- t(m3))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 1 2 3 4 5
#> [2,] 6 7 8 9 10
#> [3,] 11 12 13 14 15
#> [4,] 16 17 18 19 20
#> [5,] 21 22 23 24 25
# Transpose in PyTorch: swap dimensions 0 and 1 of the tensor
t3 <- torch$as_tensor(m3)
t3
#> tensor([[ 1, 6, 11, 16, 21],
#> [ 2, 7, 12, 17, 22],
#> [ 3, 8, 13, 18, 23],
#> [ 4, 9, 14, 19, 24],
#> [ 5, 10, 15, 20, 25]], dtype=torch.int32)
(tt3 <- t3$transpose(dim0 = 0L, dim1 = 1L))
#> tensor([[ 1, 2, 3, 4, 5],
#> [ 6, 7, 8, 9, 10],
#> [11, 12, 13, 14, 15],
#> [16, 17, 18, 19, 20],
#> [21, 22, 23, 24, 25]], dtype=torch.int32)
m2 <- matrix(0:99, ncol = 10)
t2 <- torch$as_tensor(m2)
t2
#> tensor([[ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
#> [ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91],
#> [ 2, 12, 22, 32, 42, 52, 62, 72, 82, 92],
#> [ 3, 13, 23, 33, 43, 53, 63, 73, 83, 93],
#> [ 4, 14, 24, 34, 44, 54, 64, 74, 84, 94],
#> [ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95],
#> [ 6, 16, 26, 36, 46, 56, 66, 76, 86, 96],
#> [ 7, 17, 27, 37, 47, 57, 67, 77, 87, 97],
#> [ 8, 18, 28, 38, 48, 58, 68, 78, 88, 98],
#> [ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99]], dtype=torch.int32)
# Base R: first column and tenth (last) row of the matrix
v1 <- m2[, 1]
v1
#> [1] 0 1 2 3 4 5 6 7 8 9
v2 <- m2[10, ]
v2
#> [1] 9 19 29 39 49 59 69 79 89 99
# PyTorch: the same column and row taken from the tensor with 1-based `[`
(t2c <- t2[, 1])
#> tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.int32)
(t2r <- t2[10, ])
#> tensor([ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99], dtype=torch.int32)
For a vector (a 1D tensor), the vector and its transpose are equal.
# Two 5 x 4 tensors filled with ones
(x <- torch$ones(5L, 4L))
#> tensor([[1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.]])
(y <- torch$ones(5L, 4L))
#> tensor([[1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.]])
# Element-wise sum of two tensors with the same shape
x + y
#> tensor([[2., 2., 2., 2.],
#> [2., 2., 2., 2.],
#> [2., 2., 2., 2.],
#> [2., 2., 2., 2.],
#> [2., 2., 2., 2.]])
\[A + B = B + A\]
\[A * B = B * A \quad \text{(element-wise product)}\]
# Two 5 x 4 tensors filled with ones
(x <- torch$ones(5L, 4L))
#> tensor([[1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.]])
(y <- torch$ones(5L, 4L))
#> tensor([[1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.],
#> [1., 1., 1., 1.]])
\[\operatorname{dot}(a,b)_{i,j,k,p,q,r} = \sum_m a_{i,j,k,m}\, b_{p,q,m,r}\]
# Two identical 2 x 2 NumPy arrays built from nested R lists
(a <- np$array(list(list(1, 2), list(3, 4))))
#> [,1] [,2]
#> [1,] 1 2
#> [2,] 3 4
(b <- np$array(list(list(1, 2), list(3, 4))))
#> [,1] [,2]
#> [1,] 1 2
#> [2,] 3 4
# For these 2D inputs np$dot() returns the matrix product
np$dot(a, b)
#> [,1] [,2]
#> [1,] 7 10
#> [2,] 15 22
torch.dot()
computes the inner product of two 1D tensors only; unlike np.dot(), it does not accept 2D arguments and raises an error instead, as the call below shows.
# Convert the two 2 x 2 arrays to tensors
at <- torch$as_tensor(a)
bt <- torch$as_tensor(b)
# torch$dot() only accepts 1-D tensors, so this call fails with the error shown below
torch$dot(at, bt)
#> Error in py_call_impl(callable, dots$args, dots$keywords): RuntimeError: dot: Expected 1-D argument self, but got 2-D
# at %.*% bt
If we perform the same dot product operation in Python, we get the same error:
# The same experiment in plain Python: np.dot() versus torch.dot() on 2 x 2 inputs
import torch
import numpy as np
a = np.array([[1, 2], [3, 4]])
a
#> array([[1, 2],
#> [3, 4]])
b = np.array([[1, 2], [3, 4]])
b
#> array([[1, 2],
#> [3, 4]])
# np.dot() on two 2D arrays returns the matrix product
np.dot(a, b)
#> array([[ 7, 10],
#> [15, 22]])
# Convert the NumPy arrays to tensors
at = torch.as_tensor(a)
bt = torch.as_tensor(b)
at
#> tensor([[1, 2],
#> [3, 4]])
bt
#> tensor([[1, 2],
#> [3, 4]])
# torch.dot() rejects 2D tensors, so this raises the RuntimeError shown below
torch.dot(at, bt)
#> Error in py_call_impl(callable, dots$args, dots$keywords): RuntimeError: dot: Expected 1-D argument self, but got 2-D
#>
#> Detailed traceback:
#> File "<string>", line 1, in <module>
# `a` is built from a nested list, so it is a 2 x 2 tensor
a <- torch$Tensor(list(list(1, 2), list(3, 4)))
# c(c(1, 2), c(3, 4)) flattens to a plain vector, so `b` is a 1-D tensor of four elements
b <- torch$Tensor(c(c(1, 2), c(3, 4)))
# NOTE(review): this variable shares its name with base R's c() function and is
# not used in the code shown here -- consider renaming it
c <- torch$Tensor(list(list(11, 12), list(13, 14)))
a
#> tensor([[1., 2.],
#> [3., 4.]])
b
#> tensor([1., 2., 3., 4.])
# Fails because `a` is 2-D; torch$dot() expects 1-D arguments
torch$dot(a, b)
#> Error in py_call_impl(callable, dots$args, dots$keywords): RuntimeError: dot: Expected 1-D argument self, but got 2-D
# The method form below is another way of writing the dot product in PyTorch
# a$dot(a)
# Two 2 x 2 tensors of ones
o1 <- torch$ones(2L, 2L)
o2 <- torch$ones(2L, 2L)
o1
#> tensor([[1., 1.],
#> [1., 1.]])
o2
#> tensor([[1., 1.],
#> [1., 1.]])
# Both the function form and the method form reject 2-D tensors
torch$dot(o1, o2)
#> Error in py_call_impl(callable, dots$args, dots$keywords): RuntimeError: dot: Expected 1-D argument self, but got 2-D
o1$dot(o2)
#> Error in py_call_impl(callable, dots$args, dots$keywords): RuntimeError: dot: Expected 1-D argument self, but got 2-D
# 1D tensors work fine: 4 * 3 + 2 * 4 + 4 * 1 = 24
r <- torch$dot(
  torch$Tensor(list(4L, 2L, 4L)),
  torch$Tensor(list(3L, 4L, 1L))
)
r
#> tensor(24.)
# mm() and matmul() give the matrix product we were looking for with dot():
# a 2 x 3 tensor times a 3 x 4 tensor yields a 2 x 4 tensor
a <- torch$randn(2L, 3L)
b <- torch$randn(3L, 4L)
a$mm(b)
#> tensor([[-1.3012, 0.0225, 0.9637, 0.3125],
#> [-3.3288, -0.2954, 0.8563, 1.6626]])
a$matmul(b)
#> tensor([[-1.3012, 0.0225, 0.9637, 0.3125],
#> [-3.3288, -0.2954, 0.8563, 1.6626]])
Here is a good explanation: https://stackoverflow.com/a/44525687/5270873
# Transpose of the product a b
(abt <- torch$mm(a, b)$transpose(dim0 = 0L, dim1 = 1L))
#> tensor([[-1.3012, -3.3288],
#> [ 0.0225, -0.2954],
#> [ 0.9637, 0.8563],
#> [ 0.3125, 1.6626]])
# Product of the transposes taken in reverse order; the result matches abt
at <- a$transpose(dim0 = 0L, dim1 = 1L)
bt <- b$transpose(dim0 = 0L, dim1 = 1L)
(btat <- torch$matmul(bt, at))
#> tensor([[-1.3012, -3.3288],
#> [ 0.0225, -0.2954],
#> [ 0.9637, 0.8563],
#> [ 0.3125, 1.6626]])
\[(A B)^T = B^T A^T\]