1
+ #include < iostream>
2
+ #include < chrono>
3
+ #include < torch/torch.h>
4
+ #include < Eigen/Dense>
5
+
6
+ int main () {
7
+ // Define large, mid and small matrix dimensions
8
+ int large_rows = 1000 ;
9
+ int large_cols = 1000 ;
10
+ int mid_rows = 100 ;
11
+ int mid_cols = 100 ;
12
+ int small_rows = 10 ;
13
+ int small_cols = 10 ;
14
+
15
+ // Create large, mid and small torch::Tensors and Eigen matrices
16
+ torch::Tensor large_torch_tensor = torch::rand ({large_rows, large_cols});
17
+ torch::Tensor mid_torch_tensor = torch::rand ({mid_rows, mid_cols});
18
+ torch::Tensor small_torch_tensor = torch::rand ({small_rows, small_cols});
19
+ Eigen::MatrixXd large_eigen_matrix = Eigen::MatrixXd::Random (large_rows, large_cols);
20
+ Eigen::MatrixXd mid_eigen_matrix = Eigen::MatrixXd::Random (mid_rows, mid_cols);
21
+ Eigen::MatrixXd small_eigen_matrix = Eigen::MatrixXd::Random (small_rows, small_cols);
22
+
23
+ // Get the default device (CPU or CUDA if available)
24
+ torch::Device device = torch::cuda::is_available () ? torch::kCUDA : torch::kCPU ;
25
+ // Print the default device
26
+ std::cout << " Default device: " << device << std::endl;
27
+
28
+ // Move torch::Tensors to the default device
29
+ large_torch_tensor = large_torch_tensor.to (device);
30
+ mid_torch_tensor = mid_torch_tensor.to (device);
31
+ small_torch_tensor = small_torch_tensor.to (device);
32
+
33
+ // Benchmark large torch::Tensor matrix multiplication
34
+ auto start_large_torch_matmul = std::chrono::high_resolution_clock::now ();
35
+ // pre-allocate memory for the result
36
+ torch::Tensor large_torch_result = torch::empty ({large_rows, large_rows}, device);
37
+ large_torch_result = torch::matmul (large_torch_tensor, large_torch_tensor.transpose (0 , 1 ));
38
+ auto end_large_torch_matmul = std::chrono::high_resolution_clock::now ();
39
+ std::chrono::duration<double > elapsed_large_torch_matmul = end_large_torch_matmul - start_large_torch_matmul;
40
+
41
+ // Benchmark large Eigen matrix multiplication
42
+ auto start_large_eigen_matmul = std::chrono::high_resolution_clock::now ();
43
+ Eigen::MatrixXd large_eigen_result = large_eigen_matrix * large_eigen_matrix.transpose ();
44
+ auto end_large_eigen_matmul = std::chrono::high_resolution_clock::now ();
45
+ std::chrono::duration<double > elapsed_large_eigen_matmul = end_large_eigen_matmul - start_large_eigen_matmul;
46
+
47
+ // Benchmark mid torch::Tensor matrix multiplication
48
+ auto start_mid_torch_matmul = std::chrono::high_resolution_clock::now ();
49
+ // pre-allocate memory for the result
50
+ torch::Tensor mid_torch_result = torch::empty ({mid_rows, mid_rows}, device);
51
+ mid_torch_result = torch::matmul (mid_torch_tensor, mid_torch_tensor.transpose (0 , 1 ));
52
+ auto end_mid_torch_matmul = std::chrono::high_resolution_clock::now ();
53
+ std::chrono::duration<double > elapsed_mid_torch_matmul = end_mid_torch_matmul - start_mid_torch_matmul;
54
+
55
+ // Benchmark mid Eigen matrix multiplication
56
+ auto start_mid_eigen_matmul = std::chrono::high_resolution_clock::now ();
57
+ Eigen::MatrixXd mid_eigen_result = mid_eigen_matrix * mid_eigen_matrix.transpose ();
58
+ auto end_mid_eigen_matmul = std::chrono::high_resolution_clock::now ();
59
+ std::chrono::duration<double > elapsed_mid_eigen_matmul = end_mid_eigen_matmul - start_mid_eigen_matmul;
60
+
61
+ // Benchmark small torch::Tensor matrix multiplication
62
+ auto start_small_torch_matmul = std::chrono::high_resolution_clock::now ();
63
+ // pre-allocate memory for the result
64
+ torch::Tensor small_torch_result = torch::empty ({small_rows, small_rows}, device);
65
+ small_torch_result = torch::matmul (small_torch_tensor, small_torch_tensor.transpose (0 , 1 ));
66
+ auto end_small_torch_matmul = std::chrono::high_resolution_clock::now ();
67
+ std::chrono::duration<double > elapsed_small_torch_matmul = end_small_torch_matmul - start_small_torch_matmul;
68
+
69
+ // Benchmark small Eigen matrix multiplication
70
+ auto start_small_eigen_matmul = std::chrono::high_resolution_clock::now ();
71
+ Eigen::MatrixXd small_eigen_result = small_eigen_matrix * small_eigen_matrix.transpose ();
72
+ auto end_small_eigen_matmul = std::chrono::high_resolution_clock::now ();
73
+ std::chrono::duration<double > elapsed_small_eigen_matmul = end_small_eigen_matmul - start_small_eigen_matmul;
74
+
75
+ // Benchmark small torch::Tensor inverse
76
+ auto start_small_torch_inverse = std::chrono::high_resolution_clock::now ();
77
+ // pre-allocate memory for the result
78
+ torch::Tensor small_torch_inverse = torch::empty ({small_rows, small_cols}, device);
79
+ small_torch_inverse = torch::inverse (small_torch_tensor);
80
+ auto end_small_torch_inverse = std::chrono::high_resolution_clock::now ();
81
+ std::chrono::duration<double > elapsed_small_torch_inverse = end_small_torch_inverse - start_small_torch_inverse;
82
+
83
+ // Benchmark small Eigen matrix inverse
84
+ auto start_small_eigen_inverse = std::chrono::high_resolution_clock::now ();
85
+ Eigen::MatrixXd small_eigen_inverse = small_eigen_matrix.inverse ();
86
+ auto end_small_eigen_inverse = std::chrono::high_resolution_clock::now ();
87
+ std::chrono::duration<double > elapsed_small_eigen_inverse = end_small_eigen_inverse - start_small_eigen_inverse;
88
+
89
+ // Print results
90
+ std::cout << " Large Torch::Tensor matrix multiplication time: " << elapsed_large_torch_matmul.count () << " seconds" << std::endl;
91
+ std::cout << " Large Eigen matrix multiplication time: " << elapsed_large_eigen_matmul.count () << " seconds" << std::endl;
92
+ std::cout << " Mid Torch::Tensor matrix multiplication time: " << elapsed_mid_torch_matmul.count () << " seconds" << std::endl;
93
+ std::cout << " Mid Eigen matrix multiplication time: " << elapsed_mid_eigen_matmul.count () << " seconds" << std::endl;
94
+ std::cout << " Small Torch::Tensor matrix multiplication time: " << elapsed_small_torch_matmul.count () << " seconds" << std::endl;
95
+ std::cout << " Small Eigen matrix multiplication time: " << elapsed_small_eigen_matmul.count () << " seconds" << std::endl;
96
+ std::cout << " Small Torch::Tensor inverse time: " << elapsed_small_torch_inverse.count () << " seconds" << std::endl;
97
+ std::cout << " Small Eigen matrix inverse time: " << elapsed_small_eigen_inverse.count () << " seconds" << std::endl;
98
+
99
+ return 0 ;
100
+ }
101
+
102
+ // PC specs:
103
+ // - CPU: 13th Gen Intel® Core™ i7-13620H × 16
104
+ // - GPU: NVIDIA GeForce RTX 4060
105
+ // ```
106
+ // $ ./test_torch_eigen
107
+ // Default device: cuda
108
+ // Large Torch::Tensor matrix multiplication time: 0.0187716 seconds
109
+ // Large Eigen matrix multiplication time: 0.121851 seconds
110
+ // Mid Torch::Tensor matrix multiplication time: 0.00302689 seconds
111
+ // Mid Eigen matrix multiplication time: 0.000178552 seconds
112
+ // Small Torch::Tensor matrix multiplication time: 0.00202544 seconds
113
+ // Small Eigen matrix multiplication time: 2.229e-06 seconds
114
+ // Small Torch::Tensor inverse time: 0.0824715 seconds
115
+ // Small Eigen matrix inverse time: 7.489e-06 seconds
116
+ // ```
0 commit comments