@@ -146,33 +146,35 @@ void benchmark_device(const Device_Info& device_info) {
146
146
println (" \r | Memory Bandwidth (misaligned read ) " +alignr (29u , to_string (4 .0f *(float )N*(float )M/(float )(time_mr-time_cw/(double )M)*1E-9f , 2u ))+" GB/s |" );
147
147
println (" \r | Memory Bandwidth (misaligned write) " +alignr (29u , to_string (4 .0f *(float )N*(float )M/(float ) time_mw *1E-9f , 2u ))+" GB/s |" );
148
148
149
- print (" | Benchmarking ... |" );
150
- for (uint i=0u ; i<N_memory; i++) {
151
- clock .start ();
152
- buffer.write_to_device ();
153
- time_send = fmin (clock .stop (), time_send);
154
- }
155
- const float bw_send = 4 .0f *M*N/(float )time_send*1E-9f ;
156
- println (" \r | PCIe Bandwidth (send ) " +alignr (29u , to_string (bw_send, 2u ))+" GB/s |" );
157
- print (" | Benchmarking ... |" );
158
- for (uint i=0u ; i<N_memory; i++) {
159
- clock .start ();
160
- buffer.read_from_device ();
161
- time_receive = fmin (clock .stop (), time_receive);
162
- }
163
- const float bw_receive = 4 .0f *M*N/(float )time_receive*1E-9f ;
164
- println (" \r | PCIe Bandwidth ( receive ) " +alignr (29u , to_string (bw_receive, 2u ))+" GB/s |" );
165
- print (" | Benchmarking ... |" );
166
- for (uint i=0u ; i<N_memory; i++) {
167
- clock .start ();
168
- buffer.read_from_device (N*M/2u , N*M, false );
169
- buffer.write_to_device (0u , N*M/2u , false );
170
- buffer.finish_queue ();
171
- time_bidirectional = fmin (clock .stop (), time_bidirectional);
149
+ if (!device.info .uses_ram ) {
150
+ print (" | Benchmarking ... |" );
151
+ for (uint i=0u ; i<N_memory; i++) {
152
+ clock .start ();
153
+ buffer.write_to_device ();
154
+ time_send = fmin (clock .stop (), time_send);
155
+ }
156
+ const float bw_send = 4 .0f *M*N/(float )time_send*1E-9f ;
157
+ println (" \r | PCIe Bandwidth (send ) " +alignr (29u , to_string (bw_send, 2u ))+" GB/s |" );
158
+ print (" | Benchmarking ... |" );
159
+ for (uint i=0u ; i<N_memory; i++) {
160
+ clock .start ();
161
+ buffer.read_from_device ();
162
+ time_receive = fmin (clock .stop (), time_receive);
163
+ }
164
+ const float bw_receive = 4 .0f *M*N/(float )time_receive*1E-9f ;
165
+ println (" \r | PCIe Bandwidth ( receive ) " +alignr (29u , to_string (bw_receive, 2u ))+" GB/s |" );
166
+ print (" | Benchmarking ... |" );
167
+ for (uint i=0u ; i<N_memory; i++) {
168
+ clock .start ();
169
+ buffer.read_from_device (N*M/2u , N*M, false );
170
+ buffer.write_to_device (0u , N*M/2u , false );
171
+ buffer.finish_queue ();
172
+ time_bidirectional = fmin (clock .stop (), time_bidirectional);
173
+ }
174
+ const float bw_bidirectional = 4 .0f *M*N/(float )time_bidirectional*1E-9f ;
175
+ const float bw_max = fmax (2 .0f *fmax (bw_send, bw_receive), bw_bidirectional);
176
+ println (" \r | PCIe Bandwidth ( bidirectional) (Gen" +to_string (bw_max>17 .6f ?4 :bw_max>8 .8f ?3 :bw_max>4 .4f ?2 :1 )+" x16)" +alignr (8u , to_string (bw_bidirectional, 2u ))+" GB/s |" );
172
177
}
173
- const float bw_bidirectional = 4 .0f *M*N/(float )time_bidirectional*1E-9f ;
174
- const float bw_max = fmax (2 .0f *fmax (bw_send, bw_receive), bw_bidirectional);
175
- println (" \r | PCIe Bandwidth ( bidirectional) (Gen" +to_string (bw_max>17 .6f ?4 :bw_max>8 .8f ?3 :bw_max>4 .4f ?2 :1 )+" x16)" +alignr (8u , to_string (bw_bidirectional, 2u ))+" GB/s |" );
176
178
177
179
println (" |-----------------------------------------------------------------------------|" );
178
180
}
0 commit comments