1
1
/*
2
- * Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
* All rights reserved.
4
4
*
5
5
* This source code is licensed under the BSD-style license found in the
6
6
* LICENSE file in the root directory of this source tree.
7
7
*/
8
8
9
+ /*
10
+ * Sections of code adapted from
11
+ * Copyright (c) 2016 Jon Gjengset under MIT License (MIT)
12
+ */
13
+
9
14
//! This file contains primitive data structures for interacting with ibverbs.
10
15
//!
11
16
//! Primitives:
@@ -25,10 +30,62 @@ use std::ffi::CStr;
25
30
use std:: fmt;
26
31
27
32
use hyperactor:: Named ;
28
- use ibverbs:: Gid ;
29
33
use serde:: Deserialize ;
30
34
use serde:: Serialize ;
31
35
36
+ #[ derive(
37
+ Default ,
38
+ Copy ,
39
+ Clone ,
40
+ Debug ,
41
+ Eq ,
42
+ PartialEq ,
43
+ Hash ,
44
+ serde:: Serialize ,
45
+ serde:: Deserialize
46
+ ) ]
47
+ #[ repr( transparent) ]
48
+ pub struct Gid {
49
+ raw : [ u8 ; 16 ] ,
50
+ }
51
+
52
+ impl Gid {
53
+ #[ allow( dead_code) ]
54
+ fn subnet_prefix ( & self ) -> u64 {
55
+ u64:: from_be_bytes ( self . raw [ ..8 ] . try_into ( ) . unwrap ( ) )
56
+ }
57
+
58
+ #[ allow( dead_code) ]
59
+ fn interface_id ( & self ) -> u64 {
60
+ u64:: from_be_bytes ( self . raw [ 8 ..] . try_into ( ) . unwrap ( ) )
61
+ }
62
+ }
63
+ impl From < rdmacore_sys:: ibv_gid > for Gid {
64
+ fn from ( gid : rdmacore_sys:: ibv_gid ) -> Self {
65
+ Self {
66
+ raw : unsafe { gid. raw } ,
67
+ }
68
+ }
69
+ }
70
+
71
+ impl From < Gid > for rdmacore_sys:: ibv_gid {
72
+ fn from ( mut gid : Gid ) -> Self {
73
+ * gid. as_mut ( )
74
+ }
75
+ }
76
+
77
+ impl AsRef < rdmacore_sys:: ibv_gid > for Gid {
78
+ fn as_ref ( & self ) -> & rdmacore_sys:: ibv_gid {
79
+ unsafe { & * self . raw . as_ptr ( ) . cast :: < rdmacore_sys:: ibv_gid > ( ) }
80
+ }
81
+ }
82
+
83
+ impl AsMut < rdmacore_sys:: ibv_gid > for Gid {
84
+ fn as_mut ( & mut self ) -> & mut rdmacore_sys:: ibv_gid {
85
+ unsafe { & mut * self . raw . as_mut_ptr ( ) . cast :: < rdmacore_sys:: ibv_gid > ( ) }
86
+ }
87
+ }
88
+
32
89
/// Represents ibverbs specific configurations.
33
90
///
34
91
/// This struct holds various parameters required to establish and manage an RDMA connection.
@@ -86,7 +143,7 @@ impl Default for IbverbsConfig {
86
143
max_recv_wr : 1 ,
87
144
max_send_sge : 1 ,
88
145
max_recv_sge : 1 ,
89
- path_mtu : ffi :: IBV_MTU_1024 ,
146
+ path_mtu : rdmacore_sys :: IBV_MTU_1024 ,
90
147
retry_cnt : 7 ,
91
148
rnr_retry : 7 ,
92
149
qp_timeout : 14 , // 4.096 μs * 2^14 = ~67 ms
@@ -144,7 +201,7 @@ impl std::fmt::Display for IbverbsConfig {
144
201
#[ derive( Debug , Clone , Serialize , Deserialize ) ]
145
202
pub struct RdmaDevice {
146
203
/// `name` - The name of the RDMA device (e.g., "mlx5_0").
147
- name : String ,
204
+ pub name : String ,
148
205
/// `vendor_id` - The vendor ID of the device.
149
206
vendor_id : u32 ,
150
207
/// `vendor_part_id` - The vendor part ID of the device.
@@ -330,10 +387,10 @@ impl fmt::Display for RdmaPort {
330
387
/// # Returns
331
388
///
332
389
/// A string representation of the port state.
333
- pub fn get_port_state_str ( state : ffi :: ibv_port_state:: Type ) -> String {
390
+ pub fn get_port_state_str ( state : rdmacore_sys :: ibv_port_state:: Type ) -> String {
334
391
// SAFETY: We are calling a C function that returns a C string.
335
392
unsafe {
336
- let c_str = ffi :: ibv_port_state_str ( state) ;
393
+ let c_str = rdmacore_sys :: ibv_port_state_str ( state) ;
337
394
if c_str. is_null ( ) {
338
395
return "Unknown" . to_string ( ) ;
339
396
}
@@ -428,7 +485,7 @@ pub fn get_all_devices() -> Vec<RdmaDevice> {
428
485
// SAFETY: We are calling several C functions from libibverbs.
429
486
unsafe {
430
487
let mut num_devices = 0 ;
431
- let device_list = ffi :: ibv_get_device_list ( & mut num_devices) ;
488
+ let device_list = rdmacore_sys :: ibv_get_device_list ( & mut num_devices) ;
432
489
if device_list. is_null ( ) || num_devices == 0 {
433
490
return devices;
434
491
}
@@ -439,18 +496,18 @@ pub fn get_all_devices() -> Vec<RdmaDevice> {
439
496
continue ;
440
497
}
441
498
442
- let context = ffi :: ibv_open_device ( device) ;
499
+ let context = rdmacore_sys :: ibv_open_device ( device) ;
443
500
if context. is_null ( ) {
444
501
continue ;
445
502
}
446
503
447
- let device_name = CStr :: from_ptr ( ffi :: ibv_get_device_name ( device) )
504
+ let device_name = CStr :: from_ptr ( rdmacore_sys :: ibv_get_device_name ( device) )
448
505
. to_string_lossy ( )
449
506
. into_owned ( ) ;
450
507
451
- let mut device_attr = ffi :: ibv_device_attr:: default ( ) ;
452
- if ffi :: ibv_query_device ( context, & mut device_attr) != 0 {
453
- ffi :: ibv_close_device ( context) ;
508
+ let mut device_attr = rdmacore_sys :: ibv_device_attr:: default ( ) ;
509
+ if rdmacore_sys :: ibv_query_device ( context, & mut device_attr) != 0 {
510
+ rdmacore_sys :: ibv_close_device ( context) ;
454
511
continue ;
455
512
}
456
513
@@ -475,11 +532,11 @@ pub fn get_all_devices() -> Vec<RdmaDevice> {
475
532
} ;
476
533
477
534
for port_num in 1 ..=device_attr. phys_port_cnt {
478
- let mut port_attr = ffi :: ibv_port_attr:: default ( ) ;
479
- if ffi :: ibv_query_port (
535
+ let mut port_attr = rdmacore_sys :: ibv_port_attr:: default ( ) ;
536
+ if rdmacore_sys :: ibv_query_port (
480
537
context,
481
538
port_num,
482
- & mut port_attr as * mut ffi :: ibv_port_attr as * mut _ ,
539
+ & mut port_attr as * mut rdmacore_sys :: ibv_port_attr as * mut _ ,
483
540
) != 0
484
541
{
485
542
continue ;
@@ -489,8 +546,8 @@ pub fn get_all_devices() -> Vec<RdmaDevice> {
489
546
490
547
let link_layer = get_link_layer_str ( port_attr. link_layer ) ;
491
548
492
- let mut gid = ffi :: ibv_gid:: default ( ) ;
493
- let gid_str = if ffi :: ibv_query_gid ( context, port_num, 0 , & mut gid) == 0 {
549
+ let mut gid = rdmacore_sys :: ibv_gid:: default ( ) ;
550
+ let gid_str = if rdmacore_sys :: ibv_query_gid ( context, port_num, 0 , & mut gid) == 0 {
494
551
format_gid ( & gid. raw )
495
552
} else {
496
553
"N/A" . to_string ( )
@@ -513,10 +570,10 @@ pub fn get_all_devices() -> Vec<RdmaDevice> {
513
570
}
514
571
515
572
devices. push ( rdma_device) ;
516
- ffi :: ibv_close_device ( context) ;
573
+ rdmacore_sys :: ibv_close_device ( context) ;
517
574
}
518
575
519
- ffi :: ibv_free_device_list ( device_list) ;
576
+ rdmacore_sys :: ibv_free_device_list ( device_list) ;
520
577
}
521
578
522
579
devices
@@ -535,9 +592,9 @@ pub fn ibverbs_supported() -> bool {
535
592
// SAFETY: We are calling a C function from libibverbs.
536
593
unsafe {
537
594
let mut num_devices = 0 ;
538
- let device_list = ffi :: ibv_get_device_list ( & mut num_devices) ;
595
+ let device_list = rdmacore_sys :: ibv_get_device_list ( & mut num_devices) ;
539
596
if !device_list. is_null ( ) {
540
- ffi :: ibv_free_device_list ( device_list) ;
597
+ rdmacore_sys :: ibv_free_device_list ( device_list) ;
541
598
return true ;
542
599
}
543
600
false
@@ -557,6 +614,7 @@ pub fn ibverbs_supported() -> bool {
557
614
/// RDMA operations are in progress.
558
615
#[ derive( Debug , PartialEq , Eq , std:: hash:: Hash , Serialize , Deserialize , Clone ) ]
559
616
pub struct RdmaMemoryRegionView {
617
+ pub id : u32 ,
560
618
pub addr : usize ,
561
619
pub size : usize ,
562
620
pub lkey : u32 ,
@@ -582,8 +640,9 @@ unsafe impl Sync for RdmaMemoryRegionView {}
582
640
583
641
impl RdmaMemoryRegionView {
584
642
/// Creates a new `RdmaMemoryRegionView` with the given address and size.
585
- pub fn new ( addr : usize , size : usize , lkey : u32 , rkey : u32 ) -> Self {
643
+ pub fn new ( id : u32 , addr : usize , size : usize , lkey : u32 , rkey : u32 ) -> Self {
586
644
Self {
645
+ id,
587
646
addr,
588
647
size,
589
648
lkey,
@@ -612,20 +671,20 @@ pub enum RdmaOperation {
612
671
Read ,
613
672
}
614
673
615
- impl From < RdmaOperation > for ffi :: ibv_wr_opcode:: Type {
674
+ impl From < RdmaOperation > for rdmacore_sys :: ibv_wr_opcode:: Type {
616
675
fn from ( op : RdmaOperation ) -> Self {
617
676
match op {
618
- RdmaOperation :: Write => ffi :: ibv_wr_opcode:: IBV_WR_RDMA_WRITE ,
619
- RdmaOperation :: Read => ffi :: ibv_wr_opcode:: IBV_WR_RDMA_READ ,
677
+ RdmaOperation :: Write => rdmacore_sys :: ibv_wr_opcode:: IBV_WR_RDMA_WRITE ,
678
+ RdmaOperation :: Read => rdmacore_sys :: ibv_wr_opcode:: IBV_WR_RDMA_READ ,
620
679
}
621
680
}
622
681
}
623
682
624
- impl From < ffi :: ibv_wc_opcode:: Type > for RdmaOperation {
625
- fn from ( op : ffi :: ibv_wc_opcode:: Type ) -> Self {
683
+ impl From < rdmacore_sys :: ibv_wc_opcode:: Type > for RdmaOperation {
684
+ fn from ( op : rdmacore_sys :: ibv_wc_opcode:: Type ) -> Self {
626
685
match op {
627
- ffi :: ibv_wc_opcode:: IBV_WC_RDMA_WRITE => RdmaOperation :: Write ,
628
- ffi :: ibv_wc_opcode:: IBV_WC_RDMA_READ => RdmaOperation :: Read ,
686
+ rdmacore_sys :: ibv_wc_opcode:: IBV_WC_RDMA_WRITE => RdmaOperation :: Write ,
687
+ rdmacore_sys :: ibv_wc_opcode:: IBV_WC_RDMA_READ => RdmaOperation :: Read ,
629
688
_ => panic ! ( "Unsupported operation type" ) ,
630
689
}
631
690
}
@@ -660,7 +719,7 @@ impl std::fmt::Debug for RdmaQpInfo {
660
719
661
720
/// Wrapper around ibv_wc (ibverbs work completion).
662
721
///
663
- /// This exposes only the public fields of ffi ::ibv_wc, allowing us to more easily
722
+ /// This exposes only the public fields of rdmacore_sys ::ibv_wc, allowing us to more easily
664
723
/// interact with it from Rust. Work completions are used to track the status of
665
724
/// RDMA operations and are generated when an operation completes.
666
725
#[ derive( Debug , Named , Clone , serde:: Serialize , serde:: Deserialize ) ]
@@ -672,9 +731,9 @@ pub struct IbvWc {
672
731
/// `valid` - Whether the work completion is valid
673
732
valid : bool ,
674
733
/// `error` - Error information if the operation failed
675
- error : Option < ( ffi :: ibv_wc_status:: Type , u32 ) > ,
734
+ error : Option < ( rdmacore_sys :: ibv_wc_status:: Type , u32 ) > ,
676
735
/// `opcode` - Type of operation that completed (read, write, etc.)
677
- opcode : ffi :: ibv_wc_opcode:: Type ,
736
+ opcode : rdmacore_sys :: ibv_wc_opcode:: Type ,
678
737
/// `bytes` - Immediate data (if any)
679
738
bytes : Option < u32 > ,
680
739
/// `qp_num` - Queue Pair Number
@@ -691,8 +750,8 @@ pub struct IbvWc {
691
750
dlid_path_bits : u8 ,
692
751
}
693
752
694
- impl From < ffi :: ibv_wc > for IbvWc {
695
- fn from ( wc : ffi :: ibv_wc ) -> Self {
753
+ impl From < rdmacore_sys :: ibv_wc > for IbvWc {
754
+ fn from ( wc : rdmacore_sys :: ibv_wc ) -> Self {
696
755
IbvWc {
697
756
wr_id : wc. wr_id ( ) ,
698
757
len : wc. len ( ) ,
@@ -804,21 +863,21 @@ mod tests {
804
863
#[test]
fn test_rdma_operation_conversion() {
    // RdmaOperation -> work-request opcode.
    let write_wr = rdmacore_sys::ibv_wr_opcode::Type::from(RdmaOperation::Write);
    assert_eq!(rdmacore_sys::ibv_wr_opcode::IBV_WR_RDMA_WRITE, write_wr);
    let read_wr = rdmacore_sys::ibv_wr_opcode::Type::from(RdmaOperation::Read);
    assert_eq!(rdmacore_sys::ibv_wr_opcode::IBV_WR_RDMA_READ, read_wr);

    // Work-completion opcode -> RdmaOperation.
    let write_op = RdmaOperation::from(rdmacore_sys::ibv_wc_opcode::IBV_WC_RDMA_WRITE);
    assert_eq!(RdmaOperation::Write, write_op);
    let read_op = RdmaOperation::from(rdmacore_sys::ibv_wc_opcode::IBV_WC_RDMA_READ);
    assert_eq!(RdmaOperation::Read, read_op);
}
824
883
@@ -839,18 +898,18 @@ mod tests {
839
898
840
899
#[ test]
841
900
fn test_ibv_wc ( ) {
842
- let mut wc = ffi :: ibv_wc:: default ( ) ;
901
+ let mut wc = rdmacore_sys :: ibv_wc:: default ( ) ;
843
902
844
903
// SAFETY: modifies private fields through pointer manipulation
845
904
unsafe {
846
905
// Cast to pointer and modify the fields directly
847
- let wc_ptr = & mut wc as * mut ffi :: ibv_wc as * mut u8 ;
906
+ let wc_ptr = & mut wc as * mut rdmacore_sys :: ibv_wc as * mut u8 ;
848
907
849
908
// Set wr_id (at offset 0, u64)
850
909
* ( wc_ptr as * mut u64 ) = 42 ;
851
910
852
911
// Set status to SUCCESS (at offset 8, u32)
853
- * ( wc_ptr. add ( 8 ) as * mut i32 ) = ffi :: ibv_wc_status:: IBV_WC_SUCCESS as i32 ;
912
+ * ( wc_ptr. add ( 8 ) as * mut i32 ) = rdmacore_sys :: ibv_wc_status:: IBV_WC_SUCCESS as i32 ;
854
913
}
855
914
let ibv_wc = IbvWc :: from ( wc) ;
856
915
assert_eq ! ( ibv_wc. wr_id( ) , 42 ) ;
0 commit comments