@@ -47,95 +47,109 @@ fn adc(carry: u8, lhs: BigDigit, rhs: BigDigit, out: &mut BigDigit) -> u8 {
47
47
u8:: from ( b || d)
48
48
}
49
49
50
- /// Performs a part of the addition. Returns a tuple containing the carry state
51
- /// and the number of integers that were added
52
- ///
53
- /// By using as many registers as possible, we treat digits 5 by 5
54
50
#[ cfg( target_arch = "x86_64" ) ]
55
- unsafe fn schoolbook_add_assign_x86_64 (
56
- lhs : * mut u64 ,
57
- rhs : * const u64 ,
58
- mut size : usize ,
59
- ) -> ( bool , usize ) {
60
- size /= 5 ;
61
- if size == 0 {
62
- return ( false , 0 ) ;
63
- }
51
+ cfg_64 ! (
52
+ /// Performs a part of the addition. Returns a tuple containing the carry state
53
+ /// and the number of integers that were added
54
+ ///
55
+ /// By using as many registers as possible, we treat digits 5 by 5
56
+ unsafe fn schoolbook_add_assign_x86_64(
57
+ lhs: * mut u64 ,
58
+ rhs: * const u64 ,
59
+ mut size: usize ,
60
+ ) -> ( bool , usize ) {
61
+ size /= 5 ;
62
+ if size == 0 {
63
+ return ( false , 0 ) ;
64
+ }
64
65
65
- let mut c: u8 ;
66
- let mut idx = 0 ;
67
-
68
- asm ! (
69
- // Clear the carry flag
70
- "clc" ,
71
-
72
- "3:" ,
73
-
74
- // Copy a in registers
75
- "mov {a_tmp1}, qword ptr [{a} + 8*{idx}]" ,
76
- "mov {a_tmp2}, qword ptr [{a} + 8*{idx} + 8]" ,
77
- "mov {a_tmp3}, qword ptr [{a} + 8*{idx} + 16]" ,
78
- "mov {a_tmp4}, qword ptr [{a} + 8*{idx} + 24]" ,
79
- "mov {a_tmp5}, qword ptr [{a} + 8*{idx} + 32]" ,
80
-
81
- // Copy b in registers
82
- "mov {b_tmp1}, qword ptr [{b} + 8*{idx}]" ,
83
- "mov {b_tmp2}, qword ptr [{b} + 8*{idx} + 8]" ,
84
- "mov {b_tmp3}, qword ptr [{b} + 8*{idx} + 16]" ,
85
- "mov {b_tmp4}, qword ptr [{b} + 8*{idx} + 24]" ,
86
- "mov {b_tmp5}, qword ptr [{b} + 8*{idx} + 32]" ,
87
-
88
- // Perform the addition
89
- "adc {a_tmp1}, {b_tmp1}" ,
90
- "adc {a_tmp2}, {b_tmp2}" ,
91
- "adc {a_tmp3}, {b_tmp3}" ,
92
- "adc {a_tmp4}, {b_tmp4}" ,
93
- "adc {a_tmp5}, {b_tmp5}" ,
94
-
95
- // Copy the return values
96
- "mov qword ptr [{a} + 8*{idx}], {a_tmp1}" ,
97
- "mov qword ptr [{a} + 8*{idx} + 8], {a_tmp2}" ,
98
- "mov qword ptr [{a} + 8*{idx} + 16], {a_tmp3}" ,
99
- "mov qword ptr [{a} + 8*{idx} + 24], {a_tmp4}" ,
100
- "mov qword ptr [{a} + 8*{idx} + 32], {a_tmp5}" ,
101
-
102
- // Increment loop counter
103
- // `inc` and `dec` aren't modifying carry flag
104
- "inc {idx}" ,
105
- "inc {idx}" ,
106
- "inc {idx}" ,
107
- "inc {idx}" ,
108
- "inc {idx}" ,
109
- "dec {size}" ,
110
- "jnz 3b" ,
111
-
112
- // Output carry flag and clear
113
- "setc {c}" ,
114
- "clc" ,
115
-
116
- size = in( reg) size,
117
- a = in( reg) lhs,
118
- b = in( reg) rhs,
119
- c = lateout( reg_byte) c,
120
- idx = inout( reg) idx,
121
-
122
- a_tmp1 = out( reg) _,
123
- a_tmp2 = out( reg) _,
124
- a_tmp3 = out( reg) _,
125
- a_tmp4 = out( reg) _,
126
- a_tmp5 = out( reg) _,
127
-
128
- b_tmp1 = out( reg) _,
129
- b_tmp2 = out( reg) _,
130
- b_tmp3 = out( reg) _,
131
- b_tmp4 = out( reg) _,
132
- b_tmp5 = out( reg) _,
133
-
134
- options( nostack) ,
135
- ) ;
66
+ let mut c: u8 ;
67
+ let mut idx = 0 ;
68
+
69
+ asm!(
70
+ // Clear the carry flag
71
+ "clc" ,
72
+
73
+ "3:" ,
74
+
75
+ // Copy a in registers
76
+ "mov {a_tmp1}, qword ptr [{a} + 8*{idx}]" ,
77
+ "mov {a_tmp2}, qword ptr [{a} + 8*{idx} + 8]" ,
78
+ "mov {a_tmp3}, qword ptr [{a} + 8*{idx} + 16]" ,
79
+ "mov {a_tmp4}, qword ptr [{a} + 8*{idx} + 24]" ,
80
+ "mov {a_tmp5}, qword ptr [{a} + 8*{idx} + 32]" ,
81
+
82
+ // Copy b in registers
83
+ "mov {b_tmp1}, qword ptr [{b} + 8*{idx}]" ,
84
+ "mov {b_tmp2}, qword ptr [{b} + 8*{idx} + 8]" ,
85
+ "mov {b_tmp3}, qword ptr [{b} + 8*{idx} + 16]" ,
86
+ "mov {b_tmp4}, qword ptr [{b} + 8*{idx} + 24]" ,
87
+ "mov {b_tmp5}, qword ptr [{b} + 8*{idx} + 32]" ,
88
+
89
+ // Perform the addition
90
+ "adc {a_tmp1}, {b_tmp1}" ,
91
+ "adc {a_tmp2}, {b_tmp2}" ,
92
+ "adc {a_tmp3}, {b_tmp3}" ,
93
+ "adc {a_tmp4}, {b_tmp4}" ,
94
+ "adc {a_tmp5}, {b_tmp5}" ,
95
+
96
+ // Copy the return values
97
+ "mov qword ptr [{a} + 8*{idx}], {a_tmp1}" ,
98
+ "mov qword ptr [{a} + 8*{idx} + 8], {a_tmp2}" ,
99
+ "mov qword ptr [{a} + 8*{idx} + 16], {a_tmp3}" ,
100
+ "mov qword ptr [{a} + 8*{idx} + 24], {a_tmp4}" ,
101
+ "mov qword ptr [{a} + 8*{idx} + 32], {a_tmp5}" ,
102
+
103
+ // Increment loop counter
104
+ // `inc` and `dec` aren't modifying carry flag
105
+ "inc {idx}" ,
106
+ "inc {idx}" ,
107
+ "inc {idx}" ,
108
+ "inc {idx}" ,
109
+ "inc {idx}" ,
110
+ "dec {size}" ,
111
+ "jnz 3b" ,
112
+
113
+ // Output carry flag and clear
114
+ "setc {c}" ,
115
+ "clc" ,
116
+
117
+ size = in( reg) size,
118
+ a = in( reg) lhs,
119
+ b = in( reg) rhs,
120
+ c = lateout( reg_byte) c,
121
+ idx = inout( reg) idx,
122
+
123
+ a_tmp1 = out( reg) _,
124
+ a_tmp2 = out( reg) _,
125
+ a_tmp3 = out( reg) _,
126
+ a_tmp4 = out( reg) _,
127
+ a_tmp5 = out( reg) _,
128
+
129
+ b_tmp1 = out( reg) _,
130
+ b_tmp2 = out( reg) _,
131
+ b_tmp3 = out( reg) _,
132
+ b_tmp4 = out( reg) _,
133
+ b_tmp5 = out( reg) _,
134
+
135
+ options( nostack) ,
136
+ ) ;
137
+
138
+ ( c > 0 , idx)
139
+ }
140
+ ) ;
136
141
137
- ( c > 0 , idx)
138
- }
142
+ #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
143
+ cfg_32 ! (
144
+ /// TODO: The same trick as above can be applied to 32 bit targets
145
+ unsafe fn schoolbook_add_assign_x86_64(
146
+ _lhs: * mut u32 ,
147
+ _rhs: * const u32 ,
148
+ _size: usize ,
149
+ ) -> ( bool , usize ) {
150
+ ( false , 0 )
151
+ }
152
+ ) ;
139
153
140
154
/// Two argument addition of raw slices, `a += b`, returning the carry.
141
155
///
@@ -149,10 +163,10 @@ pub(super) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
149
163
150
164
let ( a_lo, a_hi) = a. split_at_mut ( b. len ( ) ) ;
151
165
152
- // On x86_64 machine, perform most of the addition via inline assembly
153
- #[ cfg( target_arch = "x86_64" ) ]
166
+ // On x86 machine, perform most of the addition via inline assembly
167
+ #[ cfg( any ( target_arch = "x86" , target_arch = " x86_64") ) ]
154
168
let ( c, done) = unsafe { schoolbook_add_assign_x86_64 ( a_lo. as_mut_ptr ( ) , b. as_ptr ( ) , b. len ( ) ) } ;
155
- #[ cfg( not( target_arch = "x86_64" ) ) ]
169
+ #[ cfg( not( any ( target_arch = "x86" , target_arch = " x86_64") ) ) ]
156
170
let ( c, done) = ( false , 0 ) ;
157
171
158
172
let mut carry = c as u8 ;
0 commit comments