@@ -124,6 +124,20 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
 #endif
 };
 
+/*
+ * Nomenclature for variable names to simplify and clarify this code and ease
+ * any potential staring at it:
+ *
+ * @instr: source address of the original instructions in the kernel text as
+ * generated by the compiler.
+ *
+ * @buf: temporary buffer on which the patching operates. This buffer is
+ * eventually text-poked into the kernel image.
+ *
+ * @replacement/@repl: pointer to the opcodes which are replacing @instr, located
+ * in the .altinstr_replacement section.
+ */
+
 /*
  * Fill the buffer with a single effective instruction of size @len.
  *
@@ -133,28 +147,28 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
  * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
  * *jump* over instead of executing long and daft NOPs.
  */
-static void add_nop(u8 *instr, unsigned int len)
+static void add_nop(u8 *buf, unsigned int len)
 {
-	u8 *target = instr + len;
+	u8 *target = buf + len;
 
 	if (!len)
 		return;
 
 	if (len <= ASM_NOP_MAX) {
-		memcpy(instr, x86_nops[len], len);
+		memcpy(buf, x86_nops[len], len);
 		return;
 	}
 
 	if (len < 128) {
-		__text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
-		instr += JMP8_INSN_SIZE;
+		__text_gen_insn(buf, JMP8_INSN_OPCODE, buf, target, JMP8_INSN_SIZE);
+		buf += JMP8_INSN_SIZE;
 	} else {
-		__text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
-		instr += JMP32_INSN_SIZE;
+		__text_gen_insn(buf, JMP32_INSN_OPCODE, buf, target, JMP32_INSN_SIZE);
+		buf += JMP32_INSN_SIZE;
 	}
 
-	for (;instr < target; instr++)
-		*instr = INT3_INSN_OPCODE;
+	for (;buf < target; buf++)
+		*buf = INT3_INSN_OPCODE;
 }
 
 extern s32 __retpoline_sites[], __retpoline_sites_end[];
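Note: as the comment above the function says, holes longer than ASM_NOP_MAX are not filled with a daft run of NOPs: add_nop() plants a short or near JMP over the remainder and pads the never-executed tail with INT3. A minimal user-space sketch of that strategy; the opcode values and the NOP_MAX stand-in are assumptions mirroring <asm/text-patching.h> rather than taken from it, and it uses plain 0x90 bytes instead of the kernel's x86_nops table:

#include <stdio.h>
#include <string.h>

#define NOP     0x90	/* single-byte NOP */
#define JMP8    0xeb	/* short jump, rel8 immediate */
#define JMP32   0xe9	/* near jump, rel32 immediate */
#define INT3    0xcc	/* trap byte used as padding */
#define NOP_MAX 8	/* stand-in for ASM_NOP_MAX */

static void fill(unsigned char *buf, unsigned int len)
{
	if (!len)
		return;

	if (len <= NOP_MAX) {
		memset(buf, NOP, len);	/* kernel uses x86_nops[len] here */
		return;
	}

	if (len < 128) {
		buf[0] = JMP8;		/* jmp rel8 over the padding */
		buf[1] = (unsigned char)(len - 2);
		memset(buf + 2, INT3, len - 2);
	} else {
		int rel = (int)len - 5;	/* jmp rel32 over the padding */

		buf[0] = JMP32;
		memcpy(buf + 1, &rel, 4);	/* little-endian, as on x86 */
		memset(buf + 5, INT3, len - 5);
	}
}

int main(void)
{
	unsigned char buf[40];

	fill(buf, sizeof(buf));
	for (unsigned int i = 0; i < sizeof(buf); i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}

For len == 40 this prints "eb 26" followed by 38 "cc" bytes: a two-byte JMP.d8 to buf + 40 over INT3 padding that is never executed.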
@@ -187,12 +201,12 @@ static bool insn_is_nop(struct insn *insn)
  * Find the offset of the first non-NOP instruction starting at @offset
  * but no further than @len.
  */
-static int skip_nops(u8 *instr, int offset, int len)
+static int skip_nops(u8 *buf, int offset, int len)
 {
 	struct insn insn;
 
 	for (; offset < len; offset += insn.length) {
-		if (insn_decode_kernel(&insn, &instr[offset]))
+		if (insn_decode_kernel(&insn, &buf[offset]))
 			break;
 
 		if (!insn_is_nop(&insn))
@@ -202,67 +216,33 @@ static int skip_nops(u8 *instr, int offset, int len)
 	return offset;
 }
 
-/*
- * Optimize a sequence of NOPs, possibly preceded by an unconditional jump
- * to the end of the NOP sequence into a single NOP.
- */
-static bool
-__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
-{
-	int i = *next - insn->length;
-
-	switch (insn->opcode.bytes[0]) {
-	case JMP8_INSN_OPCODE:
-	case JMP32_INSN_OPCODE:
-		*prev = i;
-		*target = *next + insn->immediate.value;
-		return false;
-	}
-
-	if (insn_is_nop(insn)) {
-		int nop = i;
-
-		*next = skip_nops(instr, *next, len);
-		if (*target && *next == *target)
-			nop = *prev;
-
-		add_nop(instr + nop, *next - nop);
-		DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
-		return true;
-	}
-
-	*target = 0;
-	return false;
-}
-
 /*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
-static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+static void noinline optimize_nops(const u8 * const instr, u8 *buf, size_t len)
 {
-	int prev, target = 0;
-
 	for (int next, i = 0; i < len; i = next) {
 		struct insn insn;
 
-		if (insn_decode_kernel(&insn, &instr[i]))
+		if (insn_decode_kernel(&insn, &buf[i]))
 			return;
 
 		next = i + insn.length;
 
-		__optimize_nops(instr, len, &insn, &next, &prev, &target);
-	}
-}
+		if (insn_is_nop(&insn)) {
+			int nop = i;
 
-static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
-{
-	unsigned long flags;
+			/* Has the NOP already been optimized? */
+			if (i + insn.length == len)
+				return;
 
-	local_irq_save(flags);
-	optimize_nops(instr, len);
-	sync_core();
-	local_irq_restore(flags);
+			next = skip_nops(buf, next, len);
+
+			add_nop(buf + nop, next - nop);
+			DUMP_BYTES(ALT, buf, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, next);
+		}
+	}
 }
 
 /*
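Note: the rewritten optimize_nops() works on @buf and simply collapses every run of NOPs into one long NOP via add_nop(), bailing out if the run already reaches the end of the range; the jump-tracking state __optimize_nops() threaded through *prev/*target is gone. A toy model of the walk, assuming a one-byte-instruction "decoder" in place of insn_decode_kernel():

#include <stdio.h>

#define NOP 0x90

/* mirrors skip_nops() above, restricted to single-byte NOPs */
static int skip_nops(unsigned char *buf, int offset, int len)
{
	while (offset < len && buf[offset] == NOP)
		offset++;
	return offset;
}

int main(void)
{
	/* 0x50 = push %rax, four single-byte NOPs, 0x58 = pop %rax */
	unsigned char buf[] = { 0x50, NOP, NOP, NOP, NOP, 0x58 };

	for (int next, i = 0; i < (int)sizeof(buf); i = next) {
		next = i + 1;	/* every "instruction" here is one byte */
		if (buf[i] != NOP)
			continue;

		next = skip_nops(buf, next, sizeof(buf));
		/* the kernel would now add_nop(buf + i, next - i) */
		printf("collapse [%d:%d)\n", i, next);
	}
	return 0;
}

On this buffer it reports "collapse [1:5)"; the kernel would then write x86_nops[4] (0f 1f 40 00) over those four bytes.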
@@ -335,21 +315,16 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
 	return (target < src || target > src + src_len);
 }
 
-void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+static void __apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
 {
-	int prev, target = 0;
-
-	for (int next, i = 0; i < len; i = next) {
+	for (int next, i = 0; i < instrlen; i = next) {
 		struct insn insn;
 
 		if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
 			return;
 
 		next = i + insn.length;
 
-		if (__optimize_nops(buf, len, &insn, &next, &prev, &target))
-			continue;
-
 		switch (insn.opcode.bytes[0]) {
 		case 0x0f:
 			if (insn.opcode.bytes[1] < 0x80 ||
@@ -361,18 +336,18 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 		case JMP8_INSN_OPCODE:
 		case JMP32_INSN_OPCODE:
 		case CALL_INSN_OPCODE:
-			if (need_reloc(next + insn.immediate.value, src, src_len)) {
+			if (need_reloc(next + insn.immediate.value, repl, repl_len)) {
 				apply_reloc(insn.immediate.nbytes,
 					    buf + i + insn_offset_immediate(&insn),
-					    src - dest);
+					    repl - instr);
 			}
 
 			/*
 			 * Where possible, convert JMP.d32 into JMP.d8.
 			 */
 			if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) {
 				s32 imm = insn.immediate.value;
-				imm += src - dest;
+				imm += repl - instr;
 				imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE;
 				if ((imm >> 31) == (imm >> 7)) {
 					buf[i + 0] = JMP8_INSN_OPCODE;
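Note: the (imm >> 31) == (imm >> 7) test above is a branch-free check that the adjusted displacement fits a signed byte: for any s32 in [-128, 127], bits 7 through 31 are all copies of the sign bit, so both shifts agree (both 0 or both -1). A quick standalone sketch, assuming arithmetic right shift of negative values as the kernel does:

#include <stdio.h>
#include <stdint.h>

/* true iff imm can be encoded as a rel8 immediate */
static int fits_in_s8(int32_t imm)
{
	return (imm >> 31) == (imm >> 7);
}

int main(void)
{
	printf("%d %d %d %d\n",
	       fits_in_s8(127),	/* 1: largest rel8 */
	       fits_in_s8(-128),	/* 1: smallest rel8 */
	       fits_in_s8(128),	/* 0: needs rel32 */
	       fits_in_s8(-129));	/* 0: needs rel32 */
	return 0;
}

This prints "1 1 0 0", matching the [-128, 127] range of a JMP.d8 displacement.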
@@ -385,15 +360,21 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 			}
 
 			if (insn_rip_relative(&insn)) {
-				if (need_reloc(next + insn.displacement.value, src, src_len)) {
+				if (need_reloc(next + insn.displacement.value, repl, repl_len)) {
 					apply_reloc(insn.displacement.nbytes,
 						    buf + i + insn_offset_displacement(&insn),
-						    src - dest);
+						    repl - instr);
 				}
 			}
 		}
 	}
 }
 
+void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
+{
+	__apply_relocation(buf, instr, instrlen, repl, repl_len);
+	optimize_nops(instr, buf, repl_len);
+}
+
 /* Low-level backend functions usable from alternative code replacements. */
 DEFINE_ASM_FUNC(nop_func, "", .entry.text);
 EXPORT_SYMBOL_GPL(nop_func);
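Note: apply_relocation() is now a thin wrapper that pairs the relocation pass with optimize_nops() on the same buffer, so every caller gets its NOPs collapsed in one place. The fixup itself is plain displacement arithmetic: bytes authored at @repl will execute at @instr, so a RIP-relative immediate whose target lies outside the replacement must grow by (repl - instr) to keep the same absolute target. A standalone model with hypothetical addresses, not the kernel helpers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t instr  = 0xffffffff81000000ull;	/* runtime location */
	uint64_t repl   = 0xffffffff82000000ull;	/* .altinstr_replacement */
	uint64_t target = 0xffffffff8155aa00ull;	/* fixed kernel symbol */

	/* rel32 as emitted, relative to the end of a 5-byte CALL at repl */
	int32_t imm = (int32_t)(target - (repl + 5));

	/* relocate for execution at instr: add (repl - instr) */
	int32_t adj = imm + (int32_t)(repl - instr);

	printf("before: %#llx  after: %#llx\n",
	       (unsigned long long)(repl + 5 + imm),
	       (unsigned long long)(instr + 5 + adj));
	return 0;
}

Both computed targets print as 0xffffffff8155aa00: the adjusted immediate reaches the same absolute address from the new location.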
@@ -464,9 +445,9 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
 void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 						  struct alt_instr *end)
 {
-	struct alt_instr *a;
-	u8 *instr, *replacement;
 	u8 insn_buff[MAX_PATCH_LEN];
+	u8 *instr, *replacement;
+	struct alt_instr *a;
 
 	DPRINTK(ALT, "alt table %px, -> %px", start, end);
 
@@ -504,7 +485,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		 * patch if feature is *NOT* present.
 		 */
 		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
-			optimize_nops_inplace(instr, a->instrlen);
+			memcpy(insn_buff, instr, a->instrlen);
+			optimize_nops(instr, insn_buff, a->instrlen);
+			text_poke_early(instr, insn_buff, a->instrlen);
 			continue;
 		}
 
@@ -526,7 +509,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
 			insn_buff[insn_buff_sz] = 0x90;
 
-		apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
+		apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
 
 		DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
 		DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -761,7 +744,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 
 		len = patch_retpoline(addr, &insn, bytes);
 		if (len == insn.length) {
-			optimize_nops(bytes, len);
+			optimize_nops(addr, bytes, len);
 			DUMP_BYTES(RETPOLINE, ((u8 *)addr), len, "%px: orig: ", addr);
 			DUMP_BYTES(RETPOLINE, ((u8 *)bytes), len, "%px: repl: ", addr);
 			text_poke_early(addr, bytes, len);