@@ -223,6 +223,37 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
223
223
}
224
224
EXPORT_SYMBOL_NS_GPL (cxl_dpa_debug , "CXL" );
225
225
226
+ /* See request_skip() kernel-doc */
227
+ static resource_size_t __adjust_skip (struct cxl_dev_state * cxlds ,
228
+ const resource_size_t skip_base ,
229
+ const resource_size_t skip_len ,
230
+ const char * requester )
231
+ {
232
+ const resource_size_t skip_end = skip_base + skip_len - 1 ;
233
+
234
+ for (int i = 0 ; i < cxlds -> nr_partitions ; i ++ ) {
235
+ const struct resource * part_res = & cxlds -> part [i ].res ;
236
+ resource_size_t adjust_start , adjust_end , size ;
237
+
238
+ adjust_start = max (skip_base , part_res -> start );
239
+ adjust_end = min (skip_end , part_res -> end );
240
+
241
+ if (adjust_end < adjust_start )
242
+ continue ;
243
+
244
+ size = adjust_end - adjust_start + 1 ;
245
+
246
+ if (!requester )
247
+ __release_region (& cxlds -> dpa_res , adjust_start , size );
248
+ else if (!__request_region (& cxlds -> dpa_res , adjust_start , size ,
249
+ requester , 0 ))
250
+ return adjust_start - skip_base ;
251
+ }
252
+
253
+ return skip_len ;
254
+ }
255
/* Release a 'skip' range: __adjust_skip() with no requester name */
#define release_skip(ds, base, len) __adjust_skip((ds), (base), (len), NULL)
256
+
226
257
/*
227
258
* Must be called in a context that synchronizes against this decoder's
228
259
* port ->remove() callback (like an endpoint decoder sysfs attribute)
@@ -241,7 +272,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
241
272
skip_start = res -> start - cxled -> skip ;
242
273
__release_region (& cxlds -> dpa_res , res -> start , resource_size (res ));
243
274
if (cxled -> skip )
244
- __release_region ( & cxlds -> dpa_res , skip_start , cxled -> skip );
275
+ release_skip ( cxlds , skip_start , cxled -> skip );
245
276
cxled -> skip = 0 ;
246
277
cxled -> dpa_res = NULL ;
247
278
put_device (& cxled -> cxld .dev );
@@ -268,6 +299,58 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
268
299
__cxl_dpa_release (cxled );
269
300
}
270
301
302
+ /**
303
+ * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree
304
+ * @cxlds: CXL.mem device context that parents @cxled
305
+ * @cxled: Endpoint decoder establishing new allocation that skips lower DPA
306
+ * @skip_base: DPA < start of new DPA allocation (DPAnew)
307
+ * @skip_len: @skip_base + @skip_len == DPAnew
308
+ *
309
+ * DPA 'skip' arises from out-of-sequence DPA allocation events relative
310
+ * to free capacity across multiple partitions. It is a wasteful event
311
+ * as usable DPA gets thrown away, but if a deployment has, for example,
312
+ * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM
313
+ * DPA, the free RAM DPA must be sacrificed to start allocating PMEM.
314
+ * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder
315
+ * Protection" for more details.
316
+ *
317
+ * A 'skip' always covers the last allocated DPA in a previous partition
318
+ * to the start of the current partition to allocate. Allocations never
319
+ * start in the middle of a partition, and allocations are always
320
+ * de-allocated in reverse order (see cxl_dpa_free(), or natural devm
321
+ * unwind order from forced in-order allocation).
322
+ *
323
+ * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip'
324
+ * would always be contained to a single partition. Given
325
+ * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip'
326
+ * might span "tail capacity of partition[0], all of partition[1], ...,
327
+ * all of partition[N-1]" to support allocating from partition[N]. That
328
+ * in turn interacts with the partition 'struct resource' boundaries
329
+ * within @cxlds->dpa_res whereby 'skip' requests need to be divided by
330
+ * partition. I.e. this is a quirk of using a 'struct resource' tree to
331
+ * detect range conflicts while also tracking partition boundaries in
332
+ * @cxlds->dpa_res.
333
+ */
334
+ static int request_skip (struct cxl_dev_state * cxlds ,
335
+ struct cxl_endpoint_decoder * cxled ,
336
+ const resource_size_t skip_base ,
337
+ const resource_size_t skip_len )
338
+ {
339
+ resource_size_t skipped = __adjust_skip (cxlds , skip_base , skip_len ,
340
+ dev_name (& cxled -> cxld .dev ));
341
+
342
+ if (skipped == skip_len )
343
+ return 0 ;
344
+
345
+ dev_dbg (cxlds -> dev ,
346
+ "%s: failed to reserve skipped space (%pa %pa %pa)\n" ,
347
+ dev_name (& cxled -> cxld .dev ), & skip_base , & skip_len , & skipped );
348
+
349
+ release_skip (cxlds , skip_base , skipped );
350
+
351
+ return - EBUSY ;
352
+ }
353
+
271
354
static int __cxl_dpa_reserve (struct cxl_endpoint_decoder * cxled ,
272
355
resource_size_t base , resource_size_t len ,
273
356
resource_size_t skipped )
@@ -276,7 +359,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
276
359
struct cxl_port * port = cxled_to_port (cxled );
277
360
struct cxl_dev_state * cxlds = cxlmd -> cxlds ;
278
361
struct device * dev = & port -> dev ;
362
+ enum cxl_decoder_mode mode ;
279
363
struct resource * res ;
364
+ int rc ;
280
365
281
366
lockdep_assert_held_write (& cxl_dpa_rwsem );
282
367
@@ -305,37 +390,33 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
305
390
}
306
391
307
392
if (skipped ) {
308
- res = __request_region (& cxlds -> dpa_res , base - skipped , skipped ,
309
- dev_name (& cxled -> cxld .dev ), 0 );
310
- if (!res ) {
311
- dev_dbg (dev ,
312
- "decoder%d.%d: failed to reserve skipped space\n" ,
313
- port -> id , cxled -> cxld .id );
314
- return - EBUSY ;
315
- }
393
+ rc = request_skip (cxlds , cxled , base - skipped , skipped );
394
+ if (rc )
395
+ return rc ;
316
396
}
317
397
res = __request_region (& cxlds -> dpa_res , base , len ,
318
398
dev_name (& cxled -> cxld .dev ), 0 );
319
399
if (!res ) {
320
400
dev_dbg (dev , "decoder%d.%d: failed to reserve allocation\n" ,
321
401
port -> id , cxled -> cxld .id );
322
402
if (skipped )
323
- __release_region (& cxlds -> dpa_res , base - skipped ,
324
- skipped );
403
+ release_skip (cxlds , base - skipped , skipped );
325
404
return - EBUSY ;
326
405
}
327
406
cxled -> dpa_res = res ;
328
407
cxled -> skip = skipped ;
329
408
330
- if (to_pmem_res (cxlds ) && resource_contains (to_pmem_res (cxlds ), res ))
331
- cxled -> mode = CXL_DECODER_PMEM ;
332
- else if (to_ram_res (cxlds ) && resource_contains (to_ram_res (cxlds ), res ))
333
- cxled -> mode = CXL_DECODER_RAM ;
334
- else {
409
+ mode = CXL_DECODER_NONE ;
410
+ for (int i = 0 ; cxlds -> nr_partitions ; i ++ )
411
+ if (resource_contains (& cxlds -> part [i ].res , res )) {
412
+ mode = cxl_part_mode (cxlds -> part [i ].mode );
413
+ break ;
414
+ }
415
+
416
+ if (mode == CXL_DECODER_NONE )
335
417
dev_warn (dev , "decoder%d.%d: %pr does not map any partition\n" ,
336
418
port -> id , cxled -> cxld .id , res );
337
- cxled -> mode = CXL_DECODER_NONE ;
338
- }
419
+ cxled -> mode = mode ;
339
420
340
421
port -> hdm_end ++ ;
341
422
get_device (& cxled -> cxld .dev );
@@ -542,15 +623,13 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
542
623
int cxl_dpa_alloc (struct cxl_endpoint_decoder * cxled , unsigned long long size )
543
624
{
544
625
struct cxl_memdev * cxlmd = cxled_to_memdev (cxled );
545
- resource_size_t free_ram_start , free_pmem_start ;
546
626
struct cxl_port * port = cxled_to_port (cxled );
547
627
struct cxl_dev_state * cxlds = cxlmd -> cxlds ;
548
628
struct device * dev = & cxled -> cxld .dev ;
549
- resource_size_t start , avail , skip ;
629
+ struct resource * res , * prev = NULL ;
630
+ resource_size_t start , avail , skip , skip_start ;
550
631
struct resource * p , * last ;
551
- const struct resource * ram_res = to_ram_res (cxlds );
552
- const struct resource * pmem_res = to_pmem_res (cxlds );
553
- int rc ;
632
+ int part , rc ;
554
633
555
634
down_write (& cxl_dpa_rwsem );
556
635
if (cxled -> cxld .region ) {
@@ -566,47 +645,53 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
566
645
goto out ;
567
646
}
568
647
569
- for (p = ram_res -> child , last = NULL ; p ; p = p -> sibling )
570
- last = p ;
571
- if (last )
572
- free_ram_start = last -> end + 1 ;
573
- else
574
- free_ram_start = ram_res -> start ;
648
+ part = -1 ;
649
+ for (int i = 0 ; i < cxlds -> nr_partitions ; i ++ ) {
650
+ if (cxled -> mode == cxl_part_mode (cxlds -> part [i ].mode )) {
651
+ part = i ;
652
+ break ;
653
+ }
654
+ }
655
+
656
+ if (part < 0 ) {
657
+ rc = - EBUSY ;
658
+ goto out ;
659
+ }
575
660
576
- for (p = pmem_res -> child , last = NULL ; p ; p = p -> sibling )
661
+ res = & cxlds -> part [part ].res ;
662
+ for (p = res -> child , last = NULL ; p ; p = p -> sibling )
577
663
last = p ;
578
664
if (last )
579
- free_pmem_start = last -> end + 1 ;
665
+ start = last -> end + 1 ;
580
666
else
581
- free_pmem_start = pmem_res -> start ;
667
+ start = res -> start ;
582
668
583
- if (cxled -> mode == CXL_DECODER_RAM ) {
584
- start = free_ram_start ;
585
- avail = ram_res -> end - start + 1 ;
586
- skip = 0 ;
587
- } else if (cxled -> mode == CXL_DECODER_PMEM ) {
588
- resource_size_t skip_start , skip_end ;
589
-
590
- start = free_pmem_start ;
591
- avail = pmem_res -> end - start + 1 ;
592
- skip_start = free_ram_start ;
593
-
594
- /*
595
- * If some pmem is already allocated, then that allocation
596
- * already handled the skip.
597
- */
598
- if (pmem_res -> child &&
599
- skip_start == pmem_res -> child -> start )
600
- skip_end = skip_start - 1 ;
601
- else
602
- skip_end = start - 1 ;
603
- skip = skip_end - skip_start + 1 ;
604
- } else {
605
- dev_dbg (dev , "mode not set\n" );
606
- rc = - EINVAL ;
607
- goto out ;
669
+ /*
670
+ * To allocate at partition N, a skip needs to be calculated for all
671
+ * unallocated space at lower partition indices.
672
+ *
673
+ * If a partition has any allocations, the search can end because a
674
+ * previous cxl_dpa_alloc() invocation is assumed to have accounted for
675
+ * all previous partitions.
676
+ */
677
+ skip_start = CXL_RESOURCE_NONE ;
678
+ for (int i = part ; i ; i -- ) {
679
+ prev = & cxlds -> part [i - 1 ].res ;
680
+ for (p = prev -> child , last = NULL ; p ; p = p -> sibling )
681
+ last = p ;
682
+ if (last ) {
683
+ skip_start = last -> end + 1 ;
684
+ break ;
685
+ }
686
+ skip_start = prev -> start ;
608
687
}
609
688
689
+ avail = res -> end - start + 1 ;
690
+ if (skip_start == CXL_RESOURCE_NONE )
691
+ skip = 0 ;
692
+ else
693
+ skip = res -> start - skip_start ;
694
+
610
695
if (size > avail ) {
611
696
dev_dbg (dev , "%pa exceeds available %s capacity: %pa\n" , & size ,
612
697
cxl_decoder_mode_name (cxled -> mode ), & avail );
0 commit comments