  */
 #define NVME_MAX_KB_SZ	8192
 #define NVME_MAX_SEGS	128
+#define NVME_MAX_META_SEGS 15
 #define NVME_MAX_NR_ALLOCATIONS	5
 
 static int use_threaded_interrupts;
@@ -144,6 +145,7 @@ struct nvme_dev {
 	struct sg_table *hmb_sgt;
 
 	mempool_t *iod_mempool;
+	mempool_t *iod_meta_mempool;
 
 	/* shadow doorbell buffer support: */
 	__le32 *dbbuf_dbs;
@@ -239,6 +241,8 @@ struct nvme_iod {
 	dma_addr_t first_dma;
 	dma_addr_t meta_dma;
 	struct sg_table sgt;
+	struct sg_table meta_sgt;
+	union nvme_descriptor meta_list;
 	union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
 };
 
@@ -506,6 +510,14 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	spin_unlock(&nvmeq->sq_lock);
 }
 
+static inline bool nvme_pci_metadata_use_sgls(struct nvme_dev *dev,
+					      struct request *req)
+{
+	if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		return false;
+	return req->nr_integrity_segments > 1;
+}
+
 static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 				     int nseg)
 {
@@ -518,6 +530,8 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 		return false;
 	if (!nvmeq->qid)
 		return false;
+	if (nvme_pci_metadata_use_sgls(dev, req))
+		return true;
 	if (!sgl_threshold || avg_seg_size < sgl_threshold)
 		return false;
 	return true;
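
A minimal standalone sketch of the mapping decision the two hunks above now implement; it is not part of the patch, and the parameters are stand-ins for the controller's metadata-SGL capability, the request's integrity segment count, and the existing sgl_threshold heuristic:

#include <stdbool.h>

/*
 * Illustrative paraphrase of nvme_pci_use_sgls() after this change: a
 * request with more than one integrity segment needs a metadata SGL
 * (when the controller supports it), which in turn forces the data
 * transfer onto SGLs; otherwise the old PRP-vs-SGL heuristic applies.
 */
bool use_sgls(bool ctrl_supports_meta_sgl, unsigned int nr_integrity_segments,
	      bool io_queue, unsigned int avg_seg_size,
	      unsigned int sgl_threshold)
{
	if (!io_queue)
		return false;	/* admin commands keep using PRPs */
	if (ctrl_supports_meta_sgl && nr_integrity_segments > 1)
		return true;	/* metadata SGL forces data SGL */
	if (!sgl_threshold || avg_seg_size < sgl_threshold)
		return false;
	return true;
}
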
@@ -780,7 +794,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if ((bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
+			if (!nvme_pci_metadata_use_sgls(dev, req) &&
+			    (bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
 			    bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
@@ -824,11 +839,69 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	return ret;
 }
 
-static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
-		struct nvme_command *cmnd)
+static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev,
+					     struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_rw_command *cmnd = &iod->cmd.rw;
+	struct nvme_sgl_desc *sg_list;
+	struct scatterlist *sgl, *sg;
+	unsigned int entries;
+	dma_addr_t sgl_dma;
+	int rc, i;
+
+	iod->meta_sgt.sgl = mempool_alloc(dev->iod_meta_mempool, GFP_ATOMIC);
+	if (!iod->meta_sgt.sgl)
+		return BLK_STS_RESOURCE;
+
+	sg_init_table(iod->meta_sgt.sgl, req->nr_integrity_segments);
+	iod->meta_sgt.orig_nents = blk_rq_map_integrity_sg(req,
+							   iod->meta_sgt.sgl);
+	if (!iod->meta_sgt.orig_nents)
+		goto out_free_sg;
+
+	rc = dma_map_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req),
+			     DMA_ATTR_NO_WARN);
+	if (rc)
+		goto out_free_sg;
+
+	sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma);
+	if (!sg_list)
+		goto out_unmap_sg;
+
+	entries = iod->meta_sgt.nents;
+	iod->meta_list.sg_list = sg_list;
+	iod->meta_dma = sgl_dma;
+
+	cmnd->flags = NVME_CMD_SGL_METASEG;
+	cmnd->metadata = cpu_to_le64(sgl_dma);
+
+	sgl = iod->meta_sgt.sgl;
+	if (entries == 1) {
+		nvme_pci_sgl_set_data(sg_list, sgl);
+		return BLK_STS_OK;
+	}
+
+	sgl_dma += sizeof(*sg_list);
+	nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries);
+	for_each_sg(sgl, sg, entries, i)
+		nvme_pci_sgl_set_data(&sg_list[i + 1], sg);
+
+	return BLK_STS_OK;
+
+out_unmap_sg:
+	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
+out_free_sg:
+	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
+	return BLK_STS_RESOURCE;
+}
+
+static blk_status_t nvme_pci_setup_meta_mptr(struct nvme_dev *dev,
+					     struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct bio_vec bv = rq_integrity_vec(req);
+	struct nvme_command *cmnd = &iod->cmd;
 
 	iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
@@ -837,6 +910,13 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 	return BLK_STS_OK;
 }
 
+static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req)
+{
+	if (nvme_pci_metadata_use_sgls(dev, req))
+		return nvme_pci_setup_meta_sgls(dev, req);
+	return nvme_pci_setup_meta_mptr(dev, req);
+}
+
 static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
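
To illustrate what nvme_pci_setup_meta_sgls() above lays out in the single prp_small_pool allocation, here is a rough userspace sketch, not part of the patch; struct sgl_desc is a simplified stand-in for struct nvme_sgl_desc, and the DMA addresses and lengths are passed in as plain arrays:

#include <stdint.h>
#include <stddef.h>

struct sgl_desc {		/* simplified stand-in for struct nvme_sgl_desc */
	uint64_t addr;
	uint32_t length;
	uint8_t  rsvd[3];
	uint8_t  type;		/* data-block vs. segment; not modelled here */
};

/*
 * With one mapped entry, descriptor 0 describes the data directly, so
 * cmnd->metadata points straight at a data-block descriptor.  With more
 * entries, descriptor 0 becomes a segment descriptor pointing at
 * descriptors 1..entries, which sit right behind it in the same
 * allocation -- the layout the patch builds with nvme_pci_sgl_set_seg()
 * and nvme_pci_sgl_set_data().
 */
void build_meta_sgl(struct sgl_desc *list, uint64_t list_dma,
		    const uint64_t *dma_addr, const uint32_t *dma_len,
		    unsigned int entries)
{
	unsigned int i;

	if (entries == 1) {
		list[0].addr = dma_addr[0];
		list[0].length = dma_len[0];
		return;
	}

	list[0].addr = list_dma + sizeof(*list);	/* segment descriptor */
	list[0].length = entries * sizeof(*list);

	for (i = 0; i < entries; i++) {
		list[i + 1].addr = dma_addr[i];
		list[i + 1].length = dma_len[i];
	}
}
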
@@ -845,6 +925,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	iod->aborted = false;
 	iod->nr_allocations = -1;
 	iod->sgt.nents = 0;
+	iod->meta_sgt.nents = 0;
 
 	ret = nvme_setup_cmd(req->q->queuedata, req);
 	if (ret)
@@ -857,7 +938,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	}
 
 	if (blk_integrity_rq(req)) {
-		ret = nvme_map_metadata(dev, req, &iod->cmd);
+		ret = nvme_map_metadata(dev, req);
 		if (ret)
 			goto out_unmap_data;
 	}
@@ -955,17 +1036,31 @@ static void nvme_queue_rqs(struct rq_list *rqlist)
 	*rqlist = requeue_list;
 }
 
+static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev,
+						struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+	if (!iod->meta_sgt.nents) {
+		dma_unmap_page(dev->dev, iod->meta_dma,
+			       rq_integrity_vec(req).bv_len,
+			       rq_dma_dir(req));
+		return;
+	}
+
+	dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list,
+		      iod->meta_dma);
+	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
+	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
+}
+
 static __always_inline void nvme_pci_unmap_rq(struct request *req)
 {
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 	struct nvme_dev *dev = nvmeq->dev;
 
-	if (blk_integrity_rq(req)) {
-		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-
-		dma_unmap_page(dev->dev, iod->meta_dma,
-			       rq_integrity_vec(req).bv_len, rq_dma_dir(req));
-	}
+	if (blk_integrity_rq(req))
+		nvme_unmap_metadata(dev, req);
 
 	if (blk_rq_nr_phys_segments(req))
 		nvme_unmap_data(dev, req);
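
The unmap hunk above relies on iod->meta_sgt.nents, zeroed in nvme_prep_rq(), to record which setup path ran. A toy sketch of that discriminator pattern, not part of the patch, with booleans standing in for the real dma_unmap_page(), dma_pool_free(), dma_unmap_sgtable() and mempool_free() calls:

#include <stdbool.h>

/* Toy per-request metadata state: nents == 0 means the single-pointer
 * (MPTR) path mapped one bvec; nents > 0 means the SGL path mapped a
 * scatter table and allocated a descriptor list from the small pool. */
struct meta_state {
	unsigned int nents;	/* 0 after prep; >0 only if the SGL path ran */
	bool page_mapped;	/* MPTR path: dma_map_bvec() */
	bool sgtable_mapped;	/* SGL path: dma_map_sgtable() */
	bool desc_list_held;	/* SGL path: dma_pool_alloc() */
	bool sgl_held;		/* SGL path: mempool_alloc() */
};

void unmap_metadata(struct meta_state *st)
{
	if (!st->nents) {
		st->page_mapped = false;	/* dma_unmap_page() */
		return;
	}
	st->desc_list_held = false;		/* dma_pool_free() */
	st->sgtable_mapped = false;		/* dma_unmap_sgtable() */
	st->sgl_held = false;			/* mempool_free() */
}
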
@@ -2761,6 +2856,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 
 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 {
+	size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
 	size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS;
 
 	dev->iod_mempool = mempool_create_node(1,
@@ -2769,7 +2865,18 @@ static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 			dev_to_node(dev->dev));
 	if (!dev->iod_mempool)
 		return -ENOMEM;
+
+	dev->iod_meta_mempool = mempool_create_node(1,
+			mempool_kmalloc, mempool_kfree,
+			(void *)meta_size, GFP_KERNEL,
+			dev_to_node(dev->dev));
+	if (!dev->iod_meta_mempool)
+		goto free;
+
 	return 0;
+free:
+	mempool_destroy(dev->iod_mempool);
+	return -ENOMEM;
 }
 
 static void nvme_free_tagset(struct nvme_dev *dev)
@@ -2834,6 +2941,11 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
+	if (nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		dev->ctrl.max_integrity_segments = NVME_MAX_META_SEGS;
+	else
+		dev->ctrl.max_integrity_segments = 1;
+
 	nvme_dbbuf_dma_alloc(dev);
 
 	result = nvme_setup_host_mem(dev);
@@ -3101,11 +3213,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 	dev->ctrl.max_hw_sectors = min_t(u32,
 		NVME_MAX_KB_SZ << 1, dma_opt_mapping_size(&pdev->dev) >> 9);
 	dev->ctrl.max_segments = NVME_MAX_SEGS;
-
-	/*
-	 * There is no support for SGLs for metadata (yet), so we are limited to
-	 * a single integrity segment for the separate metadata pointer.
-	 */
 	dev->ctrl.max_integrity_segments = 1;
 	return dev;
 
@@ -3168,6 +3275,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto out_disable;
 
+	if (nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		dev->ctrl.max_integrity_segments = NVME_MAX_META_SEGS;
+	else
+		dev->ctrl.max_integrity_segments = 1;
+
 	nvme_dbbuf_dma_alloc(dev);
 
 	result = nvme_setup_host_mem(dev);
@@ -3210,6 +3322,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_free_queues(dev, 0);
 out_release_iod_mempool:
 	mempool_destroy(dev->iod_mempool);
+	mempool_destroy(dev->iod_meta_mempool);
 out_release_prp_pools:
 	nvme_release_prp_pools(dev);
 out_dev_unmap:
@@ -3275,6 +3388,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_dbbuf_dma_free(dev);
 	nvme_free_queues(dev, 0);
 	mempool_destroy(dev->iod_mempool);
+	mempool_destroy(dev->iod_meta_mempool);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_uninit_ctrl(&dev->ctrl);