Skip to content

Commit ee96dd9

Browse files
committed
Merge tag 'libnvdimm-for-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams: "The update for this cycle includes the deprecation of block-aperture mode and a new perf events interface for the papr_scm nvdimm driver. The perf events approach was acked by PeterZ. - Add perf support for nvdimm events, initially only for 'papr_scm' devices. - Deprecate the 'block aperture' support in libnvdimm, it only ever existed in the specification, not in shipping product" * tag 'libnvdimm-for-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: nvdimm/blk: Fix title level MAINTAINERS: remove section LIBNVDIMM BLK: MMIO-APERTURE DRIVER powerpc/papr_scm: Fix build failure when drivers/nvdimm: Fix build failure when CONFIG_PERF_EVENTS is not set nvdimm/region: Delete nd_blk_region infrastructure ACPI: NFIT: Remove block aperture support nvdimm/namespace: Delete nd_namespace_blk nvdimm/namespace: Delete blk namespace consideration in shared paths nvdimm/blk: Delete the block-aperture window driver nvdimm/region: Fix default alignment for small regions docs: ABI: sysfs-bus-nvdimm: Document sysfs event format entries for nvdimm pmu powerpc/papr_scm: Add perf interface support drivers/nvdimm: Add perf interface to expose nvdimm performance stats drivers/nvdimm: Add nvdimm pmu structure
2 parents d888c83 + ada8d8d commit ee96dd9

File tree

27 files changed

+832
-2428
lines changed

27 files changed

+832
-2428
lines changed

Documentation/ABI/testing/sysfs-bus-nvdimm

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,38 @@ Description:
66

77
The libnvdimm sub-system implements a common sysfs interface for
88
platform nvdimm resources. See Documentation/driver-api/nvdimm/.
9+
10+
What: /sys/bus/event_source/devices/nmemX/format
11+
Date: February 2022
12+
KernelVersion: 5.18
13+
Contact: Kajol Jain <kjain@linux.ibm.com>
14+
Description: (RO) Attribute group to describe the magic bits
15+
that go into perf_event_attr.config for a particular pmu.
16+
(See ABI/testing/sysfs-bus-event_source-devices-format).
17+
18+
Each attribute under this group defines a bit range of the
19+
perf_event_attr.config. The supported attribute is listed
20+
below::
21+
event = "config:0-4" - event ID
22+
23+
For example::
24+
ctl_res_cnt = "event=0x1"
25+
26+
What: /sys/bus/event_source/devices/nmemX/events
27+
Date: February 2022
28+
KernelVersion: 5.18
29+
Contact: Kajol Jain <kjain@linux.ibm.com>
30+
Description: (RO) Attribute group to describe performance monitoring events
31+
for the nvdimm memory device. Each attribute in this group
32+
describes a single performance monitoring event supported by
33+
this nvdimm pmu. The name of the file is the name of the event.
34+
(See ABI/testing/sysfs-bus-event_source-devices-events). A
35+
listing of the events supported by a given nvdimm provider type
36+
can be found in Documentation/driver-api/nvdimm/$provider.
37+
38+
What: /sys/bus/event_source/devices/nmemX/cpumask
39+
Date: February 2022
40+
KernelVersion: 5.18
41+
Contact: Kajol Jain <kjain@linux.ibm.com>
42+
Description: (RO) This sysfs file exposes the cpumask which is designated to
43+
retrieve nvdimm pmu event counter data.

Documentation/driver-api/nvdimm/nvdimm.rst

Lines changed: 88 additions & 318 deletions
Large diffs are not rendered by default.

MAINTAINERS

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11121,17 +11121,6 @@ F: drivers/ata/
1112111121
F: include/linux/ata.h
1112211122
F: include/linux/libata.h
1112311123

11124-
LIBNVDIMM BLK: MMIO-APERTURE DRIVER
11125-
M: Dan Williams <dan.j.williams@intel.com>
11126-
M: Vishal Verma <vishal.l.verma@intel.com>
11127-
M: Dave Jiang <dave.jiang@intel.com>
11128-
L: nvdimm@lists.linux.dev
11129-
S: Supported
11130-
Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
11131-
P: Documentation/nvdimm/maintainer-entry-profile.rst
11132-
F: drivers/nvdimm/blk.c
11133-
F: drivers/nvdimm/region_devs.c
11134-
1113511124
LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
1113611125
M: Vishal Verma <vishal.l.verma@intel.com>
1113711126
M: Dan Williams <dan.j.williams@intel.com>

arch/powerpc/include/asm/device.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ struct dev_archdata {
4848

4949
struct pdev_archdata {
5050
u64 dma_mask;
51+
/*
52+
* Pointer to nvdimm_pmu structure, to handle the unregistering
53+
* of pmu device
54+
*/
55+
void *priv;
5156
};
5257

5358
#endif /* _ASM_POWERPC_DEVICE_H */

arch/powerpc/platforms/pseries/papr_scm.c

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <asm/papr_pdsm.h>
2020
#include <asm/mce.h>
2121
#include <asm/unaligned.h>
22+
#include <linux/perf_event.h>
2223

2324
#define BIND_ANY_ADDR (~0ul)
2425

@@ -124,6 +125,8 @@ struct papr_scm_priv {
124125
/* The bits which needs to be overridden */
125126
u64 health_bitmap_inject_mask;
126127

128+
/* array to have event_code and stat_id mappings */
129+
char **nvdimm_events_map;
127130
};
128131

129132
static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -344,6 +347,225 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
344347
return 0;
345348
}
346349

350+
#ifdef CONFIG_PERF_EVENTS
351+
#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)
352+
353+
/*
 * Query the current value of the single performance statistic that backs
 * @event and store it, converted from big-endian, in @count.
 *
 * The statistic id is taken from the driver's nvdimm_events_map, indexed by
 * event->attr.config. A one-entry request buffer is allocated for the query
 * and released again before returning.
 *
 * Returns 0 on success, -EINVAL when @dev has no driver data or no events
 * map, -ENOMEM when the request buffer cannot be allocated, or the negative
 * error reported by drc_pmem_query_stats().
 *
 * NOTE(review): event->attr.config is not range-checked against the size of
 * nvdimm_events_map here; presumably callers rely on the event_init
 * validation - confirm.
 */
static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
{
	struct papr_scm_priv *priv = dev->driver_data;
	struct papr_scm_perf_stats *req;
	struct papr_scm_perf_stat *entry;
	int err;

	if (!priv || !priv->nvdimm_events_map)
		return -EINVAL;

	/* Request buffer: the stats header followed by exactly one stat slot */
	req = kzalloc(sizeof(struct papr_scm_perf_stats) +
		      sizeof(struct papr_scm_perf_stat), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	/* Name the statistic we want and clear its value before the query */
	entry = &req->scm_statistic[0];
	memcpy(&entry->stat_id,
	       priv->nvdimm_events_map[event->attr.config],
	       sizeof(entry->stat_id));
	entry->stat_val = 0;

	err = drc_pmem_query_stats(priv, req, 1);
	if (err >= 0) {
		*count = be64_to_cpu(entry->stat_val);
		err = 0;
	}

	kfree(req);
	return err;
}
387+
388+
static int papr_scm_pmu_event_init(struct perf_event *event)
389+
{
390+
struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
391+
struct papr_scm_priv *p;
392+
393+
if (!nd_pmu)
394+
return -EINVAL;
395+
396+
/* test the event attr type for PMU enumeration */
397+
if (event->attr.type != event->pmu->type)
398+
return -ENOENT;
399+
400+
/* it does not support event sampling mode */
401+
if (is_sampling_event(event))
402+
return -EOPNOTSUPP;
403+
404+
/* no branch sampling */
405+
if (has_branch_stack(event))
406+
return -EOPNOTSUPP;
407+
408+
p = (struct papr_scm_priv *)nd_pmu->dev->driver_data;
409+
if (!p)
410+
return -EINVAL;
411+
412+
/* Invalid eventcode */
413+
if (event->attr.config == 0 || event->attr.config > 16)
414+
return -EINVAL;
415+
416+
return 0;
417+
}
418+
419+
static int papr_scm_pmu_add(struct perf_event *event, int flags)
420+
{
421+
u64 count;
422+
int rc;
423+
struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
424+
425+
if (!nd_pmu)
426+
return -EINVAL;
427+
428+
if (flags & PERF_EF_START) {
429+
rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count);
430+
if (rc)
431+
return rc;
432+
433+
local64_set(&event->hw.prev_count, count);
434+
}
435+
436+
return 0;
437+
}
438+
439+
static void papr_scm_pmu_read(struct perf_event *event)
440+
{
441+
u64 prev, now;
442+
int rc;
443+
struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
444+
445+
if (!nd_pmu)
446+
return;
447+
448+
rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now);
449+
if (rc)
450+
return;
451+
452+
prev = local64_xchg(&event->hw.prev_count, now);
453+
local64_add(now - prev, &event->count);
454+
}
455+
456+
/*
 * pmu::del callback. Performs one last papr_scm_pmu_read() so the delta
 * accumulated since the previous snapshot is added to the event count.
 * @flags is not used.
 */
static void papr_scm_pmu_del(struct perf_event *event, int flags)
{
	papr_scm_pmu_read(event);
}
460+
461+
static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
462+
{
463+
struct papr_scm_perf_stat *stat;
464+
struct papr_scm_perf_stats *stats;
465+
char *statid;
466+
int index, rc, count;
467+
u32 available_events;
468+
469+
if (!p->stat_buffer_len)
470+
return -ENOENT;
471+
472+
available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
473+
/ sizeof(struct papr_scm_perf_stat);
474+
475+
/* Allocate the buffer for phyp where stats are written */
476+
stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
477+
if (!stats) {
478+
rc = -ENOMEM;
479+
return rc;
480+
}
481+
482+
/* Allocate memory to nvdimm_event_map */
483+
p->nvdimm_events_map = kcalloc(available_events, sizeof(char *), GFP_KERNEL);
484+
if (!p->nvdimm_events_map) {
485+
rc = -ENOMEM;
486+
goto out_stats;
487+
}
488+
489+
/* Called to get list of events supported */
490+
rc = drc_pmem_query_stats(p, stats, 0);
491+
if (rc)
492+
goto out_nvdimm_events_map;
493+
494+
for (index = 0, stat = stats->scm_statistic, count = 0;
495+
index < available_events; index++, ++stat) {
496+
statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL);
497+
if (!statid) {
498+
rc = -ENOMEM;
499+
goto out_nvdimm_events_map;
500+
}
501+
502+
strcpy(statid, stat->stat_id);
503+
p->nvdimm_events_map[count] = statid;
504+
count++;
505+
}
506+
p->nvdimm_events_map[count] = NULL;
507+
kfree(stats);
508+
return 0;
509+
510+
out_nvdimm_events_map:
511+
kfree(p->nvdimm_events_map);
512+
out_stats:
513+
kfree(stats);
514+
return rc;
515+
}
516+
517+
static void papr_scm_pmu_register(struct papr_scm_priv *p)
518+
{
519+
struct nvdimm_pmu *nd_pmu;
520+
int rc, nodeid;
521+
522+
nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
523+
if (!nd_pmu) {
524+
rc = -ENOMEM;
525+
goto pmu_err_print;
526+
}
527+
528+
rc = papr_scm_pmu_check_events(p, nd_pmu);
529+
if (rc)
530+
goto pmu_check_events_err;
531+
532+
nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
533+
nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
534+
nd_pmu->pmu.event_init = papr_scm_pmu_event_init;
535+
nd_pmu->pmu.read = papr_scm_pmu_read;
536+
nd_pmu->pmu.add = papr_scm_pmu_add;
537+
nd_pmu->pmu.del = papr_scm_pmu_del;
538+
539+
nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
540+
PERF_PMU_CAP_NO_EXCLUDE;
541+
542+
/*updating the cpumask variable */
543+
nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
544+
nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);
545+
546+
rc = register_nvdimm_pmu(nd_pmu, p->pdev);
547+
if (rc)
548+
goto pmu_register_err;
549+
550+
/*
551+
* Set archdata.priv value to nvdimm_pmu structure, to handle the
552+
* unregistering of pmu device.
553+
*/
554+
p->pdev->archdata.priv = nd_pmu;
555+
return;
556+
557+
pmu_register_err:
558+
kfree(p->nvdimm_events_map);
559+
pmu_check_events_err:
560+
kfree(nd_pmu);
561+
pmu_err_print:
562+
dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
563+
}
564+
565+
#else
566+
/* CONFIG_PERF_EVENTS is not set: registering the nvdimm PMU is a no-op. */
static void papr_scm_pmu_register(struct papr_scm_priv *p)
{
}
567+
#endif
568+
347569
/*
348570
* Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
349571
* health information.
@@ -1320,6 +1542,7 @@ static int papr_scm_probe(struct platform_device *pdev)
13201542
goto err2;
13211543

13221544
platform_set_drvdata(pdev, p);
1545+
papr_scm_pmu_register(p);
13231546

13241547
return 0;
13251548

@@ -1338,6 +1561,12 @@ static int papr_scm_remove(struct platform_device *pdev)
13381561

13391562
nvdimm_bus_unregister(p->bus);
13401563
drc_pmem_unbind(p);
1564+
1565+
if (pdev->archdata.priv)
1566+
unregister_nvdimm_pmu(pdev->archdata.priv);
1567+
1568+
pdev->archdata.priv = NULL;
1569+
kfree(p->nvdimm_events_map);
13411570
kfree(p->bus_desc.provider_name);
13421571
kfree(p);
13431572

0 commit comments

Comments
 (0)