Skip to content

Commit

Permalink
powerpc/papr_scm: Add perf interface support
Browse files Browse the repository at this point in the history
Performance monitoring support for papr-scm nvdimm devices
via the perf interface is added, which includes the addition of pmu
functions like add/del/read/event_init for the nvdimm_pmu structure.

A new parameter 'priv' is added to the pdev_archdata structure to save
the nvdimm_pmu device pointer, to handle the unregistering of the pmu device.

The papr_scm_pmu_register function populates the nvdimm_pmu structure
with name, capabilities and cpumask, along with the event handling
functions. Finally, the populated nvdimm_pmu structure is passed on to
register the pmu device. The event handling functions internally use
an hcall to get events and counter data.

Result on a power9 machine with 2 nvdimm devices:

Ex: List all events using perf list

command:# perf list nmem

  nmem0/cache_rh_cnt/                                [Kernel PMU event]
  nmem0/cache_wh_cnt/                                [Kernel PMU event]
  nmem0/cri_res_util/                                [Kernel PMU event]
  nmem0/ctl_res_cnt/                                 [Kernel PMU event]
  nmem0/ctl_res_tm/                                  [Kernel PMU event]
  nmem0/fast_w_cnt/                                  [Kernel PMU event]
  nmem0/host_l_cnt/                                  [Kernel PMU event]
  nmem0/host_l_dur/                                  [Kernel PMU event]
  nmem0/host_s_cnt/                                  [Kernel PMU event]
  nmem0/host_s_dur/                                  [Kernel PMU event]
  nmem0/med_r_cnt/                                   [Kernel PMU event]
  nmem0/med_r_dur/                                   [Kernel PMU event]
  nmem0/med_w_cnt/                                   [Kernel PMU event]
  nmem0/med_w_dur/                                   [Kernel PMU event]
  nmem0/mem_life/                                    [Kernel PMU event]
  nmem0/poweron_secs/                                [Kernel PMU event]
  ...
  nmem1/mem_life/                                    [Kernel PMU event]
  nmem1/poweron_secs/                                [Kernel PMU event]

Acked-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Nageswara R Sastry <[email protected]>
Signed-off-by: Kajol Jain <[email protected]>
[Add numa_map_to_online_node function call to get online node id]
Reported-by: Nageswara R Sastry <[email protected]>
Reviewed-by: Madhavan Srinivasan <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Dan Williams <[email protected]>
  • Loading branch information
kjain101 authored and djbw committed Mar 10, 2022
1 parent 0fab1ba commit 4c08d4b
Show file tree
Hide file tree
Showing 2 changed files with 230 additions and 0 deletions.
5 changes: 5 additions & 0 deletions arch/powerpc/include/asm/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ struct dev_archdata {

/* Architecture-specific data attached to a platform device. */
struct pdev_archdata {
	u64 dma_mask;

	/*
	 * Opaque pointer to the nvdimm_pmu structure; kept here so the
	 * pmu device can be unregistered later.
	 */
	void *priv;
};

#endif /* _ASM_POWERPC_DEVICE_H */
225 changes: 225 additions & 0 deletions arch/powerpc/platforms/pseries/papr_scm.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
#include <linux/perf_event.h>

#define BIND_ANY_ADDR (~0ul)

Expand Down Expand Up @@ -68,6 +69,8 @@
#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
#define PAPR_SCM_PERF_STATS_VERSION 0x1

/* Get the struct nvdimm_pmu that embeds the given struct pmu as its 'pmu' member */
#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)

/* Struct holding a single performance metric */
struct papr_scm_perf_stat {
u8 stat_id[8];
Expand Down Expand Up @@ -120,6 +123,9 @@ struct papr_scm_priv {

/* length of the stat buffer as expected by phyp */
size_t stat_buffer_len;

/* array to have event_code and stat_id mappings */
char **nvdimm_events_map;
};

static int papr_scm_pmem_flush(struct nd_region *nd_region,
Expand Down Expand Up @@ -340,6 +346,218 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
return 0;
}

/*
 * Query the current value of the performance stat that backs @event.
 *
 * @event: perf event; event->attr.config is used as the index into
 *         p->nvdimm_events_map to look up the stat-id to request.
 * @dev:   device whose driver data is the papr_scm_priv of the nvdimm.
 * @count: on success, receives the stat value after be64_to_cpu().
 *
 * Returns 0 on success, -EINVAL when the driver data or the event map is
 * missing or the event code has no map entry, -ENOMEM when the request
 * buffer cannot be allocated, or the negative error reported by
 * drc_pmem_query_stats().
 */
static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
{
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;
	struct papr_scm_priv *p = dev_get_drvdata(dev);
	const char *stat_id;
	size_t nr_events;
	int rc, size;

	if (!p || !p->nvdimm_events_map)
		return -EINVAL;

	/*
	 * The event map holds one entry per stat that fits in the phyp stat
	 * buffer; reject event codes that would index past its end.
	 */
	if (p->stat_buffer_len < sizeof(struct papr_scm_perf_stats))
		return -EINVAL;

	nr_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
			/ sizeof(struct papr_scm_perf_stat);
	if (event->attr.config >= nr_events)
		return -EINVAL;

	stat_id = p->nvdimm_events_map[event->attr.config];
	if (!stat_id)
		return -EINVAL;

	/* Allocate request buffer enough to hold single performance stat */
	size = sizeof(struct papr_scm_perf_stats) +
		sizeof(struct papr_scm_perf_stat);

	stats = kzalloc(size, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	stat = &stats->scm_statistic[0];

	/*
	 * The map entry is a NUL-terminated string that may be shorter than
	 * the fixed-size stat_id field, so never read past its terminator.
	 * The remainder of stat_id stays zeroed from kzalloc().
	 */
	memcpy(stat->stat_id, stat_id,
	       strnlen(stat_id, sizeof(stat->stat_id)));
	stat->stat_val = 0;

	rc = drc_pmem_query_stats(p, stats, 1);
	if (rc < 0) {
		kfree(stats);
		return rc;
	}

	*count = be64_to_cpu(stat->stat_val);
	kfree(stats);
	return 0;
}

/*
 * perf event_init callback: validate that @event can be counted by this pmu.
 *
 * Returns 0 when the event is acceptable, -ENOENT when the event type does
 * not belong to this pmu, -EOPNOTSUPP for sampling or branch-stack events,
 * and -EINVAL when the pmu or its driver data is missing or the event code
 * is outside 1..16.
 */
static int papr_scm_pmu_event_init(struct perf_event *event)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
	struct papr_scm_priv *priv;

	if (!nd_pmu)
		return -EINVAL;

	/* Only accept events that were created for this pmu type */
	if (event->pmu->type != event->attr.type)
		return -ENOENT;

	/* Neither event sampling nor branch sampling is supported */
	if (is_sampling_event(event) || has_branch_stack(event))
		return -EOPNOTSUPP;

	priv = nd_pmu->dev->driver_data;
	if (!priv)
		return -EINVAL;

	/* Event codes run from 1 to 16; anything else is invalid */
	if (event->attr.config < 1 || event->attr.config > 16)
		return -EINVAL;

	return 0;
}

/*
 * perf add callback. When PERF_EF_START is set in @flags, the current
 * counter value is fetched and stored as the event's previous count.
 *
 * Returns 0 on success, -EINVAL when the pmu is missing, or the error
 * returned by papr_scm_pmu_get_value().
 */
static int papr_scm_pmu_add(struct perf_event *event, int flags)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
	u64 start_val;
	int err;

	if (!nd_pmu)
		return -EINVAL;

	/* Nothing to snapshot unless the event is started right away */
	if (!(flags & PERF_EF_START))
		return 0;

	err = papr_scm_pmu_get_value(event, nd_pmu->dev, &start_val);
	if (err)
		return err;

	local64_set(&event->hw.prev_count, start_val);
	return 0;
}

/*
 * perf read callback: fetch the current counter value, make it the new
 * previous count, and add the delta since the last read to event->count.
 * If the value cannot be fetched the event is left untouched.
 */
static void papr_scm_pmu_read(struct perf_event *event)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
	u64 last, cur;

	if (!nd_pmu)
		return;

	if (papr_scm_pmu_get_value(event, nd_pmu->dev, &cur))
		return;

	/* Swap in the new snapshot and account for the difference */
	last = local64_xchg(&event->hw.prev_count, cur);
	local64_add(cur - last, &event->count);
}

/*
 * perf del callback: take a final reading so the event count is up to
 * date when the event is removed. @flags is not used.
 */
static void papr_scm_pmu_del(struct perf_event *event, int flags)
{
	papr_scm_pmu_read(event);
}

/*
 * Build p->nvdimm_events_map, the table that maps an event code (array
 * index) to the stat-id string to request from phyp.
 *
 * All stats are queried once and each returned stat_id is duplicated into
 * its own NUL-terminated string. The table is terminated by a NULL entry.
 * p->nvdimm_events_map is only assigned once the table is complete, so it
 * is left untouched on any failure.
 *
 * @p:      driver private data; stat_buffer_len must already be set.
 * @nd_pmu: unused here.
 *
 * Returns 0 on success, -ENOENT when no usable stat buffer length is known,
 * -ENOMEM on allocation failure, or the error from drc_pmem_query_stats().
 */
static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
{
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;
	char **events_map;
	char *statid;
	u32 available_events, index;
	int rc;

	/* A buffer smaller than the header cannot hold any stat */
	if (!p->stat_buffer_len ||
	    p->stat_buffer_len < sizeof(struct papr_scm_perf_stats))
		return -ENOENT;

	available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
			/ sizeof(struct papr_scm_perf_stat);

	/* Allocate the buffer for phyp where stats are written */
	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	/* One slot per event plus one for the terminating NULL entry */
	events_map = kcalloc(available_events + 1, sizeof(*events_map), GFP_KERNEL);
	if (!events_map) {
		rc = -ENOMEM;
		goto out_stats;
	}

	/* Called to get list of events supported */
	rc = drc_pmem_query_stats(p, stats, 0);
	if (rc)
		goto out_events_map;

	for (index = 0, stat = stats->scm_statistic;
	     index < available_events; index++, ++stat) {
		/*
		 * stat_id is a fixed-size field that need not be
		 * NUL-terminated, so copy exactly its size into a zeroed
		 * buffer that has room for the terminator.
		 */
		statid = kzalloc(sizeof(stat->stat_id) + 1, GFP_KERNEL);
		if (!statid) {
			rc = -ENOMEM;
			goto out_events_map;
		}

		memcpy(statid, stat->stat_id, sizeof(stat->stat_id));
		events_map[index] = statid;
	}

	/* events_map[available_events] is already NULL from kcalloc() */
	p->nvdimm_events_map = events_map;
	kfree(stats);
	return 0;

out_events_map:
	/* Unfilled slots are NULL, which kfree() accepts */
	for (index = 0; index < available_events; index++)
		kfree(events_map[index]);
	kfree(events_map);
out_stats:
	kfree(stats);
	return rc;
}

static void papr_scm_pmu_register(struct papr_scm_priv *p)
{
struct nvdimm_pmu *nd_pmu;
int rc, nodeid;

nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
if (!nd_pmu) {
rc = -ENOMEM;
goto pmu_err_print;
}

rc = papr_scm_pmu_check_events(p, nd_pmu);
if (rc)
goto pmu_check_events_err;

nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
nd_pmu->pmu.event_init = papr_scm_pmu_event_init;
nd_pmu->pmu.read = papr_scm_pmu_read;
nd_pmu->pmu.add = papr_scm_pmu_add;
nd_pmu->pmu.del = papr_scm_pmu_del;

nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
PERF_PMU_CAP_NO_EXCLUDE;

/*updating the cpumask variable */
nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);

rc = register_nvdimm_pmu(nd_pmu, p->pdev);
if (rc)
goto pmu_register_err;

/*
* Set archdata.priv value to nvdimm_pmu structure, to handle the
* unregistering of pmu device.
*/
p->pdev->archdata.priv = nd_pmu;
return;

pmu_register_err:
kfree(p->nvdimm_events_map);
pmu_check_events_err:
kfree(nd_pmu);
pmu_err_print:
dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
}

/*
* Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
* health information.
Expand Down Expand Up @@ -1236,6 +1454,7 @@ static int papr_scm_probe(struct platform_device *pdev)
goto err2;

platform_set_drvdata(pdev, p);
papr_scm_pmu_register(p);

return 0;

Expand All @@ -1254,6 +1473,12 @@ static int papr_scm_remove(struct platform_device *pdev)

nvdimm_bus_unregister(p->bus);
drc_pmem_unbind(p);

if (pdev->archdata.priv)
unregister_nvdimm_pmu(pdev->archdata.priv);

pdev->archdata.priv = NULL;
kfree(p->nvdimm_events_map);
kfree(p->bus_desc.provider_name);
kfree(p);

Expand Down

0 comments on commit 4c08d4b

Please sign in to comment.