Skip to content

Commit 26a4dc2

Browse files
jasuarez authored and melissawen committed
drm/v3d: Expose performance counters to userspace
The V3D engine has several hardware performance counters that can be of interest for userspace performance analysis tools. This exposes new ioctls to create and destroy performance monitor objects, as well as to query the counter values. Each created performance monitor object has an ID that can be attached to CL/CSD submissions, so the driver enables the requested counters when the job is submitted, and updates the performance monitor values when the job is done. It is up to the user to ensure all the jobs have been finished before getting the performance monitor values. It is also up to the user to properly synchronize BCL jobs when submitting jobs with different performance monitors attached. Cc: Daniel Vetter <[email protected]> Cc: David Airlie <[email protected]> Cc: Emma Anholt <[email protected]> To: [email protected] Signed-off-by: Juan A. Suarez Romero <[email protected]> Acked-by: Melissa Wen <[email protected]> Signed-off-by: Melissa Wen <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 56f0729 commit 26a4dc2

File tree

8 files changed

+470
-0
lines changed

8 files changed

+470
-0
lines changed

drivers/gpu/drm/v3d/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ v3d-y := \
99
v3d_gem.o \
1010
v3d_irq.o \
1111
v3d_mmu.o \
12+
v3d_perfmon.o \
1213
v3d_trace_points.o \
1314
v3d_sched.o
1415

drivers/gpu/drm/v3d/v3d_drv.c

+8
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
9494
case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
9595
args->value = 1;
9696
return 0;
97+
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
98+
args->value = (v3d->ver >= 40);
99+
return 0;
97100
default:
98101
DRM_DEBUG("Unknown parameter %d\n", args->param);
99102
return -EINVAL;
@@ -121,6 +124,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
121124
1, NULL);
122125
}
123126

127+
v3d_perfmon_open_file(v3d_priv);
124128
file->driver_priv = v3d_priv;
125129

126130
return 0;
@@ -136,6 +140,7 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
136140
drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
137141
}
138142

143+
v3d_perfmon_close_file(v3d_priv);
139144
kfree(v3d_priv);
140145
}
141146

@@ -156,6 +161,9 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
156161
DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
157162
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
158163
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
164+
DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW),
165+
DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
166+
DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
159167
};
160168

161169
static const struct drm_driver v3d_drm_driver = {

drivers/gpu/drm/v3d/v3d_drv.h

+63
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,40 @@ struct v3d_queue_state {
3737
u64 emit_seqno;
3838
};
3939

40+
/* Performance monitor object. The perfmon lifetime is controlled by userspace
41+
* using perfmon related ioctls. A perfmon can be attached to a submit_cl
42+
* request, and when this is the case, HW perf counters will be activated just
43+
* before the submit_cl is submitted to the GPU and disabled when the job is
44+
* done. This way, only events related to a specific job will be counted.
45+
*/
46+
struct v3d_perfmon {
47+
/* Tracks the number of users of the perfmon, when this counter reaches
48+
* zero the perfmon is destroyed.
49+
*/
50+
refcount_t refcnt;
51+
52+
/* Protects perfmon stop, as it can be invoked from multiple places. */
53+
struct mutex lock;
54+
55+
/* Number of counters activated in this perfmon instance
56+
* (should be less than DRM_V3D_MAX_PERF_COUNTERS).
57+
*/
58+
u8 ncounters;
59+
60+
/* Events counted by the HW perf counters. */
61+
u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
62+
63+
/* Storage for counter values. Counters are incremented by the
64+
* HW perf counter values every time the perfmon is attached
65+
* to a GPU job. This way, perfmon users don't have to
66+
* retrieve the results after each job if they want to track
67+
* events covering several submissions. Note that counter
68+
* values can't be reset, but you can fake a reset by
69+
* destroying the perfmon and creating a new one.
70+
*/
71+
u64 values[];
72+
};
73+
4074
struct v3d_dev {
4175
struct drm_device drm;
4276

@@ -89,6 +123,9 @@ struct v3d_dev {
89123
*/
90124
spinlock_t job_lock;
91125

126+
/* Used to track the active perfmon if any. */
127+
struct v3d_perfmon *active_perfmon;
128+
92129
/* Protects bo_stats */
93130
struct mutex bo_lock;
94131

@@ -133,6 +170,11 @@ v3d_has_csd(struct v3d_dev *v3d)
133170
struct v3d_file_priv {
134171
struct v3d_dev *v3d;
135172

173+
struct {
174+
struct idr idr;
175+
struct mutex lock;
176+
} perfmon;
177+
136178
struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
137179
};
138180

@@ -205,6 +247,11 @@ struct v3d_job {
205247
*/
206248
struct dma_fence *done_fence;
207249

250+
/* Pointer to a performance monitor object if the user requested it,
251+
* NULL otherwise.
252+
*/
253+
struct v3d_perfmon *perfmon;
254+
208255
/* Callback for the freeing of the job on refcount going to 0. */
209256
void (*free)(struct kref *ref);
210257
};
@@ -353,3 +400,19 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo);
353400
/* v3d_sched.c */
354401
int v3d_sched_init(struct v3d_dev *v3d);
355402
void v3d_sched_fini(struct v3d_dev *v3d);
403+
404+
/* v3d_perfmon.c */
405+
void v3d_perfmon_get(struct v3d_perfmon *perfmon);
406+
void v3d_perfmon_put(struct v3d_perfmon *perfmon);
407+
void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon);
408+
void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
409+
bool capture);
410+
struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id);
411+
void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv);
412+
void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv);
413+
int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
414+
struct drm_file *file_priv);
415+
int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
416+
struct drm_file *file_priv);
417+
int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
418+
struct drm_file *file_priv);

drivers/gpu/drm/v3d/v3d_gem.c

+31
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ v3d_reset(struct v3d_dev *v3d)
126126
v3d_mmu_set_page_table(v3d);
127127
v3d_irq_reset(v3d);
128128

129+
v3d_perfmon_stop(v3d, v3d->active_perfmon, false);
130+
129131
trace_v3d_reset_end(dev);
130132
}
131133

@@ -375,6 +377,9 @@ v3d_job_free(struct kref *ref)
375377
pm_runtime_mark_last_busy(job->v3d->drm.dev);
376378
pm_runtime_put_autosuspend(job->v3d->drm.dev);
377379

380+
if (job->perfmon)
381+
v3d_perfmon_put(job->perfmon);
382+
378383
kfree(job);
379384
}
380385

@@ -539,6 +544,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
539544

540545
trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
541546

547+
if (args->pad != 0)
548+
return -EINVAL;
549+
542550
if (args->flags != 0 &&
543551
args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
544552
DRM_INFO("invalid flags: %d\n", args->flags);
@@ -611,8 +619,20 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
611619
if (ret)
612620
goto fail;
613621

622+
if (args->perfmon_id) {
623+
render->base.perfmon = v3d_perfmon_find(v3d_priv,
624+
args->perfmon_id);
625+
626+
if (!render->base.perfmon) {
627+
ret = -ENOENT;
628+
goto fail;
629+
}
630+
}
631+
614632
mutex_lock(&v3d->sched_lock);
615633
if (bin) {
634+
bin->base.perfmon = render->base.perfmon;
635+
v3d_perfmon_get(bin->base.perfmon);
616636
ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
617637
if (ret)
618638
goto fail_unreserve;
@@ -633,6 +653,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
633653
ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
634654
if (ret)
635655
goto fail_unreserve;
656+
clean_job->perfmon = render->base.perfmon;
657+
v3d_perfmon_get(clean_job->perfmon);
636658
ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
637659
if (ret)
638660
goto fail_unreserve;
@@ -827,6 +849,15 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
827849
if (ret)
828850
goto fail;
829851

852+
if (args->perfmon_id) {
853+
job->base.perfmon = v3d_perfmon_find(v3d_priv,
854+
args->perfmon_id);
855+
if (!job->base.perfmon) {
856+
ret = -ENOENT;
857+
goto fail;
858+
}
859+
}
860+
830861
mutex_lock(&v3d->sched_lock);
831862
ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
832863
if (ret)

0 commit comments

Comments
 (0)