Skip to content

Commit dd0e89e

Browse files
dceraolorodrigovivi
authored andcommitted
drm/xe/gsc: GSC FW load
The GSC FW must be copied in a 4MB stolen memory allocation, whose GGTT address is then passed as a parameter to a dedicated load instruction submitted via the GSC engine. Since the GSC load is relatively slow (up to 250ms), we perform it asynchronously via a worker. This requires us to make sure that the worker has stopped before suspending/unloading. Note that we can't yet use xe_migrate_copy for the copy because it doesn't work with stolen memory right now, so we do a memcpy from the CPU side instead. v2: add comment about timeout value, fix GSC status checking before load (John) Bspec: 65306, 65346 Signed-off-by: Daniele Ceraolo Spurio <[email protected]> Cc: Alan Previn <[email protected]> Cc: John Harrison <[email protected]> Reviewed-by: John Harrison <[email protected]> Signed-off-by: Rodrigo Vivi <[email protected]>
1 parent 985d5a4 commit dd0e89e

File tree

7 files changed

+345
-1
lines changed

7 files changed

+345
-1
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright © 2023 Intel Corporation
4+
*/
5+
6+
#ifndef _XE_GSC_COMMANDS_H_
7+
#define _XE_GSC_COMMANDS_H_
8+
9+
#include "instructions/xe_instr_defs.h"
10+
11+
/*
12+
* All GSCCS-specific commands have fixed length, so we can include it in the
13+
* defines. Note that the generic GSC command header structure includes an
14+
* optional data field in bits 9-21, but there are no commands that actually use
15+
* it; some of the commands are instead defined as having an extended length
16+
* field spanning bits 0-15, even if the extra bits are not required because the
17+
* longest GSCCS command is only 8 dwords. To handle this, the defines below use
18+
* a single field for both data and len. If we ever get a command that does
19+
* actually have data and this approach doesn't work for it we can re-work it
20+
* at that point.
21+
*/
22+
23+
/* Header dword layout of a GSCCS command: opcode in 28:22, data+len in 21:0 */
#define GSC_OPCODE			REG_GENMASK(28, 22)
#define GSC_CMD_DATA_AND_LEN		REG_GENMASK(21, 0)

/*
 * Build the header dword of a GSC command: the GSC command type combined with
 * the given opcode and the merged data/length field value.
 */
#define __GSC_INSTR(opcode, data_len) \
	(XE_INSTR_GSC | \
	 REG_FIELD_PREP(GSC_OPCODE, opcode) | \
	 REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, data_len))

/* FW load command: opcode 1, data/len field value 2 */
#define GSC_FW_LOAD			__GSC_INSTR(1, 2)
/* Bit 31 flag, OR'ed into the size dword that follows the address dwords */
#define GSC_FW_LOAD_LIMIT_VALID		REG_BIT(31)
33+
34+
#endif

drivers/gpu/drm/xe/instructions/xe_instr_defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
#define XE_INSTR_CMD_TYPE GENMASK(31, 29)
1717
#define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
18+
#define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
1819
#define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
1920

2021
/*

drivers/gpu/drm/xe/regs/xe_gsc_regs.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright © 2023 Intel Corporation
4+
*/
5+
6+
#ifndef _XE_GSC_REGS_H_
7+
#define _XE_GSC_REGS_H_
8+
9+
#include <linux/compiler.h>
10+
#include <linux/types.h>
11+
12+
#include "regs/xe_reg_defs.h"
13+
14+
/* Definitions of GSC H/W registers, bits, etc */
15+
16+
/* Bases that the HECI register offsets below are added to */
#define MTL_GSC_HECI1_BASE			0x00116000
#define MTL_GSC_HECI2_BASE			0x00117000

/*
 * The contents of the FWSTS registers are defined by the firmware, and the
 * HECI1 and HECI2 instances do not necessarily share the same layout.
 */
#define HECI_FWSTS1(heci_base)			XE_REG((heci_base) + 0xc40)
#define HECI1_FWSTS1_CURRENT_STATE		REG_GENMASK(3, 0)
#define HECI1_FWSTS1_CURRENT_STATE_RESET	0
#define HECI1_FWSTS1_PROXY_STATE_NORMAL		5
#define HECI1_FWSTS1_INIT_COMPLETE		REG_BIT(9)
28+
29+
#endif

drivers/gpu/drm/xe/xe_gsc.c

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,174 @@
55

66
#include "xe_gsc.h"
77

8+
#include <drm/drm_managed.h>
9+
10+
#include "xe_bb.h"
11+
#include "xe_bo.h"
812
#include "xe_device.h"
13+
#include "xe_exec_queue.h"
914
#include "xe_gt.h"
1015
#include "xe_gt_printk.h"
16+
#include "xe_map.h"
17+
#include "xe_mmio.h"
18+
#include "xe_sched_job.h"
1119
#include "xe_uc_fw.h"
20+
#include "instructions/xe_gsc_commands.h"
21+
#include "regs/xe_gsc_regs.h"
1222

1323
static struct xe_gt *
1424
gsc_to_gt(struct xe_gsc *gsc)
1525
{
1626
return container_of(gsc, struct xe_gt, uc.gsc);
1727
}
1828

29+
static int memcpy_fw(struct xe_gsc *gsc)
30+
{
31+
struct xe_gt *gt = gsc_to_gt(gsc);
32+
struct xe_device *xe = gt_to_xe(gt);
33+
u32 fw_size = gsc->fw.size;
34+
void *storage;
35+
36+
/*
37+
* FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
38+
* a memcpy for now.
39+
*/
40+
storage = kmalloc(fw_size, GFP_KERNEL);
41+
if (!storage)
42+
return -ENOMEM;
43+
44+
xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
45+
xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
46+
xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
47+
48+
kfree(storage);
49+
50+
return 0;
51+
}
52+
53+
static int emit_gsc_upload(struct xe_gsc *gsc)
54+
{
55+
struct xe_gt *gt = gsc_to_gt(gsc);
56+
u64 offset = xe_bo_ggtt_addr(gsc->private);
57+
struct xe_bb *bb;
58+
struct xe_sched_job *job;
59+
struct dma_fence *fence;
60+
long timeout;
61+
62+
bb = xe_bb_new(gt, 4, false);
63+
if (IS_ERR(bb))
64+
return PTR_ERR(bb);
65+
66+
bb->cs[bb->len++] = GSC_FW_LOAD;
67+
bb->cs[bb->len++] = lower_32_bits(offset);
68+
bb->cs[bb->len++] = upper_32_bits(offset);
69+
bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
70+
71+
job = xe_bb_create_job(gsc->q, bb);
72+
if (IS_ERR(job)) {
73+
xe_bb_free(bb, NULL);
74+
return PTR_ERR(job);
75+
}
76+
77+
xe_sched_job_arm(job);
78+
fence = dma_fence_get(&job->drm.s_fence->finished);
79+
xe_sched_job_push(job);
80+
81+
timeout = dma_fence_wait_timeout(fence, false, HZ);
82+
dma_fence_put(fence);
83+
xe_bb_free(bb, NULL);
84+
if (timeout < 0)
85+
return timeout;
86+
else if (!timeout)
87+
return -ETIME;
88+
89+
return 0;
90+
}
91+
92+
static int gsc_fw_is_loaded(struct xe_gt *gt)
93+
{
94+
return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
95+
HECI1_FWSTS1_INIT_COMPLETE;
96+
}
97+
98+
static int gsc_fw_wait(struct xe_gt *gt)
99+
{
100+
/*
101+
* GSC load can take up to 250ms from the moment the instruction is
102+
* executed by the GSCCS. To account for possible submission delays or
103+
* other issues, we use a 500ms timeout in the wait here.
104+
*/
105+
return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
106+
HECI1_FWSTS1_INIT_COMPLETE,
107+
HECI1_FWSTS1_INIT_COMPLETE,
108+
500 * USEC_PER_MSEC, NULL, false);
109+
}
110+
111+
static int gsc_upload(struct xe_gsc *gsc)
112+
{
113+
struct xe_gt *gt = gsc_to_gt(gsc);
114+
struct xe_device *xe = gt_to_xe(gt);
115+
int err;
116+
117+
/* we should only be here if the init step were successful */
118+
xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
119+
120+
if (gsc_fw_is_loaded(gt)) {
121+
xe_gt_err(gt, "GSC already loaded at upload time\n");
122+
return -EEXIST;
123+
}
124+
125+
err = memcpy_fw(gsc);
126+
if (err) {
127+
xe_gt_err(gt, "Failed to memcpy GSC FW\n");
128+
return err;
129+
}
130+
131+
err = emit_gsc_upload(gsc);
132+
if (err) {
133+
xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
134+
return err;
135+
}
136+
137+
err = gsc_fw_wait(gt);
138+
if (err) {
139+
xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
140+
return err;
141+
}
142+
143+
xe_gt_dbg(gt, "GSC FW async load completed\n");
144+
145+
return 0;
146+
}
147+
148+
static void gsc_work(struct work_struct *work)
149+
{
150+
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
151+
struct xe_gt *gt = gsc_to_gt(gsc);
152+
struct xe_device *xe = gt_to_xe(gt);
153+
int ret;
154+
155+
xe_device_mem_access_get(xe);
156+
xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
157+
158+
ret = gsc_upload(gsc);
159+
if (ret && ret != -EEXIST)
160+
xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
161+
else
162+
xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
163+
164+
xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
165+
xe_device_mem_access_put(xe);
166+
}
167+
19168
int xe_gsc_init(struct xe_gsc *gsc)
20169
{
21170
struct xe_gt *gt = gsc_to_gt(gsc);
22171
struct xe_tile *tile = gt_to_tile(gt);
23172
int ret;
24173

25174
gsc->fw.type = XE_UC_FW_TYPE_GSC;
175+
INIT_WORK(&gsc->work, gsc_work);
26176

27177
/* The GSC uC is only available on the media GT */
28178
if (tile->media_gt && (gt != tile->media_gt)) {
@@ -50,3 +200,103 @@ int xe_gsc_init(struct xe_gsc *gsc)
50200
return ret;
51201
}
52202

203+
static void free_resources(struct drm_device *drm, void *arg)
204+
{
205+
struct xe_gsc *gsc = arg;
206+
207+
if (gsc->wq) {
208+
destroy_workqueue(gsc->wq);
209+
gsc->wq = NULL;
210+
}
211+
212+
if (gsc->q) {
213+
xe_exec_queue_put(gsc->q);
214+
gsc->q = NULL;
215+
}
216+
217+
if (gsc->private) {
218+
xe_bo_unpin_map_no_vm(gsc->private);
219+
gsc->private = NULL;
220+
}
221+
}
222+
223+
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
224+
{
225+
struct xe_gt *gt = gsc_to_gt(gsc);
226+
struct xe_tile *tile = gt_to_tile(gt);
227+
struct xe_device *xe = gt_to_xe(gt);
228+
struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
229+
struct xe_exec_queue *q;
230+
struct workqueue_struct *wq;
231+
struct xe_bo *bo;
232+
int err;
233+
234+
if (!xe_uc_fw_is_available(&gsc->fw))
235+
return 0;
236+
237+
if (!hwe)
238+
return -ENODEV;
239+
240+
bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
241+
ttm_bo_type_kernel,
242+
XE_BO_CREATE_STOLEN_BIT |
243+
XE_BO_CREATE_GGTT_BIT);
244+
if (IS_ERR(bo))
245+
return PTR_ERR(bo);
246+
247+
q = xe_exec_queue_create(xe, NULL,
248+
BIT(hwe->logical_instance), 1, hwe,
249+
EXEC_QUEUE_FLAG_KERNEL |
250+
EXEC_QUEUE_FLAG_PERMANENT);
251+
if (IS_ERR(q)) {
252+
xe_gt_err(gt, "Failed to create queue for GSC submission\n");
253+
err = PTR_ERR(q);
254+
goto out_bo;
255+
}
256+
257+
wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
258+
if (!wq) {
259+
err = -ENOMEM;
260+
goto out_q;
261+
}
262+
263+
gsc->private = bo;
264+
gsc->q = q;
265+
gsc->wq = wq;
266+
267+
err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
268+
if (err)
269+
return err;
270+
271+
xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
272+
273+
return 0;
274+
275+
out_q:
276+
xe_exec_queue_put(q);
277+
out_bo:
278+
xe_bo_unpin_map_no_vm(bo);
279+
return err;
280+
}
281+
282+
void xe_gsc_load_start(struct xe_gsc *gsc)
283+
{
284+
struct xe_gt *gt = gsc_to_gt(gsc);
285+
286+
if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
287+
return;
288+
289+
/* GSC FW survives GT reset and D3Hot */
290+
if (gsc_fw_is_loaded(gt)) {
291+
xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
292+
return;
293+
}
294+
295+
queue_work(gsc->wq, &gsc->work);
296+
}
297+
298+
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
299+
{
300+
if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
301+
flush_work(&gsc->work);
302+
}

drivers/gpu/drm/xe/xe_gsc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@
99
#include "xe_gsc_types.h"
1010

1111
int xe_gsc_init(struct xe_gsc *gsc);
12+
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
13+
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
14+
void xe_gsc_load_start(struct xe_gsc *gsc);
1215

1316
#endif

drivers/gpu/drm/xe/xe_gsc_types.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,13 @@
66
#ifndef _XE_GSC_TYPES_H_
77
#define _XE_GSC_TYPES_H_
88

9+
#include <linux/workqueue.h>
10+
911
#include "xe_uc_fw_types.h"
1012

13+
struct xe_bo;
14+
struct xe_exec_queue;
15+
1116
/**
1217
* struct xe_gsc - GSC
1318
*/
@@ -17,6 +22,18 @@ struct xe_gsc {
1722

1823
/** @security_version: SVN found in the fetched blob */
1924
u32 security_version;
25+
26+
/** @private: Private data for use by the GSC FW */
27+
struct xe_bo *private;
28+
29+
/** @q: Default queue used for submissions to GSC FW */
30+
struct xe_exec_queue *q;
31+
32+
/** @wq: workqueue to handle jobs for delayed load and proxy handling */
33+
struct workqueue_struct *wq;
34+
35+
/** @work: delayed load and proxy handling work */
36+
struct work_struct work;
2037
};
2138

2239
#endif

0 commit comments

Comments
 (0)