Commit 5d5a5ce

Author: Phil Elwell (committed)

vchiq_arm: Two cacheing fixes

1) Make fragment size vary with cache line size

   Without this patch, non-cache-line-aligned transfers may corrupt (or be
   corrupted by) adjacent data structures. Both ARM and VC need to be updated
   to enable this feature. This is ensured by having the loader apply a new DT
   parameter - cache-line-size. The existence of this parameter guarantees
   that the kernel is capable, and the parameter will only be modified from
   the safe default if the loader is capable.

2) Flush/invalidate vmalloc'd memory, and invalidate after reads

1 parent 038b780 commit 5d5a5ce
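
The corruption described in (1) arises because a DMA transfer that does not start and end on cache-line boundaries shares its first and last cache lines with unrelated data; those partial lines are therefore bounced through per-transfer fragment buffers. A standalone sketch of the alignment arithmetic involved (hypothetical values, not driver code):

#include <stdio.h>

int main(void)
{
        unsigned int cache_line_size = 32;      /* safe default, as in the DT */
        unsigned int offset = 10, length = 100; /* hypothetical read transfer */

        /* Bytes before the first cache-line boundary and beyond the last one;
         * only these go through the fragment (bounce) buffers. */
        unsigned int head_bytes = (cache_line_size - offset) &
                                  (cache_line_size - 1);
        unsigned int tail_bytes = (offset + length) & (cache_line_size - 1);

        printf("head %u bytes, tail %u bytes\n", head_bytes, tail_bytes); /* 22, 14 */
        return 0;
}
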


2 files changed: +77 -40 lines changed


arch/arm/boot/dts/bcm2708_common.dtsi

Lines changed: 5 additions & 0 deletions
@@ -218,6 +218,7 @@
                         compatible = "brcm,bcm2835-vchiq";
                         reg = <0x7e00b840 0xf>;
                         interrupts = <0 2>;
+                        cache-line-size = <32>;
                 };
 
                 thermal: thermal {
@@ -270,4 +271,8 @@
                         clock-frequency = <126000000>;
                 };
         };
+
+        __overrides__ {
+                cache_line_size = <&vchiq>, "cache-line-size:0";
+        };
 };
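
The new __overrides__ entry maps a loader-supplied cache_line_size parameter onto the first cell of the vchiq node's "cache-line-size" property. On the kernel side (next file) the value is read with of_property_read_u32(), which leaves its output untouched when the property is absent, so an older Device Tree simply keeps the compiled-in default. A hedged sketch of that pattern (not the driver source; the default value here is illustrative):

#include <linux/of.h>

static unsigned int g_cache_line_size = 32;     /* illustrative default */
static unsigned int g_fragments_size;

static void read_cache_line_size(const struct device_node *node)
{
        /* of_property_read_u32() returns an error and does not modify the
         * output when "cache-line-size" is missing, so the default survives
         * on firmware that does not supply the parameter. */
        (void)of_property_read_u32(node, "cache-line-size",
                                   &g_cache_line_size);
        g_fragments_size = 2 * g_cache_line_size;       /* head + tail lines */
}
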

drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c

Lines changed: 72 additions & 40 deletions
@@ -42,6 +42,7 @@
 #include <linux/platform_data/mailbox-bcm2708.h>
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
+#include <linux/of.h>
 #include <asm/pgtable.h>
 
 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
@@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
 } VCHIQ_2835_ARM_STATE_T;
 
 static void __iomem *g_regs;
-static FRAGMENTS_T *g_fragments_base;
-static FRAGMENTS_T *g_free_fragments;
+static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE);
+static unsigned int g_fragments_size;
+static char *g_fragments_base;
+static char *g_free_fragments;
 static struct semaphore g_free_fragments_sema;
 static unsigned long g_virt_to_bus_offset;
 
@@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
 
         g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
 
+        (void)of_property_read_u32(dev->of_node, "cache-line-size",
+                                   &g_cache_line_size);
+        g_fragments_size = 2 * g_cache_line_size;
+
         /* Allocate space for the channels in coherent memory */
         slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
-        frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
+        frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
 
         slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
                                        &slot_phys, GFP_KERNEL);
@@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
         vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
                 MAX_FRAGMENTS;
 
-        g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
+        g_fragments_base = (char *)slot_mem + slot_mem_size;
         slot_mem_size += frag_mem_size;
 
         g_free_fragments = g_fragments_base;
         for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
-                *(FRAGMENTS_T **)&g_fragments_base[i] =
-                        &g_fragments_base[i + 1];
+                *(char **)&g_fragments_base[i*g_fragments_size] =
+                        &g_fragments_base[(i + 1)*g_fragments_size];
         }
-        *(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
+        *(char **)&g_fragments_base[i * g_fragments_size] = NULL;
         sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
 
         if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
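
With FRAGMENTS_T gone, the fragment pool is a flat byte array and the free list is intrusive: each free fragment's first sizeof(char *) bytes hold a pointer to the next free fragment, and fragments sit g_fragments_size bytes apart, so a fragment's index is (ptr - base) / size. A standalone sketch of that data structure (names hypothetical, not driver code):

#include <stdlib.h>

#define MAX_FRAGMENTS 8

static char *fragments_base;
static char *free_fragments;

static int init_free_list(unsigned int frag_size)
{
        unsigned int i;

        fragments_base = malloc((size_t)MAX_FRAGMENTS * frag_size);
        if (!fragments_base)
                return -1;

        /* Chain fragment i to fragment i + 1; terminate the list with NULL. */
        free_fragments = fragments_base;
        for (i = 0; i < (MAX_FRAGMENTS - 1); i++)
                *(char **)&fragments_base[i * frag_size] =
                        &fragments_base[(i + 1) * frag_size];
        *(char **)&fragments_base[i * frag_size] = NULL;
        return 0;
}

/* Pop one fragment; (frag - fragments_base) / frag_size recovers the index
 * that the driver encodes into pagelist->type. */
static char *alloc_fragment(void)
{
        char *frag = free_fragments;

        if (frag)
                free_fragments = *(char **)frag;
        return frag;
}
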
@@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id)
 ** cached area.
 
 ** N.B. This implementation plays slightly fast and loose with the Linux
-** driver programming rules, e.g. its use of __virt_to_bus instead of
+** driver programming rules, e.g. its use of dmac_map_area instead of
 ** dma_map_single, but it isn't a multi-platform driver and it benefits
 ** from increased speed as a result.
 */
@@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 {
         PAGELIST_T *pagelist;
         struct page **pages;
-        struct page *page;
         unsigned long *addrs;
         unsigned int num_pages, offset, i;
         char *addr, *base_addr, *next_addr;
@@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
         pages = (struct page **)(addrs + num_pages + 1);
 
         if (is_vmalloc_addr(buf)) {
-                for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
-                        pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
+                int dir = (type == PAGELIST_WRITE) ?
+                        DMA_TO_DEVICE : DMA_FROM_DEVICE;
+                unsigned long length = pagelist->length;
+                unsigned int offset = pagelist->offset;
+
+                for (actual_pages = 0; actual_pages < num_pages;
+                     actual_pages++) {
+                        struct page *pg = vmalloc_to_page(buf + (actual_pages *
+                                                                 PAGE_SIZE));
+                        size_t bytes = PAGE_SIZE - offset;
+
+                        if (bytes > length)
+                                bytes = length;
+                        pages[actual_pages] = pg;
+                        dmac_map_area(page_address(pg) + offset, bytes, dir);
+                        length -= bytes;
+                        offset = 0;
                 }
-                *need_release = 0; /* do not try and release vmalloc pages */
+                *need_release = 0; /* do not try and release vmalloc pages */
         } else {
                 down_read(&task->mm->mmap_sem);
                 actual_pages = get_user_pages(task, task->mm,
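
The vmalloc path now walks the buffer page by page: the first page starts at pagelist->offset, every later page at 0, and each chunk is clamped to the bytes still remaining before dmac_map_area() is called on it (the read-back path in free_pagelist() does the same walk with dmac_unmap_area()). A standalone sketch of that loop shape (not driver code):

#include <stdio.h>

#define PAGE_SIZE 4096u

static void walk_pages(unsigned int offset, unsigned long length)
{
        unsigned int page;

        for (page = 0; length != 0; page++) {
                unsigned long bytes = PAGE_SIZE - offset;

                if (bytes > length)
                        bytes = length;
                /* The driver maps/unmaps page_address(pg) + offset here. */
                printf("page %u: offset %u, %lu bytes\n", page, offset, bytes);
                length -= bytes;
                offset = 0;
        }
}

int main(void)
{
        walk_pages(10, 100);    /* single partial page */
        walk_pages(4000, 5000); /* spans three pages: 96 + 4096 + 808 bytes */
        return 0;
}
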
@@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
                         actual_pages = -ENOMEM;
                         return actual_pages;
                 }
-                *need_release = 1; /* release user pages */
+                *need_release = 1; /* release user pages */
         }
 
         pagelist->length = count;
@@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
 
         /* Partial cache lines (fragments) require special measures */
         if ((type == PAGELIST_READ) &&
-                ((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
+                ((pagelist->offset & (g_cache_line_size - 1)) ||
                 ((pagelist->offset + pagelist->length) &
-                (CACHE_LINE_SIZE - 1)))) {
-                FRAGMENTS_T *fragments;
+                (g_cache_line_size - 1)))) {
+                char *fragments;
 
                 if (down_interruptible(&g_free_fragments_sema) != 0) {
                         kfree(pagelist);
@@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
                 WARN_ON(g_free_fragments == NULL);
 
                 down(&g_free_fragments_mutex);
-                fragments = (FRAGMENTS_T *) g_free_fragments;
+                fragments = g_free_fragments;
                 WARN_ON(fragments == NULL);
-                g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
+                g_free_fragments = *(char **) g_free_fragments;
                 up(&g_free_fragments_mutex);
-                pagelist->type =
-                        PAGELIST_READ_WITH_FRAGMENTS + (fragments -
-                                                        g_fragments_base);
+                pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
+                        (fragments - g_fragments_base) / g_fragments_size;
         }
 
-        for (page = virt_to_page(pagelist);
-                page <= virt_to_page(addrs + num_pages - 1); page++) {
-                flush_dcache_page(page);
-        }
+        dmac_flush_range(pagelist, addrs + num_pages);
 
         *ppagelist = pagelist;
 
@@ -502,46 +519,61 @@ free_pagelist(PAGELIST_T *pagelist, int actual)
 
         /* Deal with any partial cache lines (fragments) */
         if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
-                FRAGMENTS_T *fragments = g_fragments_base +
-                        (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
+                char *fragments = g_fragments_base +
+                        (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
+                        g_fragments_size;
                 int head_bytes, tail_bytes;
-                head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
-                        (CACHE_LINE_SIZE - 1);
+                head_bytes = (g_cache_line_size - pagelist->offset) &
+                        (g_cache_line_size - 1);
                 tail_bytes = (pagelist->offset + actual) &
-                        (CACHE_LINE_SIZE - 1);
+                        (g_cache_line_size - 1);
 
                 if ((actual >= 0) && (head_bytes != 0)) {
                         if (head_bytes > actual)
                                 head_bytes = actual;
 
                         memcpy((char *)page_address(pages[0]) +
                                 pagelist->offset,
-                                fragments->headbuf,
+                                fragments,
                                 head_bytes);
                 }
                 if ((actual >= 0) && (head_bytes < actual) &&
                         (tail_bytes != 0)) {
                         memcpy((char *)page_address(pages[num_pages - 1]) +
                                 ((pagelist->offset + actual) &
-                                (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
-                                fragments->tailbuf, tail_bytes);
+                                (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
+                                fragments + g_cache_line_size,
+                                tail_bytes);
                 }
 
                 down(&g_free_fragments_mutex);
-                *(FRAGMENTS_T **) fragments = g_free_fragments;
+                *(char **)fragments = g_free_fragments;
                 g_free_fragments = fragments;
                 up(&g_free_fragments_mutex);
                 up(&g_free_fragments_sema);
         }
 
-        if (*need_release) {
-                for (i = 0; i < num_pages; i++) {
-                        if (pagelist->type != PAGELIST_WRITE)
-                                set_page_dirty(pages[i]);
+        if (*need_release) {
+                unsigned int length = pagelist->length;
+                unsigned int offset = pagelist->offset;
 
-                        page_cache_release(pages[i]);
+                for (i = 0; i < num_pages; i++) {
+                        struct page *pg = pages[i];
+
+                        if (pagelist->type != PAGELIST_WRITE) {
+                                unsigned int bytes = PAGE_SIZE - offset;
+
+                                if (bytes > length)
+                                        bytes = length;
+                                dmac_unmap_area(page_address(pg) + offset,
+                                                bytes, DMA_FROM_DEVICE);
+                                length -= bytes;
+                                offset = 0;
+                                set_page_dirty(pg);
+                        }
+                        page_cache_release(pg);
                 }
-        }
+        }
 
         kfree(pagelist);
 }
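
A fragment is now just g_fragments_size = 2 * g_cache_line_size raw bytes: the head bounce buffer occupies the first cache line and the tail bounce buffer the second, replacing the old FRAGMENTS_T headbuf/tailbuf fields. A standalone sketch of that layout (names hypothetical, not driver code):

#include <stddef.h>

struct frag_view {
        char *head;     /* bytes [0, line): partial first cache line */
        char *tail;     /* bytes [line, 2 * line): partial last cache line */
};

/* Locate fragment `index` in the pool and expose its two halves. */
static struct frag_view frag_view_at(char *fragments_base, size_t index,
                                     size_t cache_line_size)
{
        char *frag = fragments_base + index * (2 * cache_line_size);
        struct frag_view v = { frag, frag + cache_line_size };

        return v;
}
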
