Skip to content

Commit fc26a1f

Browse files
committed
POC for JP
Signed-off-by: James Brodman <[email protected]>
1 parent 91a9796 commit fc26a1f

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,25 @@ class __SYCL_EXPORT handler {
871871

872872
template <typename KernelName = detail::auto_name, typename KernelType>
873873
void parallel_for(range<1> NumWorkItems, KernelType KernelFunc) {
874-
parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
874+
bool isPrime = true;
875+
size_t R = NumWorkItems[0];
876+
for (int i = 2; i <= R/2; i++) {
877+
if (R % i == 0) {
878+
isPrime = false;
879+
break;
880+
}
881+
}
882+
if (isPrime) {
883+
size_t R64 = (R + 63) & -64;
884+
parallel_for_lambda_impl<KernelName>(range<1>(R64), [=](id<1> ID) {
885+
if (ID < NumWorkItems) {
886+
KernelFunc(ID);
887+
}
888+
});
889+
} else {
890+
parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
891+
}
892+
875893
}
876894

877895
template <typename KernelName = detail::auto_name, typename KernelType>

sycl/test/usm/john.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// XFAIL: cuda
2+
// piextUSM*Alloc functions for CUDA are not behaving as described in
3+
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
4+
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
5+
//
6+
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t1.out
7+
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
8+
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
9+
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
10+
11+
//==------------ john.cpp - Prime-sized range parallel_for test ------------==//
12+
//
13+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
14+
// See https://llvm.org/LICENSE.txt for license information.
15+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
16+
//
17+
//===----------------------------------------------------------------------===//
18+
19+
#include <CL/sycl.hpp>

#include <algorithm>
#include <cassert>
#include <iostream>
20+
21+
using namespace sycl;
22+
23+
int main() {
24+
const int MAGIC_NUM = 42;
25+
const int N = 17;
26+
27+
queue q;
28+
auto dev = q.get_device();
29+
auto ctxt = q.get_context();
30+
31+
auto A = malloc_shared<int>(N, q);
32+
auto B = malloc_shared<int>(N, q);
33+
auto C = malloc_shared<int>(std::max(N,64), q);
34+
35+
for (int i = 0; i < N; i++) {
36+
A[i] = 1;
37+
B[i] = 2;
38+
}
39+
40+
q.parallel_for(range<1>(N), [=](id<1> i) { C[i] = A[i] + B[i]; });
41+
q.wait();
42+
43+
for (int i = 0; i < N; i++) {
44+
assert(C[i] == 3);
45+
}
46+
47+
for (int i = 0; i < 64; i++) {
48+
std::cout << C[i] << " ";
49+
}
50+
std::cout << std::endl;
51+
52+
free(A, q);
53+
free(B, q);
54+
free(C, q);
55+
56+
return 0;
57+
}

0 commit comments

Comments
 (0)