-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
Copy pathmandelbrot.mojo
109 lines (85 loc) · 3.37 KB
/
mandelbrot.mojo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# ===----------------------------------------------------------------------=== #
# Copyright (c) 2023, Modular Inc. All rights reserved.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions:
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
# RUN: %mojo %s | FileCheck %s
from math import iota
from sys import num_physical_cores, simdwidthof
import benchmark
from algorithm import parallelize, vectorize
from complex import ComplexFloat64, ComplexSIMD
from memory import UnsafePointer
alias float_type = DType.float32
alias int_type = DType.int32
alias simd_width = 2 * simdwidthof[float_type]()
alias unit = benchmark.Unit.ms
alias cols = 960
alias rows = 960
alias MAX_ITERS = 200
alias min_x = -2.0
alias max_x = 0.6
alias min_y = -1.5
alias max_y = 1.5
struct Matrix[dtype: DType, rows: Int, cols: Int]:
var data: UnsafePointer[Scalar[dtype]]
fn __init__(out self):
self.data = UnsafePointer[Scalar[dtype]].alloc(rows * cols)
fn store[nelts: Int](self, row: Int, col: Int, val: SIMD[dtype, nelts]):
self.data.store(row * cols + col, val)
fn mandelbrot_kernel_SIMD[
simd_width: Int
](c: ComplexSIMD[float_type, simd_width]) -> SIMD[int_type, simd_width]:
"""A vectorized implementation of the inner mandelbrot computation."""
var cx = c.re
var cy = c.im
var x = SIMD[float_type, simd_width](0)
var y = SIMD[float_type, simd_width](0)
var iters = SIMD[int_type, simd_width](0)
var t: SIMD[DType.bool, simd_width] = True
for _ in range(MAX_ITERS):
if not any(t):
break
var y2 = y * y
y = x.fma(y + y, cy)
t = x.fma(x, y2) <= 4
x = x.fma(x, cx - y2)
iters = t.select(iters + 1, iters)
return iters
fn main() raises:
var matrix = Matrix[int_type, rows, cols]()
@parameter
fn worker(row: Int):
alias scale_x = (max_x - min_x) / cols
alias scale_y = (max_y - min_y) / rows
@parameter
fn compute_vector[simd_width: Int](col: Int):
"""Each time we operate on a `simd_width` vector of pixels."""
var cx = min_x + (col + iota[float_type, simd_width]()) * scale_x
var cy = min_y + row * SIMD[float_type, simd_width](scale_y)
var c = ComplexSIMD[float_type, simd_width](cx, cy)
matrix.store(row, col, mandelbrot_kernel_SIMD(c))
# Vectorize the call to compute_vector with a chunk of pixels.
vectorize[compute_vector, simd_width, size=cols]()
@parameter
fn bench():
for row in range(rows):
worker(row)
@parameter
fn bench_parallel():
parallelize[worker](rows, rows)
print("Number of physical cores:", num_physical_cores())
var vectorized = benchmark.run[bench]().mean(unit)
print("Vectorized:", vectorized, unit)
var parallelized = benchmark.run[bench_parallel]().mean(unit)
print("Parallelized:", parallelized, unit)
# CHECK: Parallel speedup
print("Parallel speedup:", vectorized / parallelized)
matrix.data.free()