-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
Copy pathgrayscale.mojo
executable file
·129 lines (106 loc) · 4.45 KB
/
grayscale.mojo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# ===----------------------------------------------------------------------=== #
# Copyright (c) 2025, Modular Inc. All rights reserved.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions:
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
from math import ceildiv
from sys import has_nvidia_gpu_accelerator
from gpu.host import Dim
from gpu.id import block_dim, block_idx, thread_idx
from layout import Layout, LayoutTensor
from max.driver import Accelerator, Device, Tensor, accelerator, cpu
# Element type of every image channel stored in the tensors (8-bit unsigned).
alias channel_dtype = DType.uint8
# Floating-point type the GPU kernel casts channel values to before weighting
# them.
alias internal_float_dtype = DType.float32
# Rank of the image tensors built in `main`, which are shaped
# (height, width, channels).
alias tensor_rank = 3
def print_image[h: Int, w: Int](t: Tensor[channel_dtype, 3]):
    """Print channel 0 of `t` as an `h` x `w` grid of right-aligned values.

    Parameters:
        h: Number of rows to print.
        w: Number of columns to print.

    Args:
        t: Tensor whose `[row, col, 0]` entries are printed.
    """
    var pixels = t.to_layout_tensor()
    for y in range(h):
        for x in range(w):
            var intensity = pixels[y, x, 0]
            # Emit one leading space per missing digit so that one-, two- and
            # three-digit values all line up in the same column width.
            if intensity < 10:
                print(" ", end="")
            if intensity < 100:
                print(" ", end="")
            print(intensity, " ", end="")
        # Finish the current row.
        print("")
fn color_to_grayscale_conversion[
    image_layout: Layout,
    out_layout: Layout,
](
    width: Int,
    height: Int,
    image: LayoutTensor[channel_dtype, image_layout, MutableAnyOrigin],
    out: LayoutTensor[channel_dtype, out_layout, MutableAnyOrigin],
):
    """Convert one RGB pixel per GPU thread to a grayscale intensity.

    Each thread derives its (row, col) position from its block and thread
    indices, reads channels 0, 1 and 2 of `image` at that position, and writes
    the weighted sum to channel 0 of `out`. Threads whose position lies
    outside `width` x `height` do nothing.

    Parameters:
        image_layout: Layout of the input RGB tensor.
        out_layout: Layout of the output grayscale tensor.

    Args:
        width: Number of pixel columns to process.
        height: Number of pixel rows to process.
        image: Input tensor, indexed as `[row, col, channel]`.
        out: Output tensor, written at `[row, col, 0]`.
    """
    var row = block_dim.y * block_idx.y + thread_idx.y
    var col = block_dim.x * block_idx.x + thread_idx.x

    # The launch grid is rounded up to whole blocks, so some threads fall
    # outside the image and must not touch memory.
    if col < width and row < height:
        var red = image[row, col, 0].cast[internal_float_dtype]()
        var green = image[row, col, 1].cast[internal_float_dtype]()
        var blue = image[row, col, 2].cast[internal_float_dtype]()

        # Luminosity weights. They sum to 1.0 so overall brightness is
        # preserved (the previous 0.71 green weight made them sum to 0.99,
        # darkening every pixel).
        var gray = 0.21 * red + 0.72 * green + 0.07 * blue

        # Add 0.5 before converting so the value is rounded to the nearest
        # integer rather than having its fractional part dropped; a neutral
        # input (v, v, v) then maps back to v. The maximum, about 255.5,
        # still converts to 255, so the uint8 range is not exceeded.
        out[row, col, 0] = (gray + 0.5).cast[channel_dtype]()
def main():
    """Run the grayscale demo end to end.

    Builds a small RGB test image on the host, moves it to the accelerator,
    converts it with `color_to_grayscale_conversion`, copies the result back
    and prints it.
    """
    # Compile-time sizes for the test image and the launch configuration.
    # The block size has not been tuned for any particular GPU.
    alias IMAGE_WIDTH = 5
    alias IMAGE_HEIGHT = 10
    alias NUM_CHANNELS = 3
    alias BLOCK_SIZE = 16

    # Connecting to the accelerator errors out if no compatible GPU is found.
    gpu_device = accelerator()
    host_device = cpu()

    # Build the input image in host memory.
    rgb_image = Tensor[channel_dtype, tensor_rank](
        (IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS), host_device
    )

    # Give every pixel a simple position-dependent colour.
    for y in range(IMAGE_HEIGHT):
        for x in range(IMAGE_WIDTH):
            var base = y + x
            rgb_image[y, x, 0] = base
            rgb_image[y, x, 1] = base + 20
            rgb_image[y, x, 2] = base + 40

    # Transfer the input to the accelerator.
    rgb_image = rgb_image.move_to(gpu_device)

    # The single-channel output is allocated directly on the accelerator.
    gray_image = Tensor[channel_dtype, tensor_rank](
        (IMAGE_HEIGHT, IMAGE_WIDTH, 1), gpu_device
    )

    rgb_view = rgb_image.to_layout_tensor()
    gray_view = gray_image.to_layout_tensor()

    # Specialise the kernel on the two tensor layouts and compile it for the
    # accelerator.
    grayscale_kernel = Accelerator.compile[
        color_to_grayscale_conversion[rgb_view.layout, gray_view.layout]
    ](gpu_device)

    # Round the block counts up so a partial block at the right or bottom
    # edge still covers the remaining pixels.
    blocks_per_grid = Dim(
        ceildiv(IMAGE_WIDTH, BLOCK_SIZE),
        ceildiv(IMAGE_HEIGHT, BLOCK_SIZE),
    )
    threads_per_block = Dim(BLOCK_SIZE, BLOCK_SIZE)

    # Launch: the target device comes first, then the kernel arguments, then
    # the grid size in blocks and the block size as keyword arguments.
    grayscale_kernel(
        gpu_device,
        IMAGE_WIDTH,
        IMAGE_HEIGHT,
        rgb_view,
        gray_view,
        grid_dim=blocks_per_grid,
        block_dim=threads_per_block,
    )

    # Bring the result back to the host so it can be read and printed.
    gray_image = gray_image.move_to(host_device)

    print("Resulting grayscale image:")
    print_image[IMAGE_HEIGHT, IMAGE_WIDTH](gray_image)