# codon/test/transform/kernels.codon
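# End-to-end tests for Codon's GPU support: @gpu.kernel kernels, host/device
# argument conversions, and GPU-offloaded @par loops.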

import gpu

@test
def test_hello_world():
    @gpu.kernel
    def kernel(a, b, c):
        i = gpu.thread.x
        c[i] = a[i] + b[i]

    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]

    kernel(a, b, c, grid=1, block=16)
    assert c == [3*i for i in range(16)]
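
# Same kernel as above, but arguments are wrapped in gpu.raw(), which (as this
# test exercises) hands the kernel the list's underlying buffer directly rather
# than going through the usual argument conversion; results are still visible
# on the host, as the assert below checks.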
@test
def test_raw():
    @gpu.kernel
    def kernel(a, b, c):
        i = gpu.thread.x
        c[i] = a[i] + b[i]

    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]

    kernel(gpu.raw(a), gpu.raw(b), gpu.raw(c), grid=1, block=16)
    assert c == [3*i for i in range(16)]
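
# Round-trip a value of each supported type through a kernel: the kernel
# stores its argument into a one-element list, and check() verifies the host
# sees the identical value after the copy back from the device.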
@test
def test_conversions():
    @gpu.kernel
    def kernel(x, v):
        v[0] = x

    def check(x):
        T = type(x)
        v = [T()]
        kernel(x, v, grid=1, block=1)
        return v == [x]

    assert check(None)
    assert check(42)
    assert check(3.14)
    assert check(f32(2.718))
    assert check(byte(99))
    assert check(Int[128](123123))
    assert check(UInt[128](321321))
    assert check(Optional[int]())
    assert check(Optional(111))
    assert check((1, 2, 3))
    assert check(([1], [2], [3]))
    # assert check(())  # TODO: PTX can't handle this; why?
    assert check(DynamicTuple((1, 2, 3)))
    assert check(DynamicTuple(([1], [2], [3])))
    assert check(DynamicTuple[int]())
    assert check(DynamicTuple[List[List[List[str]]]]())
    assert check('hello world')
    assert check([1, 2, 3])
    assert check([[1], [2], [3]])
    assert check({1: [1.1], 2: [2.2]})
    assert check({'a', 'b', 'c'})
    assert check(Optional([1, 2, 3]))
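
# User-defined classes can cross the host/device boundary too. A is a
# reference type marked @dataclass(gpu=True, ...) so it participates in GPU
# argument conversion; B is a @tuple (by-value) type, and the test mutates
# only its List field, whose contents are synchronized back to the host.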
@test
def test_user_classes():
    @dataclass(gpu=True, eq=True)
    class A:
        x: int
        y: List[int]

    @tuple
    class B:
        x: int
        y: List[int]

    @gpu.kernel
    def kernel(a, b, c):
        a.x += b.x + c[0]
        c[1][0][0] = 9999
        a.y[0] = c[0] + 1
        b.y[0] = c[0] + 2

    a = A(42, [-1])
    b = B(100, [-2])
    c = (1000, [[-1]])
    kernel(a, b, c, grid=1, block=1)
    assert a == A(1142, [1001])
    assert b == B(100, [1002])
    assert c == (1000, [[9999]])

    @gpu.kernel
    def kernel2(a, b, c):
        a[0].x += b[0].x + c[0][0]
        c[0][1][0][0] = 9999
        a[0].y[0] = c[0][0] + 1
        b[0].y[0] = c[0][0] + 2

    a = [A(42, [-1])]
    b = [B(100, [-2])]
    c = [(1000, [[-1]])]
    kernel2(a, b, c, grid=1, block=1)
    assert a == [A(1142, [1001])]
    assert b == [B(100, [1002])]
    assert c == [(1000, [[9999]])]
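
# Each thread computes its global ID with the standard CUDA-style
# linearization of a 3D grid of 3D blocks:
#   block_id  = bx + by*gdim.x + bz*gdim.x*gdim.y
#   thread_id = block_id*(bdim.x*bdim.y*bdim.z)
#             + tz*bdim.x*bdim.y + ty*bdim.x + tx
# Since every thread writes thread_id into v[thread_id], v must come back
# equal to range(N).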
@test
def test_intrinsics():
    @gpu.kernel
    def kernel(v):
        block_id = (gpu.block.x + gpu.block.y*gpu.grid.dim.x +
                    gpu.block.z*gpu.grid.dim.x*gpu.grid.dim.y)
        thread_id = (block_id*gpu.block.dim.x*gpu.block.dim.y*gpu.block.dim.z +
                     gpu.thread.z*gpu.block.dim.x*gpu.block.dim.y +
                     gpu.thread.y*gpu.block.dim.x +
                     gpu.thread.x)
        v[thread_id] = thread_id
        gpu.syncthreads()

    grid = gpu.Dim3(3, 4, 5)
    block = gpu.Dim3(6, 7, 8)
    N = grid.x * grid.y * grid.z * block.x * block.y * block.z
    v = [0 for _ in range(N)]
    kernel(v, grid=grid, block=block)
    assert v == list(range(N))
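
# GPU matrix multiplication with one thread per output element: thread (x, y)
# computes result[x][y] as a dot product, checked against a straightforward
# triple-loop matmul on the host.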
@test
def test_matmul():
    A = [[12, 7, 3],
         [4, 5, 6],
         [7, 8, 9]]

    B = [[5, 8, 1, 2],
         [6, 7, 3, 0],
         [4, 5, 9, 1]]

    def mmz(A, B):
        return [[0]*len(B[0]) for _ in range(len(A))]

    def matmul(A, B):
        result = mmz(A, B)
        for i in range(len(A)):
            for j in range(len(B[0])):
                for k in range(len(B)):
                    result[i][j] += A[i][k] * B[k][j]
        return result

    expected = matmul(A, B)

    @gpu.kernel
    def kernel(A, B, result):
        i = gpu.thread.x
        j = gpu.thread.y
        result[i][j] = sum(A[i][k]*B[k][j] for k in range(len(A[0])))

    result = mmz(A, B)
    kernel(A, B, result, grid=1, block=(len(result), len(result[0])))
    assert result == expected

MAX = 1000 # maximum Mandelbrot iterations
N = 256 # width and height of image
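
# One GPU thread per pixel: grid*block = (N*N//1024)*1024 = N*N threads, each
# mapping its flat index to an (i, j) pixel and running the same escape-time
# iteration as the sequential reference computation.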
@test
def test_mandelbrot():
    pixels = [0 for _ in range(N * N)]

    def scale(x, a, b):
        return a + (x/N)*(b - a)

    expected = [0 for _ in range(N * N)]
    for i in range(N):
        for j in range(N):
            c = complex(scale(j, -2.00, 0.47), scale(i, -1.12, 1.12))
            z = 0j
            iteration = 0
            while abs(z) <= 2 and iteration < MAX:
                z = z**2 + c
                iteration += 1
            expected[N*i + j] = int(255 * iteration/MAX)

    @gpu.kernel
    def kernel(pixels):
        idx = (gpu.block.x * gpu.block.dim.x) + gpu.thread.x
        i, j = divmod(idx, N)
        c = complex(scale(j, -2.00, 0.47), scale(i, -1.12, 1.12))
        z = 0j
        iteration = 0
        while abs(z) <= 2 and iteration < MAX:
            z = z**2 + c
            iteration += 1
        pixels[idx] = int(255 * iteration/MAX)

    kernel(pixels, grid=(N*N)//1024, block=1024)
    assert pixels == expected
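
# Dicts, sets and generators allocated inside the kernel itself. For thread i
# (with x[i] == i initially): d.get(i, j) hits only for i in {1, 2, 3}, and
# the second term is +j for i in {4, 5, 6} and -j otherwise, which yields the
# expected values below (e.g. i = 5: sum of 2*j for j in range(5) is 20).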
@test
def test_kitchen_sink():
    @gpu.kernel
    def kernel(x):
        i = gpu.thread.x
        d = {1: 2.1, 2: 3.5, 3: 4.2}
        s = {4, 5, 6}
        z = sum(
            d.get(x[i], j) + (j if i in s else -j)
            for j in range(i)
        )
        x[i] = int(z)

    x = [i for i in range(16)]
    kernel(x, grid=1, block=16)
    assert x == [0, 2, 6, 9, 12, 20, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0]
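
# @par(gpu=True) asks the compiler to offload a parallel loop to the GPU
# without an explicit kernel; collapse=2 flattens the two nested loops into a
# single iteration space. Each GPU loop is checked against the equivalent
# sequential loop.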
@test
def test_auto_par():
    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]

    @par(gpu=True)
    for i in range(16):
        c[i] = a[i] + b[i]
    assert c == [3*i for i in range(16)]

    @par(gpu=True)
    for i in range(16):
        c[i] += a[i] + b[i]
    assert c == [6*i for i in range(16)]

    N = 200
    Z = 42
    x = [0] * (N*N)
    y = [0] * (N*N)

    for i in range(2, N - 1, 3):
        for j in range(3, N, 2):
            x[i*N + j] = i + j + Z

    @par(gpu=True, collapse=2)
    for i in range(2, N - 1, 3):
        for j in range(3, N, 2):
            y[i*N + j] = i + j + Z
    assert x == y

    # An empty GPU loop should still compile and launch cleanly.
    @par(gpu=True)
    for i in range(1):
        pass

test_hello_world()
test_raw()
test_conversions()
test_user_classes()
test_intrinsics()
test_matmul()
test_mandelbrot()
test_kitchen_sink()
test_auto_par()