# Mirror of https://github.com/exaloop/codon.git (267 lines, 6.2 KiB, Python)
import gpu
|
|
|
|
# Smoke test: element-wise addition of two 16-element lists in a GPU kernel,
# launched as one block of 16 threads (one thread per element).
@test
def test_hello_world():
    @gpu.kernel
    def kernel(a, b, c):
        # Each thread handles the element at its own x index.
        i = gpu.thread.x
        c[i] = a[i] + b[i]

    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]
    kernel(a, b, c, grid=1, block=16)

    # i + 2*i == 3*i; the kernel's writes must be visible in the host list.
    assert c == [3*i for i in range(16)]


# Same element-wise addition as the hello-world test, but every list is
# wrapped in gpu.raw(...) at the call site instead of being passed directly.
# NOTE(review): gpu.raw presumably hands the kernel the list's underlying
# storage without the usual argument conversion -- confirm against the gpu
# module documentation.
@test
def test_raw():
    @gpu.kernel
    def kernel(a, b, c):
        # Each thread handles the element at its own x index.
        i = gpu.thread.x
        c[i] = a[i] + b[i]

    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]
    kernel(gpu.raw(a), gpu.raw(b), gpu.raw(c), grid=1, block=16)

    # The result is checked through the original list `c`, not the raw wrapper.
    assert c == [3*i for i in range(16)]


# Round-trips values of many different types through a GPU kernel: the kernel
# stores its argument `x` into a one-element list, and the host then checks
# that the stored value compares equal to the original.
@test
def test_conversions():
    @gpu.kernel
    def kernel(x, v):
        v[0] = x

    def check(x):
        # Seed the output list with a default-constructed value of x's type so
        # that a kernel which wrote nothing would (generally) fail the check.
        T = type(x)
        v = [T()]
        kernel(x, v, grid=1, block=1)
        return v == [x]

    # Scalars and optionals.
    assert check(None)
    assert check(42)
    assert check(3.14)
    assert check(f32(2.718))
    assert check(byte(99))
    assert check(Int[128](123123))
    assert check(UInt[128](321321))
    assert check(Optional[int]())
    assert check(Optional(111))

    # Static and dynamic tuples, including ones holding lists.
    assert check((1, 2, 3))
    assert check(([1], [2], [3]))
    # assert check(())  # TODO: PTX can't handle this; why?
    assert check(DynamicTuple((1, 2, 3)))
    assert check(DynamicTuple(([1], [2], [3])))
    assert check(DynamicTuple[int]())
    assert check(DynamicTuple[List[List[List[str]]]]())

    # Strings and (nested) collections.
    assert check('hello world')
    assert check([1, 2, 3])
    assert check([[1], [2], [3]])
    assert check({1: [1.1], 2: [2.2]})
    assert check({'a', 'b', 'c'})
    assert check(Optional([1, 2, 3]))


# Passes user-defined types to GPU kernels -- a reference class declared with
# @dataclass(gpu=True, eq=True) and a @tuple class -- and checks which kernel
# mutations are observable on the host afterwards, both for bare objects and
# for objects stored inside lists.
@test
def test_user_classes():
    @dataclass(gpu=True, eq=True)
    class A:
        x: int
        y: List[int]

    @tuple
    class B:
        x: int
        y: List[int]

    @gpu.kernel
    def kernel(a, b, c):
        a.x += b.x + c[0]      # 42 + 100 + 1000 == 1142
        c[1][0][0] = 9999      # write through the nested list inside the tuple
        a.y[0] = c[0] + 1      # 1001
        b.y[0] = c[0] + 2      # 1002; b.x itself is never assigned

    a = A(42, [-1])
    b = B(100, [-2])
    c = (1000, [[-1]])
    kernel(a, b, c, grid=1, block=1)

    assert a == A(1142, [1001])
    assert b == B(100, [1002])
    assert c == (1000, [[9999]])

    # Same mutations, but each object is reached through element 0 of a list.
    @gpu.kernel
    def kernel2(a, b, c):
        a[0].x += b[0].x + c[0][0]
        c[0][1][0][0] = 9999
        a[0].y[0] = c[0][0] + 1
        b[0].y[0] = c[0][0] + 2

    a = [A(42, [-1])]
    b = [B(100, [-2])]
    c = [(1000, [[-1]])]
    kernel2(a, b, c, grid=1, block=1)

    assert a == [A(1142, [1001])]
    assert b == [B(100, [1002])]
    assert c == [(1000, [[9999]])]


# Exercises the block/thread/grid index intrinsics with a 3-D grid of 3-D
# blocks: every thread computes its own linear id and stores it at that
# position, so the output must equal list(range(N)).
@test
def test_intrinsics():
    @gpu.kernel
    def kernel(v):
        # Linearize the 3-D block index (x varies fastest, then y, then z).
        block_id = (gpu.block.x + gpu.block.y*gpu.grid.dim.x +
                    gpu.block.z*gpu.grid.dim.x*gpu.grid.dim.y)
        # Offset by all threads in the preceding blocks, then linearize the
        # 3-D thread index within this block in the same x/y/z order.
        thread_id = (block_id*gpu.block.dim.x*gpu.block.dim.y*gpu.block.dim.z +
                     gpu.thread.z*gpu.block.dim.x*gpu.block.dim.y +
                     gpu.thread.y*gpu.block.dim.x +
                     gpu.thread.x)
        v[thread_id] = thread_id
        gpu.syncthreads()

    grid = gpu.Dim3(3, 4, 5)
    block = gpu.Dim3(6, 7, 8)
    # Total thread count: 3*4*5 blocks of 6*7*8 threads each.
    N = grid.x * grid.y * grid.z * block.x * block.y * block.z
    v = [0 for _ in range(N)]
    kernel(v, grid=grid, block=block)
    assert v == list(range(N))


# Multiplies a 3x3 matrix by a 3x4 matrix on the GPU, one thread per output
# cell, and compares against a plain triple-loop reference computed on the
# host.
@test
def test_matmul():
    A = [[12, 7, 3],
         [4, 5, 6],
         [7, 8, 9]]

    B = [[5, 8, 1, 2],
         [6, 7, 3, 0],
         [4, 5, 9, 1]]

    def mmz(A, B):
        # Zero matrix with the shape of A @ B: len(A) rows, len(B[0]) columns.
        return [[0]*len(B[0]) for _ in range(len(A))]

    def matmul(A, B):
        # Host reference implementation.
        result = mmz(A, B)
        for i in range(len(A)):
            for j in range(len(B[0])):
                for k in range(len(B)):
                    result[i][j] += A[i][k] * B[k][j]
        return result

    expected = matmul(A, B)

    @gpu.kernel
    def kernel(A, B, result):
        # Thread (x, y) computes output cell (i, j).
        i = gpu.thread.x
        j = gpu.thread.y
        result[i][j] = sum(A[i][k]*B[k][j] for k in range(len(A[0])))

    result = mmz(A, B)
    # One block whose 2-D shape matches the output matrix (rows x columns).
    kernel(A, B, result, grid=1, block=(len(result), len(result[0])))
    assert result == expected


# Module-level parameters read by test_mandelbrot (on the host and inside its
# GPU kernel).
MAX = 1000  # maximum Mandelbrot iterations
N = 256     # width and height of image


# Renders an N x N Mandelbrot escape-time image twice -- once with nested
# loops on the host, once with a GPU kernel that handles one pixel per
# thread -- and requires the two images to match exactly.
@test
def test_mandelbrot():
    pixels = [0 for _ in range(N * N)]

    def scale(x, a, b):
        # Map a pixel coordinate x in [0, N) linearly onto [a, b).
        return a + (x/N)*(b - a)

    # Host reference image, stored row-major (row i, column j -> N*i + j).
    expected = [0 for _ in range(N * N)]
    for i in range(N):
        for j in range(N):
            c = complex(scale(j, -2.00, 0.47), scale(i, -1.12, 1.12))
            z = 0j
            iteration = 0

            while abs(z) <= 2 and iteration < MAX:
                z = z**2 + c
                iteration += 1

            # Normalize the iteration count to the 0..255 range.
            expected[N*i + j] = int(255 * iteration/MAX)

    @gpu.kernel
    def kernel(pixels):
        # Global linear thread index, split back into (row, column).
        idx = (gpu.block.x * gpu.block.dim.x) + gpu.thread.x
        i, j = divmod(idx, N)
        c = complex(scale(j, -2.00, 0.47), scale(i, -1.12, 1.12))
        z = 0j
        iteration = 0

        while abs(z) <= 2 and iteration < MAX:
            z = z**2 + c
            iteration += 1

        pixels[idx] = int(255 * iteration/MAX)

    # With N == 256 this is 64 blocks of 1024 threads == N*N threads, i.e.
    # exactly one thread per pixel, so the kernel needs no bounds check.
    kernel(pixels, grid=(N*N)//1024, block=1024)
    assert pixels == expected


# Uses a dict, a set, a generator expression and sum() together inside a
# single GPU kernel.
@test
def test_kitchen_sink():
    @gpu.kernel
    def kernel(x):
        i = gpu.thread.x
        d = {1: 2.1, 2: 3.5, 3: 4.2}
        s = {4, 5, 6}
        # For each j < i: take d[x[i]] (falling back to j when the key is
        # missing), then add j if i is in s, otherwise subtract j.
        z = sum(
            d.get(x[i], j) + (j if i in s else -j)
            for j in range(i)
        )
        x[i] = int(z)

    x = [i for i in range(16)]
    kernel(x, grid=1, block=16)
    # Since x[i] == i on entry:
    #   i in 1..3 -> key found:       i*d[i] - sum(range(i)), truncated by int()
    #   i in 4..6 -> fallback, in s:  sum(2*j for j in range(i))
    #   i == 0 or i >= 7 ->           empty sum, or j - j for every j: 0
    assert x == [0, 2, 6, 9, 12, 20, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0]


@test
|
|
def test_auto_par():
|
|
a = [i for i in range(16)]
|
|
b = [2*i for i in range(16)]
|
|
c = [0 for _ in range(16)]
|
|
|
|
@par(gpu=True)
|
|
for i in range(16):
|
|
c[i] = a[i] + b[i]
|
|
|
|
assert c == [3*i for i in range(16)]
|
|
|
|
@par(gpu=True)
|
|
for i in range(16):
|
|
c[i] += a[i] + b[i]
|
|
|
|
assert c == [6*i for i in range(16)]
|
|
|
|
N = 200
|
|
Z = 42
|
|
x = [0] * (N*N)
|
|
y = [0] * (N*N)
|
|
|
|
for i in range(2, N - 1, 3):
|
|
for j in range(3, N, 2):
|
|
x[i*N + j] = i + j + Z
|
|
|
|
@par(gpu=True, collapse=2)
|
|
for i in range(2, N - 1, 3):
|
|
for j in range(3, N, 2):
|
|
y[i*N + j] = i + j + Z
|
|
|
|
assert x == y
|
|
|
|
@par(gpu=True)
|
|
for i in range(1):
|
|
pass
|
|
|
|
# Run every test in definition order.
test_hello_world()
test_raw()
test_conversions()
test_user_classes()
test_intrinsics()
test_matmul()
test_mandelbrot()
test_kitchen_sink()
test_auto_par()