codon/test/transform/kernels.codon

import gpu
@test
def test_hello_world():
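    # one thread per element: compute c[i] = a[i] + b[i] on the GPU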
    @gpu.kernel
    def kernel(a, b, c):
        i = gpu.thread.x
        c[i] = a[i] + b[i]

    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]
    kernel(a, b, c, grid=1, block=16)
    assert c == [3*i for i in range(16)]

@test
def test_raw():
    @gpu.kernel
    def kernel(a, b, c):
        i = gpu.thread.x
        c[i] = a[i] + b[i]

    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]
    kernel(gpu.raw(a), gpu.raw(b), gpu.raw(c), grid=1, block=16)
    assert c == [3*i for i in range(16)]

@test
def test_conversions():
    @gpu.kernel
    def kernel(x, v):
        v[0] = x

    def check(x):
        T = type(x)
        v = [T()]
        kernel(x, v, grid=1, block=1)
        return v == [x]

    assert check(None)
    assert check(42)
    assert check(3.14)
    assert check(f32(2.718))
    assert check(byte(99))
    assert check(Int[128](123123))
    assert check(UInt[128](321321))
    assert check(Optional[int]())
    assert check(Optional(111))
    assert check((1, 2, 3))
    assert check(([1], [2], [3]))
    # assert check(())  # TODO: PTX can't handle this; why?
    assert check(DynamicTuple((1, 2, 3)))
    assert check(DynamicTuple(([1], [2], [3])))
    assert check(DynamicTuple[int]())
    assert check(DynamicTuple[List[List[List[str]]]]())
    assert check('hello world')
    assert check([1, 2, 3])
    assert check([[1], [2], [3]])
    assert check({1: [1.1], 2: [2.2]})
    assert check({'a', 'b', 'c'})
    assert check(Optional([1, 2, 3]))

@test
def test_user_classes():
    @dataclass(gpu=True, eq=True)
    class A:
        x: int
        y: List[int]

    @tuple
    class B:
        x: int
        y: List[int]

    @gpu.kernel
    def kernel(a, b, c):
        a.x += b.x + c[0]
        c[1][0][0] = 9999
        a.y[0] = c[0] + 1
        b.y[0] = c[0] + 2

    a = A(42, [-1])
    b = B(100, [-2])
    c = (1000, [[-1]])
    kernel(a, b, c, grid=1, block=1)
    assert a == A(1142, [1001])
    assert b == B(100, [1002])
    assert c == (1000, [[9999]])

    @gpu.kernel
    def kernel2(a, b, c):
        a[0].x += b[0].x + c[0][0]
        c[0][1][0][0] = 9999
        a[0].y[0] = c[0][0] + 1
        b[0].y[0] = c[0][0] + 2

    a = [A(42, [-1])]
    b = [B(100, [-2])]
    c = [(1000, [[-1]])]
    kernel2(a, b, c, grid=1, block=1)
    assert a == [A(1142, [1001])]
    assert b == [B(100, [1002])]
    assert c == [(1000, [[9999]])]

@test
def test_intrinsics():
    @gpu.kernel
    def kernel(v):
        block_id = (gpu.block.x + gpu.block.y*gpu.grid.dim.x +
                    gpu.block.z*gpu.grid.dim.x*gpu.grid.dim.y)
        thread_id = (block_id*gpu.block.dim.x*gpu.block.dim.y*gpu.block.dim.z +
                     gpu.thread.z*gpu.block.dim.x*gpu.block.dim.y +
                     gpu.thread.y*gpu.block.dim.x +
                     gpu.thread.x)
        v[thread_id] = thread_id
        gpu.syncthreads()

    grid = gpu.Dim3(3, 4, 5)
    block = gpu.Dim3(6, 7, 8)
    N = grid.x * grid.y * grid.z * block.x * block.y * block.z
    v = [0 for _ in range(N)]
    kernel(v, grid=grid, block=block)
    assert v == list(range(N))

@test
def test_matmul():
    A = [[12, 7, 3],
         [4, 5, 6],
         [7, 8, 9]]

    B = [[5, 8, 1, 2],
         [6, 7, 3, 0],
         [4, 5, 9, 1]]

    def mmz(A, B):
        return [[0]*len(B[0]) for _ in range(len(A))]

    def matmul(A, B):
        result = mmz(A, B)
        for i in range(len(A)):
            for j in range(len(B[0])):
                for k in range(len(B)):
                    result[i][j] += A[i][k] * B[k][j]
        return result

    expected = matmul(A, B)

    @gpu.kernel
    def kernel(A, B, result):
        i = gpu.thread.x
        j = gpu.thread.y
        result[i][j] = sum(A[i][k]*B[k][j] for k in range(len(A[0])))

    result = mmz(A, B)
    kernel(A, B, result, grid=1, block=(len(result), len(result[0])))
    assert result == expected

MAX = 1000 # maximum Mandelbrot iterations
N = 256 # width and height of image
@test
def test_mandelbrot():
    pixels = [0 for _ in range(N * N)]

    def scale(x, a, b):
        return a + (x/N)*(b - a)

    expected = [0 for _ in range(N * N)]
    for i in range(N):
        for j in range(N):
            c = complex(scale(j, -2.00, 0.47), scale(i, -1.12, 1.12))
            z = 0j
            iteration = 0

            while abs(z) <= 2 and iteration < MAX:
                z = z**2 + c
                iteration += 1

            expected[N*i + j] = int(255 * iteration/MAX)

    @gpu.kernel
    def kernel(pixels):
        idx = (gpu.block.x * gpu.block.dim.x) + gpu.thread.x
        i, j = divmod(idx, N)
        c = complex(scale(j, -2.00, 0.47), scale(i, -1.12, 1.12))
        z = 0j
        iteration = 0

        while abs(z) <= 2 and iteration < MAX:
            z = z**2 + c
            iteration += 1

        pixels[idx] = int(255 * iteration/MAX)

    kernel(pixels, grid=(N*N)//1024, block=1024)
    assert pixels == expected

@test
def test_kitchen_sink():
    @gpu.kernel
    def kernel(x):
        i = gpu.thread.x
        d = {1: 2.1, 2: 3.5, 3: 4.2}
        s = {4, 5, 6}
        z = sum(
            d.get(x[i], j) + (j if i in s else -j)
            for j in range(i)
        )
        x[i] = int(z)

    x = [i for i in range(16)]
    kernel(x, grid=1, block=16)
    assert x == [0, 2, 6, 9, 12, 20, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0]

@test
def test_auto_par():
    a = [i for i in range(16)]
    b = [2*i for i in range(16)]
    c = [0 for _ in range(16)]

    @par(gpu=True)
    for i in range(16):
        c[i] = a[i] + b[i]

    assert c == [3*i for i in range(16)]

    @par(gpu=True)
    for i in range(16):
        c[i] += a[i] + b[i]

    assert c == [6*i for i in range(16)]

    N = 200
    Z = 42
    x = [0] * (N*N)
    y = [0] * (N*N)

    for i in range(2, N - 1, 3):
        for j in range(3, N, 2):
            x[i*N + j] = i + j + Z

    @par(gpu=True, collapse=2)
    for i in range(2, N - 1, 3):
        for j in range(3, N, 2):
            y[i*N + j] = i + j + Z

    assert x == y

    @par(gpu=True)
    for i in range(1):
        pass

test_hello_world()
test_raw()
test_conversions()
test_user_classes()
test_intrinsics()
test_matmul()
test_mandelbrot()
test_kitchen_sink()
test_auto_par()