Mirror of https://github.com/exaloop/codon.git

Compare commits — 21 commits:
dcb41dcfc9, c1dae7d87d, 984974b40d, 915cb4e9f0, ce5c49edb5, 59f5bbb73b, 93fb3d53e3,
b3f6c12d57, b17d21513d, d035f1dc97, dc5e5ac7a6, 01a7503762, f1ab7116d8, b58b1ee767,
56c00d36c2, 4521182aa8, 44c59c2a03, 15c43eb94e, b8c1eeed36, d13d6a58e3, 37ff25a907
Changed paths:
.github/actions/build-manylinux-aarch64
.github/actions/build-manylinux-x86_64
.github/actions/build-manylinux
.github/workflows
cmake
codon/app
codon/cir/transform
Dockerfile (new file, manylinux aarch64 action)
@@ -0,0 +1,3 @@
FROM quay.io/pypa/manylinux2014_aarch64
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
action.yml (new file)
@@ -0,0 +1,5 @@
name: manylinux build (aarch64)
description: Builds Codon on manylinux (aarch64)
runs:
  using: docker
  image: Dockerfile
entrypoint.sh
@@ -4,13 +4,12 @@ set -e
# setup
cd /github/workspace
yum -y update
yum -y install python3 python3-devel
yum -y install python3 python3-devel gcc-gfortran

# env
export PYTHONPATH=$(pwd)/test/python
export CODON_PYTHON=$(python3 test/python/find-python-library.py)
python3 -m pip install -Iv pip==21.3.1
python3 -m pip install numpy
python3 -m pip install -Iv pip==21.3.1 numpy==1.17.5

# deps
if [ ! -d ./llvm ]; then
@@ -22,6 +21,7 @@ mkdir build
export CC="$(pwd)/llvm/bin/clang"
export CXX="$(pwd)/llvm/bin/clang++"
export LLVM_DIR=$(llvm/bin/llvm-config --cmakedir)
export CODON_SYSTEM_LIBRARIES=/usr/lib64
(cd build && cmake .. -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER=${CC} \
  -DCMAKE_CXX_COMPILER=${CXX})
@@ -44,6 +44,7 @@ build/codon_test

# package
export CODON_BUILD_ARCHIVE=codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake \
  codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
tar -czf ${CODON_BUILD_ARCHIVE} codon-deploy
du -sh codon-deploy
action.yml (new file)
@@ -0,0 +1,5 @@
name: manylinux build (x86_64)
description: Builds Codon on manylinux (x86_64)
runs:
  using: docker
  image: Dockerfile
entrypoint.sh (new file)
@@ -0,0 +1,50 @@
#!/bin/sh -l
set -e

# setup
cd /github/workspace
yum -y update
yum -y install python3 python3-devel gcc-gfortran

# env
export PYTHONPATH=$(pwd)/test/python
export CODON_PYTHON=$(python3 test/python/find-python-library.py)
python3 -m pip install -Iv pip==21.3.1 numpy==1.17.5

# deps
if [ ! -d ./llvm ]; then
  /bin/bash scripts/deps.sh 2;
fi

# build
mkdir build
export CC="$(pwd)/llvm/bin/clang"
export CXX="$(pwd)/llvm/bin/clang++"
export LLVM_DIR=$(llvm/bin/llvm-config --cmakedir)
export CODON_SYSTEM_LIBRARIES=/usr/lib64
(cd build && cmake .. -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER=${CC} \
  -DCMAKE_CXX_COMPILER=${CXX})
cmake --build build --config Release -- VERBOSE=1
cmake --install build --prefix=codon-deploy

# build cython
export PATH=$PATH:$(pwd)/llvm/bin
python3 -m pip install cython wheel astunparse
(cd codon-deploy/python && python3 setup.py sdist)
CODON_DIR=$(pwd)/codon-deploy python3 -m pip install -v codon-deploy/python/dist/*.gz
python3 test/python/cython_jit.py

# test
export LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH
export PYTHONPATH=$(pwd):$PYTHONPATH
export CODON_PATH=$(pwd)/stdlib
ln -s build/libcodonrt.so .
build/codon_test

# package
export CODON_BUILD_ARCHIVE=codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake \
  codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
tar -czf ${CODON_BUILD_ARCHIVE} codon-deploy
du -sh codon-deploy
action.yml (deleted file)
@@ -1,5 +0,0 @@
name: manylinux build
description: Builds Codon on manylinux
runs:
  using: docker
  image: Dockerfile
CI workflow (.github/workflows)
@@ -26,7 +26,12 @@ jobs:
      uses: ncipollo/release-action@v1

  manylinux:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        arch:
          - x86_64
          # - aarch64
    runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-arm-latest' || 'ubuntu-latest' }}
    name: Codon CI (manylinux)
    needs: create_release
    permissions:
@@ -39,10 +44,15 @@ jobs:
        uses: actions/cache@v4
        with:
          path: llvm
          key: manylinux-llvm
          key: manylinux-${{ matrix.arch }}-llvm

      - name: Main
        uses: ./.github/actions/build-manylinux
      - name: Main x86_64
        if: matrix.arch == 'x86_64'
        uses: ./.github/actions/build-manylinux-x86_64

      - name: Main aarch64
        if: matrix.arch == 'aarch64'
        uses: ./.github/actions/build-manylinux-aarch64

      - name: Upload Release Asset
        if: contains(github.ref, 'tags/v')
@@ -66,7 +76,8 @@ jobs:
      matrix:
        os:
          - ubuntu-latest
          - macos-12
          - macos-latest
          # - ubuntu-arm-latest
    runs-on: ${{ matrix.os }}
    name: Codon CI
    needs: create_release
@@ -79,23 +90,49 @@ jobs:
        with:
          python-version: '3.9'

      - name: Linux Setup
        if: startsWith(matrix.os, 'ubuntu')
      - name: x86_64 Linux Setup
        if: startsWith(matrix.os, 'ubuntu') && matrix.os != 'ubuntu-arm-latest'
        run: |
          sudo apt update
          sudo apt install -y gfortran libgfortran5 lsb-release wget software-properties-common gnupg
          wget https://apt.llvm.org/llvm.sh
          sudo chmod +x llvm.sh
          sudo ./llvm.sh 17
          echo "LIBEXT=so" >> $GITHUB_ENV
          echo "OS_NAME=linux" >> $GITHUB_ENV
          echo "CODON_SYSTEM_LIBRARIES=/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
          echo "CC=clang-17" >> $GITHUB_ENV
          echo "CXX=clang++-17" >> $GITHUB_ENV

      - name: Arm Linux Setup
        if: matrix.os == 'ubuntu-arm-latest'
        run: |
          sudo apt update
          sudo apt install -y gfortran libgfortran5 lsb-release wget software-properties-common gnupg
          wget https://apt.llvm.org/llvm.sh
          sudo chmod +x llvm.sh
          sudo ./llvm.sh 17
          echo "LIBEXT=so" >> $GITHUB_ENV
          echo "OS_NAME=linux" >> $GITHUB_ENV
          echo "CODON_SYSTEM_LIBRARIES=/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
          echo "CC=clang-17" >> $GITHUB_ENV
          echo "CXX=clang++-17" >> $GITHUB_ENV

      - name: macOS Setup
        if: startsWith(matrix.os, 'macos')
        run: |
          brew install automake
          echo "LIBEXT=dylib" >> $GITHUB_ENV
          echo "OS_NAME=osx" >> $GITHUB_ENV
          echo "CODON_SYSTEM_LIBRARIES=$(brew --prefix gcc)/lib/gcc/current" >> $GITHUB_ENV
          echo "CC=clang" >> $GITHUB_ENV
          echo "CXX=clang++" >> $GITHUB_ENV
          echo "FC=gfortran-12" >> $GITHUB_ENV

      - name: Set up Python
        run: |
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install numpy cython wheel astunparse
          python -m pip install cython wheel astunparse
          python -m pip install --force-reinstall -v "numpy==1.26.4"
          which python
          which pip
          echo "CODON_PYTHON=$(python test/python/find-python-library.py)" >> $GITHUB_ENV
@@ -105,14 +142,11 @@ jobs:
        uses: actions/cache@v4
        with:
          path: llvm
          key: ${{ runner.os }}-llvm
          key: ${{ runner.os }}-${{ matrix.os }}-llvm

      - name: Build Dependencies
        if: steps.cache-deps.outputs.cache-hit != 'true'
        run: ./scripts/deps.sh 2
        env:
          CC: clang
          CXX: clang++

      - name: Build
        run: |
@@ -123,18 +157,12 @@ jobs:
            -DCMAKE_CXX_COMPILER=${CXX})
          cmake --build build --config Release -- VERBOSE=1
          cmake --install build --prefix=codon-deploy
        env:
          CC: clang
          CXX: clang++

      - name: Build Cython
        run: |
          (cd codon-deploy/python && python3 setup.py sdist)
          CODON_DIR=$(pwd)/codon-deploy python -m pip install -v codon-deploy/python/dist/*.gz
          python test/python/cython_jit.py
        env:
          CC: clang
          CXX: clang++
          CODON_PATH=$(pwd)/codon-deploy/lib/codon/stdlib python test/python/cython_jit.py

      - name: Test
        run: |
@@ -151,10 +179,15 @@ jobs:
        run: |
          echo "CODON_BUILD_ARCHIVE=codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz" >> $GITHUB_ENV

      - name: Codesign (macOS)
        if: startsWith(matrix.os, 'macos')
        run: |
          codesign -f -s - codon-deploy/bin/codon codon-deploy/lib/codon/*.dylib

      - name: Prepare Artifacts
        run: |
          cp -rf codon-deploy/python/dist .
          rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
          rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake codon-deploy/python/codon_jit.egg-info codon-deploy/python/build
          tar -czf ${CODON_BUILD_ARCHIVE} codon-deploy
          du -sh codon-deploy
@@ -165,24 +198,31 @@ jobs:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create_release.outputs.upload_url }}
          asset_path: ./codon-darwin-x86_64.tar.gz
          asset_name: codon-darwin-x86_64.tar.gz
          asset_path: ./codon-darwin-arm64.tar.gz
          asset_name: codon-darwin-arm64.tar.gz
          asset_content_type: application/gzip

      - name: Upload Artifacts
        if: startsWith(matrix.os, 'macos')
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-x86_64
          path: codon-darwin-x86_64.tar.gz
          name: ${{ matrix.os }}-arm64
          path: codon-darwin-arm64.tar.gz

      - name: Upload Artifacts
        if: startsWith(matrix.os, 'ubuntu')
        if: startsWith(matrix.os, 'ubuntu') && matrix.os != 'ubuntu-arm-latest'
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-x86_64
          path: codon-linux-x86_64.tar.gz

      # - name: Publish Package
      #   if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') && startsWith(matrix.os, 'ubuntu')
      #   uses: pypa/gh-action-pypi-publish@release/v1
      - name: Upload Artifacts
        if: matrix.os == 'ubuntu-arm-latest'
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-arm64
          path: codon-linux-arm64.tar.gz

      - name: Publish Package
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') && startsWith(matrix.os, 'ubuntu')
        uses: pypa/gh-action-pypi-publish@release/v1
CMakeLists.txt (111 changed lines)
@@ -1,10 +1,10 @@
cmake_minimum_required(VERSION 3.14)
project(
  Codon
  VERSION "0.17.0"
  VERSION "0.18.2"
  HOMEPAGE_URL "https://github.com/exaloop/codon"
  DESCRIPTION "high-performance, extensible Python compiler")
set(CODON_JIT_PYTHON_VERSION "0.2.0")
set(CODON_JIT_PYTHON_VERSION "0.3.2")
configure_file("${PROJECT_SOURCE_DIR}/cmake/config.h.in"
               "${PROJECT_SOURCE_DIR}/codon/config/config.h")
configure_file("${PROJECT_SOURCE_DIR}/cmake/config.py.in"
@@ -48,10 +48,8 @@ include(${CMAKE_SOURCE_DIR}/cmake/deps.cmake)
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
if(APPLE)
  set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/../lib/codon")
  set(STATIC_LIBCPP "")
else()
  set(CMAKE_INSTALL_RPATH "$ORIGIN:$ORIGIN/../lib/codon")
  set(STATIC_LIBCPP "-static-libstdc++")
endif()

add_executable(peg2cpp codon/util/peg2cpp.cpp)
@@ -73,17 +71,72 @@ set(CODON_JUPYTER_FILES codon/util/jupyter.h codon/util/jupyter.cpp)
add_library(codon_jupyter SHARED ${CODON_JUPYTER_FILES})

# Codon runtime library
add_library(codonfloat STATIC
  codon/runtime/floatlib/extenddftf2.c
  codon/runtime/floatlib/fp_trunc.h
  codon/runtime/floatlib/truncdfhf2.c
  codon/runtime/floatlib/extendhfsf2.c
  codon/runtime/floatlib/int_endianness.h
  codon/runtime/floatlib/truncdfsf2.c
  codon/runtime/floatlib/extendhftf2.c
  codon/runtime/floatlib/int_lib.h
  # codon/runtime/floatlib/truncsfbf2.c
  codon/runtime/floatlib/extendsfdf2.c
  codon/runtime/floatlib/int_math.h
  codon/runtime/floatlib/truncsfhf2.c
  codon/runtime/floatlib/extendsftf2.c
  codon/runtime/floatlib/int_types.h
  codon/runtime/floatlib/trunctfdf2.c
  codon/runtime/floatlib/fp_extend.h
  codon/runtime/floatlib/int_util.h
  codon/runtime/floatlib/trunctfhf2.c
  codon/runtime/floatlib/fp_lib.h
  # codon/runtime/floatlib/truncdfbf2.c
  codon/runtime/floatlib/trunctfsf2.c)
target_compile_options(codonfloat PRIVATE -O3)
target_compile_definitions(codonfloat PRIVATE COMPILER_RT_HAS_FLOAT16)

set(CODONRT_FILES codon/runtime/lib.h codon/runtime/lib.cpp
                  codon/runtime/re.cpp codon/runtime/exc.cpp
                  codon/runtime/gpu.cpp)
                  codon/runtime/gpu.cpp codon/runtime/numpy/sort.cpp
                  codon/runtime/numpy/loops.cpp codon/runtime/numpy/zmath.cpp)
add_library(codonrt SHARED ${CODONRT_FILES})
add_dependencies(codonrt zlibstatic gc backtrace bz2 liblzma re2 fast_float)
add_dependencies(codonrt zlibstatic gc backtrace bz2 liblzma
                 re2 hwy hwy_contrib fast_float codonfloat)

if(DEFINED ENV{CODON_SYSTEM_LIBRARIES})
  if(APPLE)
    set(copied_libgfortran "${CMAKE_BINARY_DIR}/libgfortran.5${CMAKE_SHARED_LIBRARY_SUFFIX}")
    set(copied_libquadmath "${CMAKE_BINARY_DIR}/libquadmath.0${CMAKE_SHARED_LIBRARY_SUFFIX}")
    set(copied_libgcc "${CMAKE_BINARY_DIR}/libgcc_s.1.1${CMAKE_SHARED_LIBRARY_SUFFIX}")
  else()
    set(copied_libgfortran "${CMAKE_BINARY_DIR}/libgfortran${CMAKE_SHARED_LIBRARY_SUFFIX}.5")
    set(copied_libquadmath "${CMAKE_BINARY_DIR}/libquadmath${CMAKE_SHARED_LIBRARY_SUFFIX}.0")
    set(copied_libgcc "${CMAKE_BINARY_DIR}/libgcc_s${CMAKE_SHARED_LIBRARY_SUFFIX}.1")
  endif()

  add_custom_command(
    OUTPUT ${copied_libgfortran}
    DEPENDS "${CMAKE_SOURCE_DIR}/scripts/get_system_libs.sh"
    COMMAND ${CMAKE_SOURCE_DIR}/scripts/get_system_libs.sh "$ENV{CODON_SYSTEM_LIBRARIES}" ${CMAKE_BINARY_DIR}
    COMMENT "Copying system libraries to build directory")

  add_custom_target(copy_libraries ALL DEPENDS ${copied_libgfortran})
  add_dependencies(codonrt copy_libraries)

  add_library(libgfortran SHARED IMPORTED)
  set_target_properties(libgfortran PROPERTIES IMPORTED_LOCATION ${copied_libgfortran})
  target_link_libraries(codonrt PRIVATE libgfortran)
else()
  message(FATAL_ERROR "Set 'CODON_SYSTEM_LIBRARIES' to the directory containing system libraries.")
endif()

target_include_directories(codonrt PRIVATE ${backtrace_SOURCE_DIR}
                                           ${re2_SOURCE_DIR}
                                           ${highway_SOURCE_DIR}
                                           "${gc_SOURCE_DIR}/include"
                                           "${fast_float_SOURCE_DIR}/include" runtime)
target_link_libraries(codonrt PRIVATE fmt omp backtrace ${STATIC_LIBCPP}
                                      LLVMSupport)
target_link_libraries(codonrt PRIVATE fmt omp backtrace LLVMSupport)
if(APPLE)
  target_link_libraries(
    codonrt
@@ -91,13 +144,19 @@ if(APPLE)
    -Wl,-force_load,$<TARGET_FILE:gc>
    -Wl,-force_load,$<TARGET_FILE:bz2>
    -Wl,-force_load,$<TARGET_FILE:liblzma>
    -Wl,-force_load,$<TARGET_FILE:re2>)
    -Wl,-force_load,$<TARGET_FILE:re2>
    -Wl,-force_load,$<TARGET_FILE:hwy>
    -Wl,-force_load,$<TARGET_FILE:hwy_contrib>
    -Wl,-force_load,$<TARGET_FILE:codonfloat>)
  target_link_libraries(codonrt PUBLIC "-framework Accelerate")
else()
  add_dependencies(codonrt openblas)
  target_link_libraries(
    codonrt
    PRIVATE -Wl,--whole-archive $<TARGET_FILE:zlibstatic> $<TARGET_FILE:gc>
            $<TARGET_FILE:bz2> $<TARGET_FILE:liblzma> $<TARGET_FILE:re2>
            -Wl,--no-whole-archive)
            $<TARGET_FILE:openblas> $<TARGET_FILE:hwy> $<TARGET_FILE:hwy_contrib>
            $<TARGET_FILE:codonfloat> -Wl,--no-whole-archive)
endif()
if(ASAN)
  target_compile_options(
@@ -173,6 +232,10 @@ set(CODON_HPPFILES
    codon/cir/llvm/gpu.h
    codon/cir/llvm/llvisitor.h
    codon/cir/llvm/llvm.h
    codon/cir/llvm/native/native.h
    codon/cir/llvm/native/targets/aarch64.h
    codon/cir/llvm/native/targets/target.h
    codon/cir/llvm/native/targets/x86.h
    codon/cir/llvm/optimize.h
    codon/cir/module.h
    codon/cir/pyextension.h
@@ -187,6 +250,7 @@ set(CODON_HPPFILES
    codon/cir/transform/folding/rule.h
    codon/cir/transform/lowering/imperative.h
    codon/cir/transform/lowering/pipeline.h
    codon/cir/transform/numpy/numpy.h
    codon/cir/transform/manager.h
    codon/cir/transform/parallel/openmp.h
    codon/cir/transform/parallel/schedule.h
@@ -283,6 +347,9 @@ set(CODON_CPPFILES
    codon/cir/instr.cpp
    codon/cir/llvm/gpu.cpp
    codon/cir/llvm/llvisitor.cpp
    codon/cir/llvm/native/native.cpp
    codon/cir/llvm/native/targets/aarch64.cpp
    codon/cir/llvm/native/targets/x86.cpp
    codon/cir/llvm/optimize.cpp
    codon/cir/module.cpp
    codon/cir/transform/cleanup/canonical.cpp
@@ -294,6 +361,9 @@ set(CODON_CPPFILES
    codon/cir/transform/folding/folding.cpp
    codon/cir/transform/lowering/imperative.cpp
    codon/cir/transform/lowering/pipeline.cpp
    codon/cir/transform/numpy/expr.cpp
    codon/cir/transform/numpy/forward.cpp
    codon/cir/transform/numpy/numpy.cpp
    codon/cir/transform/manager.cpp
    codon/cir/transform/parallel/openmp.cpp
    codon/cir/transform/parallel/schedule.cpp
@@ -362,11 +432,7 @@ llvm_map_components_to_libnames(
  TransformUtils
  Vectorize
  Passes)
if(APPLE)
  target_link_libraries(codonc PRIVATE ${LLVM_LIBS} fmt dl codonrt)
else()
  target_link_libraries(codonc PRIVATE ${STATIC_LIBCPP} ${LLVM_LIBS} fmt dl codonrt)
endif()
target_link_libraries(codonc PRIVATE ${LLVM_LIBS} fmt dl codonrt)

# Gather headers
add_custom_target(
@@ -399,18 +465,24 @@ add_custom_target(
  COMMAND
    ${CMAKE_COMMAND} -E copy
    "${CMAKE_BINARY_DIR}/libomp${CMAKE_SHARED_LIBRARY_SUFFIX}"
    "${CMAKE_BINARY_DIR}/lib/codon")
    "${CMAKE_BINARY_DIR}/lib/codon"
  COMMAND
    ${CMAKE_COMMAND} -E copy ${copied_libgfortran} "${CMAKE_BINARY_DIR}/lib/codon"
  COMMAND
    ${CMAKE_COMMAND} -E copy ${copied_libquadmath} "${CMAKE_BINARY_DIR}/lib/codon"
  COMMAND
    ${CMAKE_COMMAND} -E copy ${copied_libgcc} "${CMAKE_BINARY_DIR}/lib/codon")
add_dependencies(libs codonrt codonc)

# Codon command-line tool
add_executable(codon codon/app/main.cpp)
target_link_libraries(codon PUBLIC ${STATIC_LIBCPP} fmt codonc codon_jupyter Threads::Threads)
target_link_libraries(codon PUBLIC fmt codonc codon_jupyter Threads::Threads)

# Codon test Download and unpack googletest at configure time
include(FetchContent)
FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip
  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

@@ -442,6 +514,9 @@ target_compile_definitions(codon_test

install(TARGETS codonrt codonc codon_jupyter DESTINATION lib/codon)
install(FILES ${CMAKE_BINARY_DIR}/libomp${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION lib/codon)
install(FILES ${copied_libgfortran} DESTINATION lib/codon)
install(FILES ${copied_libquadmath} DESTINATION lib/codon)
install(FILES ${copied_libgcc} DESTINATION lib/codon)
install(TARGETS codon DESTINATION bin)
install(DIRECTORY ${CMAKE_BINARY_DIR}/include/codon DESTINATION include)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/stdlib DESTINATION lib/codon)
LICENSE (240 changed lines)
@@ -1,91 +1,201 @@
Removed: the full text of the Business Source License 1.1 (license text copyright (c) 2017 MariaDB
Corporation Ab; "Business Source License" is a trademark of MariaDB Corporation Ab), including its
parameters —
  Licensor: Exaloop, Inc.
  Licensed Work: Codon compiler, runtime, and standard library
                 The Licensed Work is (c) 2022-2024 Exaloop Inc.
  Additional Use Grant: None
  Change Date: 2028-03-01
  Change License: Apache License, Version 2.0
— the note "For information about alternative licensing arrangements for the Software, please visit:
https://exaloop.io/", and the standard BSL terms, warranty disclaimer, and Covenants of Licensor.

Added: the full standard text of the Apache License, Version 2.0, January 2004
(http://www.apache.org/licenses/): the definitions and Sections 1-9 (Grant of Copyright License,
Grant of Patent License, Redistribution, Submission of Contributions, Trademarks, Disclaimer of
Warranty, Limitation of Liability, Accepting Warranty or Additional Liability), followed by the
appendix on how to apply the license and the per-file boilerplate notice ("Licensed under the
Apache License, Version 2.0 ...").
README.md (149 changed lines)
@@ -1,19 +1,19 @@
<p align="center">
  <img src="docs/img/codon.png?raw=true" width="600" alt="Codon"/>
</p>
<h1 align="center">
  <img src="docs/img/codon-banner.svg" alt="Codon banner"/>
</h1>

<h3 align="center">
  <a href="https://docs.exaloop.io/codon" target="_blank"><b>Docs</b></a>
  ·
  <a href="https://docs.exaloop.io/codon/general/faq" target="_blank"><b>FAQ</b></a>
  ·
  <a href="https://blog.exaloop.io" target="_blank"><b>Blog</b></a>
  <a href="https://exaloop.io/blog" target="_blank"><b>Blog</b></a>
  ·
  <a href="https://join.slack.com/t/exaloop/shared_invite/zt-1jusa4kc0-T3rRWrrHDk_iZ1dMS8s0JQ" target="_blank">Chat</a>
  ·
  <a href="https://docs.exaloop.io/codon/general/roadmap" target="_blank">Roadmap</a>
  ·
  <a href="https://exaloop.io/benchmarks" target="_blank">Benchmarks</a>
  <a href="https://exaloop.io/#benchmarks" target="_blank">Benchmarks</a>
</h3>

<a href="https://github.com/exaloop/codon/actions/workflows/ci.yml">
@@ -21,7 +21,7 @@
    alt="Build Status">
</a>

## What is Codon?
# What is Codon?

Codon is a high-performance Python implementation that compiles to native machine code without
any runtime overhead. Typical speedups over vanilla Python are on the order of 10-100x or more, on
@@ -32,7 +32,7 @@ higher still.
*Think of Codon as Python reimagined for static, ahead-of-time compilation, built from the ground
up with best possible performance in mind.*

### Goals
## Goals

- :bulb: **No learning curve:** Be as close to CPython as possible in terms of syntax, semantics and libraries
- :rocket: **Top-notch performance:** At *least* on par with low-level languages like C, C++ or Rust
@@ -41,7 +41,7 @@ up with best possible performance in mind.*
  and libraries
- :battery: **Interoperability:** Full interoperability with Python's ecosystem of packages and libraries

### Non-goals
## Non-goals

- :x: *Drop-in replacement for CPython:* Codon is not a drop-in replacement for CPython. There are some
  aspects of Python that are not suitable for static compilation — we don't support these in Codon.
@@ -54,55 +54,62 @@ up with best possible performance in mind.*
features as much as possible. While Codon does add some new syntax in a couple places (e.g. to express
parallelism), we try to make it as familiar and intuitive as possible.

## Install
## How it works

Pre-built binaries for Linux (x86_64) and macOS (x86_64 and arm64) are available alongside [each release](https://github.com/exaloop/codon/releases).
Download and install with:
<p align="center">
  <img src="docs/img/codon-figure.svg" width="90%" alt="Codon figure"/>
</p>

# Quick start

Download and install Codon with this command:

```bash
/bin/bash -c "$(curl -fsSL https://exaloop.io/install.sh)"
```

Or you can [build from source](https://docs.exaloop.io/codon/advanced/build).
After following the prompts, the `codon` command will be available to use. For example:

## Examples
- To run a program: `codon run file.py`
- To run a program with optimizations enabled: `codon run -release file.py`
- To compile to an executable: `codon build -release file.py`
- To generate LLVM IR: `codon build -release -llvm file.py`

Codon is a Python-compatible language, and many Python programs will work with few if any modifications:
Many more options are available and described in [the docs](https://docs.exaloop.io/codon/general/intro).

Alternatively, you can [build from source](https://docs.exaloop.io/codon/advanced/build).

# Examples

## Basics

Codon supports much of Python, and many Python programs will work with few if any modifications.
Here's a simple script `fib.py` that computes the 40th Fibonacci number...

``` python
from time import time

```python
def fib(n):
    a, b = 0, 1
    while a < n:
        print(a, end=' ')
        a, b = b, a+b
    print()
fib(1000)
    return n if n < 2 else fib(n - 1) + fib(n - 2)

t0 = time()
ans = fib(40)
t1 = time()
print(f'Computed fib(40) = {ans} in {t1 - t0} seconds.')
```

The `codon` compiler has a number of options and modes:
... run through Python and Codon:

```bash
# compile and run the program
codon run fib.py
# 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987

# compile and run the program with optimizations enabled
codon run -release fib.py
# 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987

# compile to executable with optimizations enabled
codon build -release -exe fib.py
./fib
# 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987

# compile to LLVM IR file with optimizations enabled
codon build -release -llvm fib.py
# outputs file fib.ll
```
$ python3 fib.py
Computed fib(40) = 102334155 in 17.979357957839966 seconds.
$ codon run -release fib.py
Computed fib(40) = 102334155 in 0.275645 seconds.
```

See [the docs](https://docs.exaloop.io/codon/general/intro) for more options and examples.
## Using Python libraries

You can import and use any Python package from Codon. For example:
You can import and use any Python package from Codon via `from python import`. For example:

```python
from python import matplotlib.pyplot as plt
@@ -112,11 +119,13 @@ plt.show()
```

(Just remember to set the `CODON_PYTHON` environment variable to the CPython shared library,
as explained in the [the docs](https://docs.exaloop.io/codon/interoperability/python).)
as explained in the [the Python interoperability docs](https://docs.exaloop.io/codon/interoperability/python).)

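A minimal end-to-end sketch of the same mechanism, assuming `CODON_PYTHON` is set and that NumPy is installed in that CPython environment (the module choice here is illustrative):

```python
# Hedged sketch: calling a CPython package from Codon with `from python import`.
# Assumes CODON_PYTHON points at the CPython shared library and that `numpy`
# is installed in that interpreter's environment.
from python import numpy as np  # resolved by the embedded CPython at run time

arr = np.arange(10)             # a CPython-side NumPy array (a Python object in Codon)
print(arr.sum())                # method calls are forwarded to CPython
```
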
This prime counting example showcases Codon's [OpenMP](https://www.openmp.org/) support, enabled
with the addition of one line. The `@par` annotation tells the compiler to parallelize the
following `for`-loop, in this case using a dynamic schedule, chunk size of 100, and 16 threads.
## Parallelism

Codon supports native multithreading via [OpenMP](https://www.openmp.org/). The `@par` annotation
in the code below tells the compiler to parallelize the following `for`-loop, in this case using
a dynamic schedule, chunk size of 100, and 16 threads.

```python
from sys import argv
@@ -139,7 +148,10 @@ for i in range(2, limit):
print(total)
```

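The full loop body falls outside the diff context shown above; here is a hedged sketch of the kind of program the text describes, with the `@par` settings spelled out (the `is_prime` helper is illustrative):

```python
# Hedged sketch of the prime-counting example: `@par` parallelizes the loop
# with a dynamic schedule, chunk size 100, and 16 threads, as described above.
from sys import argv

def is_prime(n):
    if n < 2:
        return False
    for i in range(2, n):
        if n % i == 0:
            return False
    return True

limit = int(argv[1])
total = 0

@par(schedule='dynamic', chunk_size=100, num_threads=16)
for i in range(2, limit):
    if is_prime(i):
        total += 1  # compiled into an atomic reduction (see the note below)

print(total)
```
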
Codon supports writing and executing GPU kernels. Here's an example that computes the
Note that Codon automatically turns the `total += 1` statement in the loop body into an atomic
reduction to avoid race conditions. Learn more in the [multithreading docs](https://docs.exaloop.io/codon/advanced/parallel).

Codon also supports writing and executing GPU kernels. Here's an example that computes the
[Mandelbrot set](https://en.wikipedia.org/wiki/Mandelbrot_set):

```python
@@ -169,8 +181,47 @@ def mandelbrot(pixels):
mandelbrot(pixels, grid=(N*N)//1024, block=1024)
```

GPU programming can also be done using the `@par` syntax with `@par(gpu=True)`.
GPU programming can also be done using the `@par` syntax with `@par(gpu=True)`. See the
[GPU programming docs](https://docs.exaloop.io/codon/advanced/gpu) for more details.

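A hedged sketch of that `@par(gpu=True)` form, separate from the Mandelbrot kernel above (the arrays and arithmetic are illustrative, and a CUDA-capable GPU plus a GPU-enabled Codon build are assumed):

```python
# Hedged sketch: offloading a parallel loop to the GPU with @par(gpu=True).
x = [float(i) for i in range(1_000_000)]
y = [0.0] * len(x)

@par(gpu=True)
for i in range(len(x)):
    y[i] = x[i] * x[i] + 1.0
```
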
## Documentation
## NumPy support

Please see [docs.exaloop.io](https://docs.exaloop.io/codon) for in-depth documentation.
Codon includes a feature-complete, fully-compiled native NumPy implementation. It uses the same
API as NumPy, but re-implements everything in Codon itself, allowing for a range of optimizations
and performance improvements.

Here's an example NumPy program that approximates $\pi$ using random numbers...

``` python
import time
import numpy as np

rng = np.random.default_rng(seed=0)
x = rng.random(500_000_000)
y = rng.random(500_000_000)

t0 = time.time()
# pi ~= 4 x (fraction of points in circle)
pi = ((x-1)**2 + (y-1)**2 < 1).sum() * (4 / len(x))
t1 = time.time()

print(f'Computed pi~={pi:.4f} in {t1 - t0:.2f} sec')
```

... run through Python and Codon:

```
$ python3 pi.py
Computed pi~=3.1417 in 2.25 sec
$ codon run -release pi.py
Computed pi~=3.1417 in 0.43 sec
```

Codon can speed up NumPy code through general-purpose and NumPy-specific compiler optimizations,
including inlining, fusion, memory allocation elision and more. Furthermore, Codon's NumPy
implementation works with its multithreading and GPU capabilities, and can even integrate with
[PyTorch](https://pytorch.org). Learn more in the [Codon-NumPy docs](https://docs.exaloop.io/codon/interoperability/numpy).

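A small, hedged illustration of the kind of element-wise expression those fusion and allocation-elision passes target (sizes and values are arbitrary):

```python
# Hedged sketch: in stock NumPy, each step below allocates a temporary array;
# a fusing compiler can evaluate the whole expression in one pass instead.
import numpy as np

x = np.arange(10_000_000, dtype=np.float64)
y = np.sqrt(x) * 2.0 + 1.0
print(y.sum())
```
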
# Documentation

Please see [docs.exaloop.io](https://docs.exaloop.io) for in-depth documentation.
@@ -1,8 +1,8 @@
set(CPM_DOWNLOAD_VERSION 0.32.3)
set(CPM_DOWNLOAD_VERSION 0.40.8)
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
  message(STATUS "Downloading CPM.cmake...")
  file(DOWNLOAD https://github.com/TheLartians/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake ${CPM_DOWNLOAD_LOCATION})
  file(DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake ${CPM_DOWNLOAD_LOCATION})
endif()
include(${CPM_DOWNLOAD_LOCATION})
@@ -77,9 +77,9 @@ endif()

CPMAddPackage(
  NAME bdwgc
  GITHUB_REPOSITORY "ivmai/bdwgc"
  GITHUB_REPOSITORY "exaloop/bdwgc"
  VERSION 8.0.5
  GIT_TAG d0ba209660ea8c663e06d9a68332ba5f42da54ba
  GIT_TAG e16c67244aff26802203060422545d38305e0160
  EXCLUDE_FROM_ALL YES
  OPTIONS "CMAKE_POSITION_INDEPENDENT_CODE ON"
          "BUILD_SHARED_LIBS OFF"
@@ -163,3 +163,28 @@ CPMAddPackage(
  GITHUB_REPOSITORY "fastfloat/fast_float"
  GIT_TAG v6.1.1
  EXCLUDE_FROM_ALL YES)

if(NOT APPLE)
  enable_language(Fortran)
  CPMAddPackage(
    NAME openblas
    GITHUB_REPOSITORY "OpenMathLib/OpenBLAS"
    GIT_TAG v0.3.29
    EXCLUDE_FROM_ALL YES
    OPTIONS "DYNAMIC_ARCH ON"
            "BUILD_TESTING OFF"
            "BUILD_BENCHMARKS OFF"
            "NUM_THREADS 64"
            "CCOMMON_OPT -O3")
endif()

CPMAddPackage(
  NAME highway
  GITHUB_REPOSITORY "google/highway"
  GIT_TAG 1.2.0
  EXCLUDE_FROM_ALL YES
  OPTIONS "HWY_ENABLE_CONTRIB ON"
          "HWY_ENABLE_EXAMPLES OFF"
          "HWY_ENABLE_INSTALL OFF"
          "HWY_ENABLE_TESTS OFF"
          "BUILD_TESTING OFF")
codon/app/main.cpp
@@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include <algorithm>
#include <cstdio>
@@ -11,6 +11,7 @@
#include <unordered_map>
#include <vector>

#include "codon/cir/util/format.h"
#include "codon/compiler/compiler.h"
#include "codon/compiler/error.h"
#include "codon/compiler/jit.h"
@ -87,7 +88,7 @@ void initLogFlags(const llvm::cl::opt<std::string> &log) {
|
|||
codon::getLogger().parse(std::string(d));
|
||||
}
|
||||
|
||||
enum BuildKind { LLVM, Bitcode, Object, Executable, Library, PyExtension, Detect };
|
||||
enum BuildKind { LLVM, Bitcode, Object, Executable, Library, PyExtension, Detect, CIR };
|
||||
enum OptMode { Debug, Release };
|
||||
enum Numerics { C, Python };
|
||||
} // namespace
|
||||
|
@ -121,7 +122,8 @@ int docMode(const std::vector<const char *> &args, const std::string &argv0) {
|
|||
}
|
||||
};
|
||||
|
||||
collectPaths(args[1]);
|
||||
if (args.size() > 1)
|
||||
collectPaths(args[1]);
|
||||
auto compiler = std::make_unique<codon::Compiler>(args[0]);
|
||||
bool failed = false;
|
||||
auto result = compiler->docgen(files);
|
||||
|
@ -332,6 +334,7 @@ int buildMode(const std::vector<const char *> &args, const std::string &argv0) {
|
|||
clEnumValN(Executable, "exe", "Generate executable"),
|
||||
clEnumValN(Library, "lib", "Generate shared library"),
|
||||
clEnumValN(PyExtension, "pyext", "Generate Python extension module"),
|
||||
clEnumValN(CIR, "cir", "Generate Codon Intermediate Representation"),
|
||||
clEnumValN(Detect, "detect",
|
||||
"Detect output type based on output file extension")),
|
||||
llvm::cl::init(Detect));
|
||||
|
@ -371,6 +374,9 @@ int buildMode(const std::vector<const char *> &args, const std::string &argv0) {
|
|||
case BuildKind::Detect:
|
||||
extension = "";
|
||||
break;
|
||||
case BuildKind::CIR:
|
||||
extension = ".cir";
|
||||
break;
|
||||
default:
|
||||
seqassertn(0, "unknown build kind");
|
||||
}
|
||||
|
@ -400,6 +406,11 @@ int buildMode(const std::vector<const char *> &args, const std::string &argv0) {
|
|||
compiler->getLLVMVisitor()->writeToPythonExtension(*compiler->getCache()->pyModule,
|
||||
filename);
|
||||
break;
|
||||
case BuildKind::CIR: {
|
||||
std::ofstream out(filename);
|
||||
codon::ir::util::format(out, compiler->getModule());
|
||||
break;
|
||||
}
|
||||
case BuildKind::Detect:
|
||||
compiler->getLLVMVisitor()->compile(filename, argv0, libsVec, lflags);
|
||||
break;
|
||||
The next set of per-file hunks are identical copyright-year bumps in the touched codon/cir sources and
headers (analysis, capture, cfg, dominator, reaching, global_vars, side_effect):

@@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
@@ -293,7 +293,7 @@ struct SideEfectAnalyzer : public util::ConstVisitor {
  }

  void visit(const CallInstr *v) override {
    auto s = Status::PURE;
    auto s = process(v->getCallee());
    auto callStatus = Status::UNKNOWN;
    for (auto *x : *v) {
      s = max(s, process(x));

@@ -303,7 +303,6 @@ struct SideEfectAnalyzer : public util::ConstVisitor {
      s = max(s, callStatus);
    } else {
      // unknown function
      process(v->getCallee());
      s = Status::UNKNOWN;
    }
    set(v, s, callStatus);
|
|
|
@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "attribute.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "base.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "const.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "nodes.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "flow.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "func.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "instr.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "gpu.h"


@ -6,6 +6,7 @@
#include <memory>
#include <string>

#include "codon/cir/llvm/optimize.h"
#include "codon/util/common.h"

namespace codon {

@ -204,6 +205,139 @@ llvm::Function *makeNoOp(llvm::Function *F) {
|
|||
using Codegen =
|
||||
std::function<void(llvm::IRBuilder<> &, const std::vector<llvm::Value *> &)>;
|
||||
|
||||
void codegenVectorizedUnaryLoop(llvm::IRBuilder<> &B,
|
||||
const std::vector<llvm::Value *> &args,
|
||||
llvm::Function *func) {
|
||||
// Create IR to represent:
|
||||
// p_in = in
|
||||
// p_out = out
|
||||
// for i in range(n):
|
||||
// *p_out = func(*p_in)
|
||||
// p_in += is
|
||||
// p_out += os
|
||||
auto &context = B.getContext();
|
||||
auto *parent = B.GetInsertBlock()->getParent();
|
||||
auto *ty = func->getReturnType();
|
||||
auto *in = args[0];
|
||||
auto *is = args[1];
|
||||
auto *out = args[2];
|
||||
auto *os = args[3];
|
||||
auto *n = args[4];
|
||||
|
||||
auto *loop = llvm::BasicBlock::Create(context, "loop", parent);
|
||||
auto *exit = llvm::BasicBlock::Create(context, "exit", parent);
|
||||
|
||||
auto *pinStore = B.CreateAlloca(B.getPtrTy());
|
||||
auto *poutStore = B.CreateAlloca(B.getPtrTy());
|
||||
auto *idxStore = B.CreateAlloca(B.getInt64Ty());
|
||||
|
||||
// p_in = in
|
||||
B.CreateStore(in, pinStore);
|
||||
// p_out = out
|
||||
B.CreateStore(out, poutStore);
|
||||
// i = 0
|
||||
B.CreateStore(B.getInt64(0), idxStore);
|
||||
// if n > 0: goto loop; else: goto exit
|
||||
B.CreateCondBr(B.CreateICmpSGT(n, B.getInt64(0)), loop, exit);
|
||||
|
||||
// load pointers
|
||||
B.SetInsertPoint(loop);
|
||||
auto *pin = B.CreateLoad(B.getPtrTy(), pinStore);
|
||||
auto *pout = B.CreateLoad(B.getPtrTy(), poutStore);
|
||||
|
||||
// y = func(x)
|
||||
auto *x = B.CreateLoad(ty, pin);
|
||||
auto *y = B.CreateCall(func, x);
|
||||
B.CreateStore(y, pout);
|
||||
|
||||
auto *idx = B.CreateLoad(B.getInt64Ty(), idxStore);
|
||||
// i += 1
|
||||
B.CreateStore(B.CreateAdd(idx, B.getInt64(1)), idxStore);
|
||||
// p_in += is
|
||||
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pin, is), pinStore);
|
||||
// p_out += os
|
||||
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pout, os), poutStore);
|
||||
|
||||
idx = B.CreateLoad(B.getInt64Ty(), idxStore);
|
||||
// if i < n: goto loop; else: goto exit
|
||||
B.CreateCondBr(B.CreateICmpSLT(idx, n), loop, exit);
|
||||
|
||||
B.SetInsertPoint(exit);
|
||||
B.CreateRet(llvm::UndefValue::get(parent->getReturnType()));
|
||||
}
|
||||
|
||||
void codegenVectorizedBinaryLoop(llvm::IRBuilder<> &B,
|
||||
const std::vector<llvm::Value *> &args,
|
||||
llvm::Function *func) {
|
||||
// Create IR to represent:
|
||||
// p_in1 = in1
|
||||
// p_in2 = in2
|
||||
// p_out = out
|
||||
// for i in range(n):
|
||||
// *p_out = func(*p_in1, *p_in2)
|
||||
// p_in1 += is1
|
||||
// p_in2 += is2
|
||||
// p_out += os
|
||||
auto &context = B.getContext();
|
||||
auto *parent = B.GetInsertBlock()->getParent();
|
||||
auto *ty = func->getReturnType();
|
||||
auto *in1 = args[0];
|
||||
auto *is1 = args[1];
|
||||
auto *in2 = args[2];
|
||||
auto *is2 = args[3];
|
||||
auto *out = args[4];
|
||||
auto *os = args[5];
|
||||
auto *n = args[6];
|
||||
|
||||
auto *loop = llvm::BasicBlock::Create(context, "loop", parent);
|
||||
auto *exit = llvm::BasicBlock::Create(context, "exit", parent);
|
||||
|
||||
auto *pin1Store = B.CreateAlloca(B.getPtrTy());
|
||||
auto *pin2Store = B.CreateAlloca(B.getPtrTy());
|
||||
auto *poutStore = B.CreateAlloca(B.getPtrTy());
|
||||
auto *idxStore = B.CreateAlloca(B.getInt64Ty());
|
||||
|
||||
// p_in1 = in1
|
||||
B.CreateStore(in1, pin1Store);
|
||||
// p_in2 = in2
|
||||
B.CreateStore(in2, pin2Store);
|
||||
// p_out = out
|
||||
B.CreateStore(out, poutStore);
|
||||
// i = 0
|
||||
B.CreateStore(B.getInt64(0), idxStore);
|
||||
// if n > 0: goto loop; else: goto exit
|
||||
B.CreateCondBr(B.CreateICmpSGT(n, B.getInt64(0)), loop, exit);
|
||||
|
||||
// load pointers
|
||||
B.SetInsertPoint(loop);
|
||||
auto *pin1 = B.CreateLoad(B.getPtrTy(), pin1Store);
|
||||
auto *pin2 = B.CreateLoad(B.getPtrTy(), pin2Store);
|
||||
auto *pout = B.CreateLoad(B.getPtrTy(), poutStore);
|
||||
|
||||
// y = func(x1, x2)
|
||||
auto *x1 = B.CreateLoad(ty, pin1);
|
||||
auto *x2 = B.CreateLoad(ty, pin2);
|
||||
auto *y = B.CreateCall(func, {x1, x2});
|
||||
B.CreateStore(y, pout);
|
||||
|
||||
auto *idx = B.CreateLoad(B.getInt64Ty(), idxStore);
|
||||
// i += 1
|
||||
B.CreateStore(B.CreateAdd(idx, B.getInt64(1)), idxStore);
|
||||
// p_in1 += is1
|
||||
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pin1, is1), pin1Store);
|
||||
// p_in2 += is2
|
||||
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pin2, is2), pin2Store);
|
||||
// p_out += os
|
||||
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pout, os), poutStore);
|
||||
|
||||
idx = B.CreateLoad(B.getInt64Ty(), idxStore);
|
||||
// if i < n: goto loop; else: goto exit
|
||||
B.CreateCondBr(B.CreateICmpSLT(idx, n), loop, exit);
|
||||
|
||||
B.SetInsertPoint(exit);
|
||||
B.CreateRet(llvm::UndefValue::get(parent->getReturnType()));
|
||||
}
|
||||
|
||||
llvm::Function *makeFillIn(llvm::Function *F, Codegen codegen) {
|
||||
auto *M = F->getParent();
|
||||
auto &context = M->getContext();
|
||||
|
@ -346,6 +480,13 @@ void remapFunctions(llvm::Module *M) {
|
|||
B.CreateRet(mem);
|
||||
}},
|
||||
|
||||
{"seq_alloc_uncollectable",
|
||||
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
|
||||
auto *M = B.GetInsertBlock()->getModule();
|
||||
llvm::Value *mem = B.CreateCall(makeMalloc(M), args[0]);
|
||||
B.CreateRet(mem);
|
||||
}},
|
||||
|
||||
{"seq_alloc_atomic",
|
||||
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
|
||||
auto *M = B.GetInsertBlock()->getModule();
|
||||
|
@ -353,6 +494,13 @@ void remapFunctions(llvm::Module *M) {
|
|||
B.CreateRet(mem);
|
||||
}},
|
||||
|
||||
{"seq_alloc_atomic_uncollectable",
|
||||
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
|
||||
auto *M = B.GetInsertBlock()->getModule();
|
||||
llvm::Value *mem = B.CreateCall(makeMalloc(M), args[0]);
|
||||
B.CreateRet(mem);
|
||||
}},
|
||||
|
||||
{"seq_realloc",
|
||||
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
|
||||
auto *M = B.GetInsertBlock()->getModule();
|
||||
|
@ -396,6 +544,93 @@ void remapFunctions(llvm::Module *M) {
|
|||
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
|
||||
B.CreateUnreachable();
|
||||
}},
|
||||
|
||||
#define FILLIN_VECLOOP_UNARY32(loop, func) \
|
||||
{ \
|
||||
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
|
||||
auto *M = B.GetInsertBlock()->getModule(); \
|
||||
auto f = llvm::cast<llvm::Function>( \
|
||||
M->getOrInsertFunction(func, B.getFloatTy(), B.getFloatTy()).getCallee()); \
|
||||
f->setWillReturn(); \
|
||||
codegenVectorizedUnaryLoop(B, args, f); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define FILLIN_VECLOOP_UNARY64(loop, func) \
|
||||
{ \
|
||||
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
|
||||
auto *M = B.GetInsertBlock()->getModule(); \
|
||||
auto f = llvm::cast<llvm::Function>( \
|
||||
M->getOrInsertFunction(func, B.getDoubleTy(), B.getDoubleTy()).getCallee()); \
|
||||
f->setWillReturn(); \
|
||||
codegenVectorizedUnaryLoop(B, args, f); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define FILLIN_VECLOOP_BINARY32(loop, func) \
|
||||
{ \
|
||||
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
|
||||
auto *M = B.GetInsertBlock()->getModule(); \
|
||||
auto f = llvm::cast<llvm::Function>( \
|
||||
M->getOrInsertFunction(func, B.getFloatTy(), B.getFloatTy(), B.getFloatTy()) \
|
||||
.getCallee()); \
|
||||
f->setWillReturn(); \
|
||||
codegenVectorizedBinaryLoop(B, args, f); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define FILLIN_VECLOOP_BINARY64(loop, func) \
|
||||
{ \
|
||||
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
|
||||
auto *M = B.GetInsertBlock()->getModule(); \
|
||||
auto f = llvm::cast<llvm::Function>( \
|
||||
M->getOrInsertFunction(func, B.getDoubleTy(), B.getDoubleTy(), \
|
||||
B.getDoubleTy()) \
|
||||
.getCallee()); \
|
||||
f->setWillReturn(); \
|
||||
codegenVectorizedBinaryLoop(B, args, f); \
|
||||
} \
|
||||
}
|
||||
|
||||
FILLIN_VECLOOP_UNARY64("cnp_acos_float64", "__nv_acos"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_acosh_float64", "__nv_acosh"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_asin_float64", "__nv_asin"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_asinh_float64", "__nv_asinh"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_atan_float64", "__nv_atan"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_atanh_float64", "__nv_atanh"),
|
||||
FILLIN_VECLOOP_BINARY64("cnp_atan2_float64", "__nv_atan2"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_exp_float64", "__nv_exp"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_exp2_float64", "__nv_exp2"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_expm1_float64", "__nv_expm1"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_log_float64", "__nv_log"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_log10_float64", "__nv_log10"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_log1p_float64", "__nv_log1p"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_log2_float64", "__nv_log2"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_sin_float64", "__nv_sin"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_sinh_float64", "__nv_sinh"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_tan_float64", "__nv_tan"),
|
||||
FILLIN_VECLOOP_UNARY64("cnp_tanh_float64", "__nv_tanh"),
|
||||
FILLIN_VECLOOP_BINARY64("cnp_hypot_float64", "__nv_hypot"),
|
||||
|
||||
FILLIN_VECLOOP_UNARY32("cnp_acos_float32", "__nv_acosf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_acosh_float32", "__nv_acoshf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_asin_float32", "__nv_asinf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_asinh_float32", "__nv_asinhf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_atan_float32", "__nv_atanf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_atanh_float32", "__nv_atanhf"),
|
||||
FILLIN_VECLOOP_BINARY32("cnp_atan2_float32", "__nv_atan2f"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_exp_float32", "__nv_expf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_exp2_float32", "__nv_exp2f"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_expm1_float32", "__nv_expm1f"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_log_float32", "__nv_logf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_log10_float32", "__nv_log10f"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_log1p_float32", "__nv_log1pf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_log2_float32", "__nv_log2f"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_sin_float32", "__nv_sinf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_sinh_float32", "__nv_sinhf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_tan_float32", "__nv_tanf"),
|
||||
FILLIN_VECLOOP_UNARY32("cnp_tanh_float32", "__nv_tanhf"),
|
||||
FILLIN_VECLOOP_BINARY32("cnp_hypot_float32", "__nv_hypotf"),
|
||||
};
|
||||
|
||||
for (auto &pair : remapping) {
|
||||
|
@ -636,6 +871,11 @@ void applyGPUTransformations(llvm::Module *M, const std::string &ptxFilename) {
|
|||
clone->setTargetTriple(llvm::Triple::normalize(GPU_TRIPLE));
|
||||
clone->setDataLayout(GPU_DL);
|
||||
|
||||
if (isFastMathOn()) {
|
||||
clone->addModuleFlag(llvm::Module::ModFlagBehavior::Override, "nvvm-reflect-ftz",
|
||||
1);
|
||||
}
|
||||
|
||||
llvm::NamedMDNode *nvvmAnno = clone->getOrInsertNamedMetadata("nvvm.annotations");
|
||||
std::vector<llvm::GlobalValue *> kernels;
|
||||
|
||||
|
|
|
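The gpu.cpp changes above add `codegenVectorizedUnaryLoop`/`codegenVectorizedBinaryLoop`, which emit LLVM IR for a strided element-wise loop, and remap the `cnp_*` vectorized NumPy loops to CUDA libdevice functions (`__nv_*`). A plain C++ sketch of what the emitted unary loop computes, with hypothetical template names; strides are in bytes, which is why the generated IR advances `i8` pointers:

```cpp
// Plain C++ sketch of the loop that codegenVectorizedUnaryLoop emits as LLVM
// IR. The element type T and callable F stand in for the resolved dtype and
// the remapped libdevice call (e.g. __nv_exp); this helper is illustrative
// and not part of the Codon runtime.
#include <cstdint>

template <typename T, typename F>
void vectorized_unary_loop(const char *in, int64_t is, char *out, int64_t os,
                           int64_t n, F func) {
  const char *p_in = in;
  char *p_out = out;
  for (int64_t i = 0; i < n; ++i) {
    *reinterpret_cast<T *>(p_out) = func(*reinterpret_cast<const T *>(p_in));
    p_in += is;  // advance input by its stride in bytes
    p_out += os; // advance output by its stride in bytes
  }
}
```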
@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "llvisitor.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

@ -0,0 +1,115 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "native.h"
|
||||
|
||||
#include "codon/cir/llvm/llvm.h"
|
||||
#include "codon/cir/llvm/native/targets/aarch64.h"
|
||||
#include "codon/cir/llvm/native/targets/x86.h"
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace {
|
||||
std::unique_ptr<Target> getNativeTarget(const llvm::Triple &triple) {
|
||||
std::unique_ptr<Target> result = std::unique_ptr<Target>();
|
||||
switch (triple.getArch()) {
|
||||
default:
|
||||
break;
|
||||
case llvm::Triple::mips:
|
||||
case llvm::Triple::mipsel:
|
||||
case llvm::Triple::mips64:
|
||||
case llvm::Triple::mips64el:
|
||||
// nothing
|
||||
break;
|
||||
|
||||
case llvm::Triple::arm:
|
||||
case llvm::Triple::armeb:
|
||||
case llvm::Triple::thumb:
|
||||
case llvm::Triple::thumbeb:
|
||||
// nothing
|
||||
break;
|
||||
|
||||
case llvm::Triple::ppc:
|
||||
case llvm::Triple::ppcle:
|
||||
case llvm::Triple::ppc64:
|
||||
case llvm::Triple::ppc64le:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::riscv32:
|
||||
case llvm::Triple::riscv64:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::systemz:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::aarch64:
|
||||
case llvm::Triple::aarch64_32:
|
||||
case llvm::Triple::aarch64_be:
|
||||
result = std::make_unique<Aarch64>();
|
||||
break;
|
||||
case llvm::Triple::x86:
|
||||
case llvm::Triple::x86_64:
|
||||
result = std::make_unique<X86>();
|
||||
break;
|
||||
case llvm::Triple::hexagon:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::wasm32:
|
||||
case llvm::Triple::wasm64:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::sparc:
|
||||
case llvm::Triple::sparcel:
|
||||
case llvm::Triple::sparcv9:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::r600:
|
||||
case llvm::Triple::amdgcn:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::msp430:
|
||||
// nothing
|
||||
break;
|
||||
case llvm::Triple::ve:
|
||||
// nothing
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
class ArchNativePass : public llvm::PassInfoMixin<ArchNativePass> {
|
||||
private:
|
||||
std::string cpu;
|
||||
std::string features;
|
||||
|
||||
public:
|
||||
explicit ArchNativePass(const std::string &cpu = "", const std::string &features = "")
|
||||
: cpu(cpu), features(features) {}
|
||||
|
||||
llvm::PreservedAnalyses run(llvm::Function &F, llvm::FunctionAnalysisManager &) {
|
||||
if (!cpu.empty())
|
||||
F.addFnAttr("target-cpu", cpu);
|
||||
if (!features.empty())
|
||||
F.addFnAttr("target-features", features);
|
||||
F.addFnAttr("frame-pointer", "none");
|
||||
return llvm::PreservedAnalyses::all();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void addNativeLLVMPasses(llvm::PassBuilder *pb) {
|
||||
llvm::Triple triple = llvm::EngineBuilder().selectTarget()->getTargetTriple();
|
||||
auto target = getNativeTarget(triple);
|
||||
if (!target)
|
||||
return;
|
||||
std::string cpu = target->getCPU(triple);
|
||||
std::string features = target->getFeatures(triple);
|
||||
|
||||
pb->registerPipelineEarlySimplificationEPCallback(
|
||||
[cpu, features](llvm::ModulePassManager &pm, llvm::OptimizationLevel opt) {
|
||||
pm.addPass(
|
||||
llvm::createModuleToFunctionPassAdaptor(ArchNativePass(cpu, features)));
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -0,0 +1,13 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

#include "codon/cir/llvm/llvm.h"

namespace codon {
namespace ir {

void addNativeLLVMPasses(llvm::PassBuilder *pb);

} // namespace ir
} // namespace codon

@ -0,0 +1,162 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "aarch64.h"
|
||||
|
||||
#include "llvm/TargetParser/AArch64TargetParser.h"
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace {
|
||||
template <typename T> std::string join(const T &v, const std::string &delim = ",") {
|
||||
std::ostringstream s;
|
||||
for (const auto &i : v) {
|
||||
if (&i != &v[0])
|
||||
s << delim;
|
||||
s << std::string(i);
|
||||
}
|
||||
return s.str();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string Aarch64::getCPU(const llvm::Triple &triple) const {
|
||||
return llvm::sys::getHostCPUName().str();
|
||||
}
|
||||
|
||||
std::string Aarch64::getFeatures(const llvm::Triple &triple) const {
|
||||
std::vector<llvm::StringRef> features;
|
||||
// Enable NEON by default.
|
||||
features.push_back("+neon");
|
||||
|
||||
std::string cpu(llvm::sys::getHostCPUName());
|
||||
const std::optional<llvm::AArch64::CpuInfo> cpuInfo = llvm::AArch64::parseCpu(cpu);
|
||||
if (!cpuInfo)
|
||||
return "";
|
||||
|
||||
if (cpu == "cyclone" || llvm::StringRef(cpu).startswith("apple")) {
|
||||
features.push_back("+zcm");
|
||||
features.push_back("+zcz");
|
||||
}
|
||||
|
||||
auto *archInfo = &cpuInfo->Arch;
|
||||
features.push_back(archInfo->ArchFeature);
|
||||
uint64_t extension = cpuInfo->getImpliedExtensions();
|
||||
if (!llvm::AArch64::getExtensionFeatures(extension, features))
|
||||
return "";
|
||||
|
||||
// Handle (arch-dependent) fp16fml/fullfp16 relationship.
|
||||
// FIXME: this fp16fml option handling will be reimplemented after the
|
||||
// TargetParser rewrite.
|
||||
const auto ItRNoFullFP16 = std::find(features.rbegin(), features.rend(), "-fullfp16");
|
||||
const auto ItRFP16FML = std::find(features.rbegin(), features.rend(), "+fp16fml");
|
||||
if (llvm::is_contained(features, "+v8.4a")) {
|
||||
const auto ItRFullFP16 = std::find(features.rbegin(), features.rend(), "+fullfp16");
|
||||
if (ItRFullFP16 < ItRNoFullFP16 && ItRFullFP16 < ItRFP16FML) {
|
||||
// Only entangled feature that can be to the right of this +fullfp16 is -fp16fml.
|
||||
// Only append the +fp16fml if there is no -fp16fml after the +fullfp16.
|
||||
if (std::find(features.rbegin(), ItRFullFP16, "-fp16fml") == ItRFullFP16)
|
||||
features.push_back("+fp16fml");
|
||||
} else
|
||||
goto fp16_fml_fallthrough;
|
||||
} else {
|
||||
fp16_fml_fallthrough:
|
||||
// In both of these cases, putting the 'other' feature on the end of the vector will
|
||||
// result in the same effect as placing it immediately after the current feature.
|
||||
if (ItRNoFullFP16 < ItRFP16FML)
|
||||
features.push_back("-fp16fml");
|
||||
else if (ItRNoFullFP16 > ItRFP16FML)
|
||||
features.push_back("+fullfp16");
|
||||
}
|
||||
|
||||
// FIXME: this needs reimplementation too after the TargetParser rewrite
|
||||
//
|
||||
// Context sensitive meaning of Crypto:
|
||||
// 1) For Arch >= ARMv8.4a: crypto = sm4 + sha3 + sha2 + aes
|
||||
// 2) For Arch <= ARMv8.3a: crypto = sha2 + aes
|
||||
const auto ItBegin = features.begin();
|
||||
const auto ItEnd = features.end();
|
||||
const auto ItRBegin = features.rbegin();
|
||||
const auto ItREnd = features.rend();
|
||||
const auto ItRCrypto = std::find(ItRBegin, ItREnd, "+crypto");
|
||||
const auto ItRNoCrypto = std::find(ItRBegin, ItREnd, "-crypto");
|
||||
const auto HasCrypto = ItRCrypto != ItREnd;
|
||||
const auto HasNoCrypto = ItRNoCrypto != ItREnd;
|
||||
const ptrdiff_t PosCrypto = ItRCrypto - ItRBegin;
|
||||
const ptrdiff_t PosNoCrypto = ItRNoCrypto - ItRBegin;
|
||||
|
||||
bool NoCrypto = false;
|
||||
if (HasCrypto && HasNoCrypto) {
|
||||
if (PosNoCrypto < PosCrypto)
|
||||
NoCrypto = true;
|
||||
}
|
||||
|
||||
if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd) {
|
||||
if (HasCrypto && !NoCrypto) {
|
||||
// Check if we have NOT disabled an algorithm with something like:
|
||||
// +crypto, -algorithm
|
||||
// And if "-algorithm" does not occur, we enable that crypto algorithm.
|
||||
const bool HasSM4 = (std::find(ItBegin, ItEnd, "-sm4") == ItEnd);
|
||||
const bool HasSHA3 = (std::find(ItBegin, ItEnd, "-sha3") == ItEnd);
|
||||
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "-sha2") == ItEnd);
|
||||
const bool HasAES = (std::find(ItBegin, ItEnd, "-aes") == ItEnd);
|
||||
if (HasSM4)
|
||||
features.push_back("+sm4");
|
||||
if (HasSHA3)
|
||||
features.push_back("+sha3");
|
||||
if (HasSHA2)
|
||||
features.push_back("+sha2");
|
||||
if (HasAES)
|
||||
features.push_back("+aes");
|
||||
} else if (HasNoCrypto) {
|
||||
// Check if we have NOT enabled a crypto algorithm with something like:
|
||||
// -crypto, +algorithm
|
||||
// And if "+algorithm" does not occur, we disable that crypto algorithm.
|
||||
const bool HasSM4 = (std::find(ItBegin, ItEnd, "+sm4") != ItEnd);
|
||||
const bool HasSHA3 = (std::find(ItBegin, ItEnd, "+sha3") != ItEnd);
|
||||
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "+sha2") != ItEnd);
|
||||
const bool HasAES = (std::find(ItBegin, ItEnd, "+aes") != ItEnd);
|
||||
if (!HasSM4)
|
||||
features.push_back("-sm4");
|
||||
if (!HasSHA3)
|
||||
features.push_back("-sha3");
|
||||
if (!HasSHA2)
|
||||
features.push_back("-sha2");
|
||||
if (!HasAES)
|
||||
features.push_back("-aes");
|
||||
}
|
||||
} else {
|
||||
if (HasCrypto && !NoCrypto) {
|
||||
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "-sha2") == ItEnd);
|
||||
const bool HasAES = (std::find(ItBegin, ItEnd, "-aes") == ItEnd);
|
||||
if (HasSHA2)
|
||||
features.push_back("+sha2");
|
||||
if (HasAES)
|
||||
features.push_back("+aes");
|
||||
} else if (HasNoCrypto) {
|
||||
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "+sha2") != ItEnd);
|
||||
const bool HasAES = (std::find(ItBegin, ItEnd, "+aes") != ItEnd);
|
||||
const bool HasV82a = (std::find(ItBegin, ItEnd, "+v8.2a") != ItEnd);
|
||||
const bool HasV83a = (std::find(ItBegin, ItEnd, "+v8.3a") != ItEnd);
|
||||
const bool HasV84a = (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd);
|
||||
if (!HasSHA2)
|
||||
features.push_back("-sha2");
|
||||
if (!HasAES)
|
||||
features.push_back("-aes");
|
||||
if (HasV82a || HasV83a || HasV84a) {
|
||||
features.push_back("-sm4");
|
||||
features.push_back("-sha3");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto V8_6Pos = llvm::find(features, "+v8.6a");
|
||||
if (V8_6Pos != std::end(features))
|
||||
V8_6Pos = features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
|
||||
|
||||
if (triple.isOSOpenBSD())
|
||||
features.push_back("+strict-align");
|
||||
|
||||
return join(features);
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -0,0 +1,17 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

#include "codon/cir/llvm/native/targets/target.h"

namespace codon {
namespace ir {

class Aarch64 : public Target {
public:
  std::string getCPU(const llvm::Triple &triple) const override;
  std::string getFeatures(const llvm::Triple &triple) const override;
};

} // namespace ir
} // namespace codon

@ -0,0 +1,21 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

#include <sstream>
#include <string>

#include "codon/cir/llvm/llvm.h"

namespace codon {
namespace ir {

class Target {
public:
  virtual ~Target() {}
  virtual std::string getCPU(const llvm::Triple &triple) const = 0;
  virtual std::string getFeatures(const llvm::Triple &triple) const = 0;
};

} // namespace ir
} // namespace codon

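`target.h` above defines the small interface that the per-architecture helpers (`X86`, `Aarch64`) implement. A hypothetical sketch of how another architecture could plug into it; the `PowerPC` class below does not exist in Codon and would also need to be returned from `getNativeTarget()` in native.cpp for the corresponding `llvm::Triple::ppc*` cases:

```cpp
// Hypothetical extension sketch only: a conservative Target implementation
// for a new architecture. It reuses the host-CPU query already used by the
// X86 and Aarch64 implementations in this diff.
#include "codon/cir/llvm/native/targets/target.h"
#include "llvm/TargetParser/Host.h"

namespace codon {
namespace ir {

class PowerPC : public Target {
public:
  std::string getCPU(const llvm::Triple &triple) const override {
    // Fall back to whatever LLVM detects for the host.
    return llvm::sys::getHostCPUName().str();
  }
  std::string getFeatures(const llvm::Triple &triple) const override {
    // Keep it conservative: request no extra feature flags.
    return "";
  }
};

} // namespace ir
} // namespace codon
```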
@ -0,0 +1,108 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "x86.h"
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace {
|
||||
template <typename T> std::string join(const T &v, const std::string &delim = ",") {
|
||||
std::ostringstream s;
|
||||
for (const auto &i : v) {
|
||||
if (&i != &v[0])
|
||||
s << delim;
|
||||
s << std::string(i);
|
||||
}
|
||||
return s.str();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string X86::getCPU(const llvm::Triple &triple) const {
|
||||
auto CPU = llvm::sys::getHostCPUName();
|
||||
if (!CPU.empty() && CPU != "generic")
|
||||
return std::string(CPU);
|
||||
|
||||
// Select the default CPU if none was given (or detection failed).
|
||||
|
||||
if (!triple.isX86())
|
||||
return ""; // This routine is only handling x86 targets.
|
||||
|
||||
bool is64Bit = triple.getArch() == llvm::Triple::x86_64;
|
||||
|
||||
// FIXME: Need target hooks.
|
||||
if (triple.isOSDarwin()) {
|
||||
if (triple.getArchName() == "x86_64h")
|
||||
return "core-avx2";
|
||||
// macosx10.12 drops support for all pre-Penryn Macs.
|
||||
// Simulators can still run on 10.11 though, like Xcode.
|
||||
if (triple.isMacOSX() && !triple.isOSVersionLT(10, 12))
|
||||
return "penryn";
|
||||
|
||||
if (triple.isDriverKit())
|
||||
return "nehalem";
|
||||
|
||||
// The oldest x86_64 Macs have core2/Merom; the oldest x86 Macs have Yonah.
|
||||
return is64Bit ? "core2" : "yonah";
|
||||
}
|
||||
|
||||
// Set up default CPU name for PS4/PS5 compilers.
|
||||
if (triple.isPS4())
|
||||
return "btver2";
|
||||
if (triple.isPS5())
|
||||
return "znver2";
|
||||
|
||||
// On Android use targets compatible with gcc
|
||||
if (triple.isAndroid())
|
||||
return is64Bit ? "x86-64" : "i686";
|
||||
|
||||
// Everything else goes to x86-64 in 64-bit mode.
|
||||
if (is64Bit)
|
||||
return "x86-64";
|
||||
|
||||
switch (triple.getOS()) {
|
||||
case llvm::Triple::NetBSD:
|
||||
return "i486";
|
||||
case llvm::Triple::Haiku:
|
||||
case llvm::Triple::OpenBSD:
|
||||
return "i586";
|
||||
case llvm::Triple::FreeBSD:
|
||||
return "i686";
|
||||
default:
|
||||
// Fallback to p4.
|
||||
return "pentium4";
|
||||
}
|
||||
}
|
||||
|
||||
std::string X86::getFeatures(const llvm::Triple &triple) const {
|
||||
std::vector<std::string> features;
|
||||
llvm::StringMap<bool> hostFeatures;
|
||||
if (llvm::sys::getHostCPUFeatures(hostFeatures)) {
|
||||
for (auto &f : hostFeatures) {
|
||||
features.push_back((f.second ? "+" : "-") + f.first().str());
|
||||
}
|
||||
}
|
||||
|
||||
if (triple.getArchName() == "x86_64h") {
|
||||
// x86_64h implies quite a few of the more modern subtarget features
|
||||
// for Haswell class CPUs, but not all of them. Opt-out of a few.
|
||||
features.push_back("-rdrnd");
|
||||
features.push_back("-aes");
|
||||
features.push_back("-pclmul");
|
||||
features.push_back("-rtm");
|
||||
features.push_back("-fsgsbase");
|
||||
}
|
||||
|
||||
const llvm::Triple::ArchType ArchType = triple.getArch();
|
||||
// Add features to be compatible with gcc for Android.
|
||||
if (triple.isAndroid()) {
|
||||
if (ArchType == llvm::Triple::x86_64) {
|
||||
features.push_back("+sse4.2");
|
||||
features.push_back("+popcnt");
|
||||
features.push_back("+cx16");
|
||||
} else
|
||||
features.push_back("+ssse3");
|
||||
}
|
||||
return join(features);
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -0,0 +1,17 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

#include "codon/cir/llvm/native/targets/target.h"

namespace codon {
namespace ir {

class X86 : public Target {
public:
  std::string getCPU(const llvm::Triple &triple) const override;
  std::string getFeatures(const llvm::Triple &triple) const override;
};

} // namespace ir
} // namespace codon

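`X86::getFeatures()` above builds a `+feat,-feat` string from `llvm::sys::getHostCPUFeatures()`. A small standalone sketch (assuming the same LLVM 17-era API used in this diff and an ordinary LLVM development setup, not the Codon build itself) that prints the host CPU name and a feature string of the same form, which can be handy when checking what the native pass will tag functions with:

```cpp
// Standalone sketch: print the host CPU and a "+feat,-feat" string in the
// same form X86::getFeatures() produces. Uses the bool-returning
// getHostCPUFeatures overload, matching the call in the diff above.
#include <iostream>
#include <string>
#include "llvm/ADT/StringMap.h"
#include "llvm/TargetParser/Host.h"

int main() {
  std::cout << "cpu: " << llvm::sys::getHostCPUName().str() << "\n";
  llvm::StringMap<bool> feats;
  std::string joined;
  if (llvm::sys::getHostCPUFeatures(feats)) {
    for (auto &f : feats) {
      if (!joined.empty())
        joined += ",";
      joined += (f.second ? "+" : "-") + f.first().str();
    }
  }
  std::cout << "features: " << joined << "\n";
}
```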
@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "optimize.h"

@ -6,12 +6,23 @@
#include <deque>

#include "codon/cir/llvm/gpu.h"
#include "codon/cir/llvm/native/native.h"
#include "codon/util/common.h"

static llvm::codegen::RegisterCodeGenFlags CFG;

namespace codon {
namespace ir {
namespace {
llvm::cl::opt<bool>
AutoFree("auto-free",
llvm::cl::desc("Insert free() calls on allocated memory automatically"),
llvm::cl::init(false), llvm::cl::Hidden);

llvm::cl::opt<bool> FastMath("fast-math",
llvm::cl::desc("Apply fastmath optimizations"),
llvm::cl::init(false));
} // namespace

std::unique_ptr<llvm::TargetMachine>
getTargetMachine(llvm::Triple triple, llvm::StringRef cpuStr,
@ -77,6 +88,27 @@ void applyDebugTransformations(llvm::Module *module, bool debug, bool jit) {
}
}

void applyFastMathTransformations(llvm::Module *module) {
if (!FastMath)
return;

for (auto &f : *module) {
for (auto &block : f) {
for (auto &inst : block) {
if (auto *binop = llvm::dyn_cast<llvm::BinaryOperator>(&inst)) {
if (binop->getType()->isFloatingPointTy())
binop->setFast(true);
}

if (auto *intrinsic = llvm::dyn_cast<llvm::IntrinsicInst>(&inst)) {
if (intrinsic->getType()->isFloatingPointTy())
intrinsic->setFast(true);
}
}
}
}
}

struct AllocInfo {
std::vector<std::string> allocators;
std::string realloc;
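`applyFastMathTransformations()` above walks the finished module and sets the fast-math flag on every floating-point `BinaryOperator` and `IntrinsicInst`. For comparison, a hedged sketch of the more common front-loaded approach, where the flags are installed on an `IRBuilder` so that new floating-point instructions are created already flagged (this is not what Codon does here; it is shown only to clarify what `setFast(true)` toggles):

```cpp
// Comparison sketch, not Codon's implementation: install fast-math flags on
// an IRBuilder so every FP instruction it creates is already flagged.
#include "llvm/IR/IRBuilder.h"

void buildFastFAdd(llvm::IRBuilder<> &B, llvm::Value *x, llvm::Value *y) {
  llvm::FastMathFlags FMF;
  FMF.setFast();              // nnan, ninf, nsz, arcp, contract, afn, reassoc
  B.setFastMathFlags(FMF);    // applies to subsequent FP instructions
  B.CreateFAdd(x, y, "sum");  // emitted as "fadd fast"
}
```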
@ -751,6 +783,136 @@ struct AllocationHoister : public llvm::PassInfoMixin<AllocationHoister> {
|
|||
}
|
||||
};
|
||||
|
||||
struct AllocationAutoFree : public llvm::PassInfoMixin<AllocationAutoFree> {
|
||||
AllocInfo info;
|
||||
|
||||
explicit AllocationAutoFree(
|
||||
std::vector<std::string> allocators = {"seq_alloc", "seq_alloc_atomic",
|
||||
"seq_alloc_uncollectable",
|
||||
"seq_alloc_atomic_uncollectable"},
|
||||
const std::string &realloc = "seq_realloc", const std::string &free = "seq_free")
|
||||
: info(std::move(allocators), realloc, free) {}
|
||||
|
||||
llvm::PreservedAnalyses run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) {
|
||||
// Get the necessary analysis results.
|
||||
auto &MSSA = FAM.getResult<llvm::MemorySSAAnalysis>(F);
|
||||
auto &TLI = FAM.getResult<llvm::TargetLibraryAnalysis>(F);
|
||||
auto &AA = FAM.getResult<llvm::AAManager>(F);
|
||||
auto &DT = FAM.getResult<llvm::DominatorTreeAnalysis>(F);
|
||||
auto &PDT = FAM.getResult<llvm::PostDominatorTreeAnalysis>(F);
|
||||
auto &LI = FAM.getResult<llvm::LoopAnalysis>(F);
|
||||
auto &CI = FAM.getResult<llvm::CycleAnalysis>(F);
|
||||
bool Changed = false;
|
||||
|
||||
// Traverse the function to find allocs and insert corresponding frees.
|
||||
for (auto &BB : F) {
|
||||
for (auto &I : BB) {
|
||||
if (auto *Alloc = llvm::dyn_cast<llvm::CallInst>(&I)) {
|
||||
auto *Callee = Alloc->getCalledFunction();
|
||||
if (!Callee || !Callee->isDeclaration())
|
||||
continue;
|
||||
|
||||
if (info.isAlloc(Alloc)) {
|
||||
if (llvm::PointerMayBeCaptured(Alloc, /*ReturnCaptures=*/true,
|
||||
/*StoreCaptures=*/true))
|
||||
continue;
|
||||
|
||||
Changed |= insertFree(Alloc, F, DT, PDT, LI, CI);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (Changed ? llvm::PreservedAnalyses::none() : llvm::PreservedAnalyses::all());
|
||||
}
|
||||
|
||||
bool insertFree(llvm::Instruction *Alloc, llvm::Function &F, llvm::DominatorTree &DT,
|
||||
llvm::PostDominatorTree &PDT, llvm::LoopInfo &LI,
|
||||
llvm::CycleInfo &CI) {
|
||||
llvm::SmallVector<llvm::Value *, 8> Worklist;
|
||||
llvm::SmallPtrSet<llvm::Value *, 8> Visited;
|
||||
llvm::SmallVector<llvm::BasicBlock *, 8> UseBlocks;
|
||||
|
||||
// We need to find a basic block that:
|
||||
// 1. Post-dominates the allocation block (so we always free it)
|
||||
// 2. Is dominated by the allocation block (so the use is valid)
|
||||
// 3. Post-dominates all uses
|
||||
|
||||
// Start with the original pointer.
|
||||
Worklist.push_back(Alloc);
|
||||
UseBlocks.push_back(Alloc->getParent());
|
||||
|
||||
// Track all blocks where the pointer or its derived values are used.
|
||||
while (!Worklist.empty()) {
|
||||
auto *CurrentPtr = Worklist.pop_back_val();
|
||||
if (!Visited.insert(CurrentPtr).second)
|
||||
continue;
|
||||
|
||||
// Traverse all users of the current pointer.
|
||||
for (auto *U : CurrentPtr->users()) {
|
||||
if (auto *Inst = llvm::dyn_cast<llvm::Instruction>(U)) {
|
||||
if (auto *call = llvm::dyn_cast<llvm::CallBase>(Inst))
|
||||
if (call->getCalledFunction() && info.isFree(call->getCalledFunction()))
|
||||
return false;
|
||||
|
||||
if (llvm::isa<llvm::GetElementPtrInst>(Inst) ||
|
||||
llvm::isa<llvm::BitCastInst>(Inst) || llvm::isa<llvm::PHINode>(Inst) ||
|
||||
llvm::isa<llvm::SelectInst>(Inst)) {
|
||||
Worklist.push_back(Inst);
|
||||
} else {
|
||||
// If this is a real use, record the block.
|
||||
UseBlocks.push_back(Inst->getParent());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the closest post-dominating block of all the use blocks.
|
||||
llvm::BasicBlock *PostDomBlock = nullptr;
|
||||
for (auto *BB : UseBlocks) {
|
||||
if (!PostDomBlock) {
|
||||
PostDomBlock = BB;
|
||||
} else {
|
||||
PostDomBlock = PDT.findNearestCommonDominator(PostDomBlock, BB);
|
||||
if (!PostDomBlock) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto *allocLoop = LI.getLoopFor(Alloc->getParent());
|
||||
auto *freeLoop = LI.getLoopFor(PostDomBlock);
|
||||
|
||||
while (allocLoop != freeLoop) {
|
||||
if (!freeLoop)
|
||||
return false;
|
||||
PostDomBlock = freeLoop->getExitBlock();
|
||||
if (!PostDomBlock)
|
||||
return false;
|
||||
freeLoop = LI.getLoopFor(PostDomBlock);
|
||||
}
|
||||
|
||||
if (!DT.dominates(Alloc->getParent(), PostDomBlock)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm::IRBuilder<> B(PostDomBlock->getTerminator());
|
||||
auto *FreeFunc = F.getParent()->getFunction(info.free);
|
||||
if (!FreeFunc) {
|
||||
FreeFunc = llvm::Function::Create(
|
||||
llvm::FunctionType::get(B.getVoidTy(), {B.getPtrTy()}, false),
|
||||
llvm::Function::ExternalLinkage, info.free, F.getParent());
|
||||
FreeFunc->setWillReturn();
|
||||
FreeFunc->setDoesNotThrow();
|
||||
}
|
||||
|
||||
// Add free
|
||||
B.CreateCall(FreeFunc, Alloc);
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/// Sometimes coroutine lowering produces hard-to-analyze loops involving
|
||||
/// function pointer comparisons. This pass puts them into a somewhat
|
||||
/// easier-to-analyze form.
|
||||
|
@ -826,9 +988,15 @@ struct CoroBranchSimplifier : public llvm::PassInfoMixin<CoroBranchSimplifier> {
}
};

llvm::cl::opt<bool>
DisableNative("disable-native",
llvm::cl::desc("Disable architecture-specific optimizations"),
llvm::cl::init(false));

void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit,
PluginManager *plugins) {
applyDebugTransformations(module, debug, jit);
applyFastMathTransformations(module);

llvm::LoopAnalysisManager lam;
llvm::FunctionAnalysisManager fam;

@ -860,9 +1028,14 @@ void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit,
pm.addPass(llvm::LoopSimplifyPass());
pm.addPass(llvm::LCSSAPass());
pm.addPass(AllocationHoister());
if (AutoFree)
pm.addPass(AllocationAutoFree());
}
});

if (!DisableNative)
addNativeLLVMPasses(&pb);

if (plugins) {
for (auto *plugin : *plugins) {
plugin->dsl->addLLVMPasses(&pb, debug);
@ -884,7 +1057,15 @@ void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit,

void verify(llvm::Module *module) {
const bool broken = llvm::verifyModule(*module, &llvm::errs());
seqassertn(!broken, "module broken");
if (broken) {
auto fo = fopen("_dump.ll", "w");
llvm::raw_fd_ostream fout(fileno(fo), true);
fout << *module;
fout.close();
}
seqassertn(!broken, "Generated LLVM IR is invalid and has been dumped to '_dump.ll'. "
"Please submit a bug report at https://github.com/exaloop/codon "
"including the code and generated LLVM IR.");
}

} // namespace
@ -906,5 +1087,7 @@ void optimize(llvm::Module *module, bool debug, bool jit, PluginManager *plugins
verify(module);
}

bool isFastMathOn() { return FastMath; }

} // namespace ir
} // namespace codon

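The `AllocationAutoFree` pass added in this file (gated by the hidden `auto-free` option) only frees allocations whose pointers provably do not escape, using `llvm::PointerMayBeCaptured` before searching for a post-dominating insertion point. A minimal sketch of that escape check in isolation; the helper name is hypothetical and not part of Codon:

```cpp
// Sketch of the escape check that gates automatic free insertion, mirroring
// the llvm::PointerMayBeCaptured call in the AllocationAutoFree pass above.
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Instructions.h"

// Returns true if it is safe to consider freeing this allocation locally:
// the pointer must not escape via stores or the function's return value.
bool mayFreeLocally(llvm::CallInst *alloc) {
  return !llvm::PointerMayBeCaptured(alloc, /*ReturnCaptures=*/true,
                                     /*StoreCaptures=*/true);
}
```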
@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

@ -20,5 +20,7 @@ getTargetMachine(llvm::Module *module, bool setFunctionAttributes = false,

void optimize(llvm::Module *module, bool debug, bool jit = false,
PluginManager *plugins = nullptr);

bool isFastMathOn();
} // namespace ir
} // namespace codon

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "module.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "canonical.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "dead_code.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "global_demote.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "replacer.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "const_fold.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "const_prop.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "folding.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "imperative.h"

@ -117,7 +117,7 @@ void ImperativeForFlowLowering::handle(ForFlow *v) {
// body
auto *parent = cast<BodiedFunc>(getParentFunc());
auto *series = M->N<SeriesFlow>(v->getSrcInfo());
auto *listVar = util::makeVar(list, series, parent)->getVar();
auto *listVar = util::makeVar(list, series, parent);
auto *lenVal = M->Nr<ExtractInstr>(M->Nr<VarValue>(listVar), "len");
auto *lenVar = util::makeVar(lenVal, series, parent);
auto *ptrVal = M->Nr<ExtractInstr>(

@ -129,12 +129,14 @@ void ImperativeForFlowLowering::handle(ForFlow *v) {
auto *oldLoopVar = v->getVar();
auto *newLoopVar = M->Nr<Var>(M->getIntType());
parent->push_back(newLoopVar);
auto *replacement = M->N<ImperativeForFlow>(
v->getSrcInfo(), M->getInt(0), 1, lenVar, body, newLoopVar, std::move(sched));
auto *replacement = M->N<ImperativeForFlow>(v->getSrcInfo(), M->getInt(0), 1,
M->Nr<VarValue>(lenVar), body,
newLoopVar, std::move(sched));
series->push_back(replacement);
body->insert(
body->begin(),
M->Nr<AssignInstr>(oldLoopVar, (*ptrVar)[*M->Nr<VarValue>(newLoopVar)]));
M->Nr<AssignInstr>(oldLoopVar,
(*M->Nr<VarValue>(ptrVar))[*M->Nr<VarValue>(newLoopVar)]));
v->replaceAll(series);
}
}

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "pipeline.h"


@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#include "manager.h"

@ -15,6 +15,7 @@
#include "codon/cir/transform/lowering/imperative.h"
#include "codon/cir/transform/lowering/pipeline.h"
#include "codon/cir/transform/manager.h"
#include "codon/cir/transform/numpy/numpy.h"
#include "codon/cir/transform/parallel/openmp.h"
#include "codon/cir/transform/pass.h"
#include "codon/cir/transform/pythonic/dict.h"

@ -196,6 +197,9 @@ void PassManager::registerStandardPasses(PassManager::Init init) {
pyNumerics),
/*insertBefore=*/"", {seKey1, rdKey, globalKey},
{seKey1, rdKey, cfgKey, globalKey, capKey});
registerPass(std::make_unique<numpy::NumPyFusionPass>(rdKey, seKey2),
/*insertBefore=*/"", {rdKey, seKey2},
{seKey1, rdKey, cfgKey, globalKey, capKey});

// parallel
registerPass(std::make_unique<parallel::OpenMPPass>(), /*insertBefore=*/"", {},

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>

#pragma once

@ -0,0 +1,982 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "numpy.h"
|
||||
|
||||
#include "codon/cir/util/irtools.h"
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace transform {
|
||||
namespace numpy {
|
||||
namespace {
|
||||
types::Type *coerceScalarArray(NumPyType &scalar, NumPyType &array,
|
||||
NumPyPrimitiveTypes &T) {
|
||||
auto xtype = scalar.dtype;
|
||||
auto atype = array.dtype;
|
||||
bool aIsInt = false;
|
||||
bool xIsInt = false;
|
||||
bool aIsFloat = false;
|
||||
bool xIsFloat = false;
|
||||
bool aIsComplex = false;
|
||||
bool xIsComplex = false;
|
||||
|
||||
switch (atype) {
|
||||
case NumPyType::NP_TYPE_ARR_BOOL:
|
||||
break;
|
||||
case NumPyType::NP_TYPE_ARR_I8:
|
||||
case NumPyType::NP_TYPE_ARR_U8:
|
||||
case NumPyType::NP_TYPE_ARR_I16:
|
||||
case NumPyType::NP_TYPE_ARR_U16:
|
||||
case NumPyType::NP_TYPE_ARR_I32:
|
||||
case NumPyType::NP_TYPE_ARR_U32:
|
||||
case NumPyType::NP_TYPE_ARR_I64:
|
||||
case NumPyType::NP_TYPE_ARR_U64:
|
||||
aIsInt = true;
|
||||
break;
|
||||
case NumPyType::NP_TYPE_ARR_F16:
|
||||
case NumPyType::NP_TYPE_ARR_F32:
|
||||
case NumPyType::NP_TYPE_ARR_F64:
|
||||
aIsFloat = true;
|
||||
break;
|
||||
case NumPyType::NP_TYPE_ARR_C64:
|
||||
case NumPyType::NP_TYPE_ARR_C128:
|
||||
aIsComplex = true;
|
||||
break;
|
||||
default:
|
||||
seqassertn(false, "unexpected type");
|
||||
}
|
||||
|
||||
xIsInt = (xtype == NumPyType::NP_TYPE_BOOL || xtype == NumPyType::NP_TYPE_I64);
|
||||
xIsFloat = (xtype == NumPyType::NP_TYPE_F64);
|
||||
xIsComplex = (xtype == NumPyType::NP_TYPE_C128);
|
||||
|
||||
bool shouldCast =
|
||||
((xIsInt && (aIsInt || aIsFloat || aIsComplex)) ||
|
||||
(xIsFloat && (aIsFloat || aIsComplex)) || (xIsComplex && aIsComplex));
|
||||
|
||||
if ((atype == NumPyType::NP_TYPE_ARR_F16 || atype == NumPyType::NP_TYPE_ARR_F32) &&
|
||||
xtype == NumPyType::NP_TYPE_C128)
|
||||
return T.c64;
|
||||
else if (shouldCast)
|
||||
return array.getIRBaseType(T);
|
||||
else
|
||||
return scalar.getIRBaseType(T);
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
types::Type *decideTypes(E *expr, NumPyType &lhs, NumPyType &rhs,
|
||||
NumPyPrimitiveTypes &T) {
|
||||
// Special case(s)
|
||||
if (expr->op == E::NP_OP_COPYSIGN)
|
||||
return expr->type.getIRBaseType(T);
|
||||
|
||||
if (lhs.isArray() && !rhs.isArray())
|
||||
return coerceScalarArray(rhs, lhs, T);
|
||||
|
||||
if (!lhs.isArray() && rhs.isArray())
|
||||
return coerceScalarArray(lhs, rhs, T);
|
||||
|
||||
auto *t1 = lhs.getIRBaseType(T);
|
||||
auto *t2 = rhs.getIRBaseType(T);
|
||||
auto *M = t1->getModule();
|
||||
auto *coerceFunc = M->getOrRealizeFunc("_coerce", {}, {t1, t2}, FUSION_MODULE);
|
||||
seqassertn(coerceFunc, "coerce func not found");
|
||||
return util::getReturnType(coerceFunc);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void NumPyExpr::replace(NumPyExpr &e) {
|
||||
type = e.type;
|
||||
val = e.val;
|
||||
op = e.op;
|
||||
lhs = std::move(e.lhs);
|
||||
rhs = std::move(e.rhs);
|
||||
freeable = e.freeable;
|
||||
|
||||
e.type = {};
|
||||
e.val = nullptr;
|
||||
e.op = NP_OP_NONE;
|
||||
e.lhs = {};
|
||||
e.rhs = {};
|
||||
e.freeable = false;
|
||||
}
|
||||
|
||||
bool NumPyExpr::haveVectorizedLoop() const {
|
||||
if (lhs && !(lhs->type.dtype == NumPyType::NP_TYPE_ARR_F32 ||
|
||||
lhs->type.dtype == NumPyType::NP_TYPE_ARR_F64))
|
||||
return false;
|
||||
|
||||
if (rhs && !(rhs->type.dtype == NumPyType::NP_TYPE_ARR_F32 ||
|
||||
rhs->type.dtype == NumPyType::NP_TYPE_ARR_F64))
|
||||
return false;
|
||||
|
||||
if (lhs && rhs && lhs->type.dtype != rhs->type.dtype)
|
||||
return false;
|
||||
|
||||
// These are the loops available in the runtime library.
|
||||
static const std::vector<std::string> VecLoops = {
|
||||
"arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "arctan2",
|
||||
"cos", "exp", "exp2", "expm1", "log", "log10", "log1p",
|
||||
"log2", "sin", "sinh", "tanh", "hypot"};
|
||||
return std::find(VecLoops.begin(), VecLoops.end(), opstring()) != VecLoops.end();
|
||||
}
|
||||
|
||||
int64_t NumPyExpr::opcost() const {
|
||||
switch (op) {
|
||||
case NP_OP_NONE:
|
||||
return 0;
|
||||
case NP_OP_POS:
|
||||
return 0;
|
||||
case NP_OP_NEG:
|
||||
return 0;
|
||||
case NP_OP_INVERT:
|
||||
return 0;
|
||||
case NP_OP_ABS:
|
||||
return 1;
|
||||
case NP_OP_TRANSPOSE:
|
||||
return 0;
|
||||
case NP_OP_ADD:
|
||||
return 1;
|
||||
case NP_OP_SUB:
|
||||
return 1;
|
||||
case NP_OP_MUL:
|
||||
return 1;
|
||||
case NP_OP_MATMUL:
|
||||
return 20;
|
||||
case NP_OP_TRUE_DIV:
|
||||
return 8;
|
||||
case NP_OP_FLOOR_DIV:
|
||||
return 8;
|
||||
case NP_OP_MOD:
|
||||
return 8;
|
||||
case NP_OP_FMOD:
|
||||
return 8;
|
||||
case NP_OP_POW:
|
||||
return 8;
|
||||
case NP_OP_LSHIFT:
|
||||
return 1;
|
||||
case NP_OP_RSHIFT:
|
||||
return 1;
|
||||
case NP_OP_AND:
|
||||
return 1;
|
||||
case NP_OP_OR:
|
||||
return 1;
|
||||
case NP_OP_XOR:
|
||||
return 1;
|
||||
case NP_OP_LOGICAL_AND:
|
||||
return 1;
|
||||
case NP_OP_LOGICAL_OR:
|
||||
return 1;
|
||||
case NP_OP_LOGICAL_XOR:
|
||||
return 1;
|
||||
case NP_OP_EQ:
|
||||
return 1;
|
||||
case NP_OP_NE:
|
||||
return 1;
|
||||
case NP_OP_LT:
|
||||
return 1;
|
||||
case NP_OP_LE:
|
||||
return 1;
|
||||
case NP_OP_GT:
|
||||
return 1;
|
||||
case NP_OP_GE:
|
||||
return 1;
|
||||
case NP_OP_MIN:
|
||||
return 3;
|
||||
case NP_OP_MAX:
|
||||
return 3;
|
||||
case NP_OP_FMIN:
|
||||
return 3;
|
||||
case NP_OP_FMAX:
|
||||
return 3;
|
||||
case NP_OP_SIN:
|
||||
return 10;
|
||||
case NP_OP_COS:
|
||||
return 10;
|
||||
case NP_OP_TAN:
|
||||
return 10;
|
||||
case NP_OP_ARCSIN:
|
||||
return 20;
|
||||
case NP_OP_ARCCOS:
|
||||
return 20;
|
||||
case NP_OP_ARCTAN:
|
||||
return 20;
|
||||
case NP_OP_ARCTAN2:
|
||||
return 35;
|
||||
case NP_OP_HYPOT:
|
||||
return 5;
|
||||
case NP_OP_SINH:
|
||||
return 10;
|
||||
case NP_OP_COSH:
|
||||
return 10;
|
||||
case NP_OP_TANH:
|
||||
return 10;
|
||||
case NP_OP_ARCSINH:
|
||||
return 10;
|
||||
case NP_OP_ARCCOSH:
|
||||
return 10;
|
||||
case NP_OP_ARCTANH:
|
||||
return 10;
|
||||
case NP_OP_CONJ:
|
||||
return 1;
|
||||
case NP_OP_EXP:
|
||||
return 5;
|
||||
case NP_OP_EXP2:
|
||||
return 5;
|
||||
case NP_OP_LOG:
|
||||
return 5;
|
||||
case NP_OP_LOG2:
|
||||
return 5;
|
||||
case NP_OP_LOG10:
|
||||
return 5;
|
||||
case NP_OP_EXPM1:
|
||||
return 5;
|
||||
case NP_OP_LOG1P:
|
||||
return 5;
|
||||
case NP_OP_SQRT:
|
||||
return 2;
|
||||
case NP_OP_SQUARE:
|
||||
return 1;
|
||||
case NP_OP_CBRT:
|
||||
return 5;
|
||||
case NP_OP_LOGADDEXP:
|
||||
return 10;
|
||||
case NP_OP_LOGADDEXP2:
|
||||
return 10;
|
||||
case NP_OP_RECIPROCAL:
|
||||
return 1;
|
||||
case NP_OP_RINT:
|
||||
return 1;
|
||||
case NP_OP_FLOOR:
|
||||
return 1;
|
||||
case NP_OP_CEIL:
|
||||
return 1;
|
||||
case NP_OP_TRUNC:
|
||||
return 1;
|
||||
case NP_OP_ISNAN:
|
||||
return 1;
|
||||
case NP_OP_ISINF:
|
||||
return 1;
|
||||
case NP_OP_ISFINITE:
|
||||
return 1;
|
||||
case NP_OP_SIGN:
|
||||
return 1;
|
||||
case NP_OP_SIGNBIT:
|
||||
return 1;
|
||||
case NP_OP_COPYSIGN:
|
||||
return 1;
|
||||
case NP_OP_SPACING:
|
||||
return 1;
|
||||
case NP_OP_NEXTAFTER:
|
||||
return 1;
|
||||
case NP_OP_DEG2RAD:
|
||||
return 2;
|
||||
case NP_OP_RAD2DEG:
|
||||
return 2;
|
||||
case NP_OP_HEAVISIDE:
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t NumPyExpr::cost() const {
|
||||
auto c = opcost();
|
||||
if (c == -1)
|
||||
return -1;
|
||||
|
||||
// Account for the fact that the vectorized loops are much faster.
|
||||
if (haveVectorizedLoop()) {
|
||||
c *= 3;
|
||||
if (lhs->type.dtype == NumPyType::NP_TYPE_ARR_F32)
|
||||
c *= 2;
|
||||
}
|
||||
|
||||
bool lhsIntConst = (lhs && lhs->isLeaf() && isA<IntConst>(lhs->val));
|
||||
bool rhsIntConst = (rhs && rhs->isLeaf() && isA<IntConst>(rhs->val));
|
||||
bool lhsFloatConst = (lhs && lhs->isLeaf() && isA<FloatConst>(lhs->val));
|
||||
bool rhsFloatConst = (rhs && rhs->isLeaf() && isA<FloatConst>(rhs->val));
|
||||
bool lhsConst = lhsIntConst || lhsFloatConst;
|
||||
bool rhsConst = rhsIntConst || rhsFloatConst;
|
||||
|
||||
if (rhsConst || lhsConst) {
|
||||
switch (op) {
|
||||
case NP_OP_TRUE_DIV:
|
||||
case NP_OP_FLOOR_DIV:
|
||||
case NP_OP_MOD:
|
||||
case NP_OP_FMOD:
|
||||
c = 1;
|
||||
break;
|
||||
case NP_OP_POW:
|
||||
if (rhsIntConst)
|
||||
c = (cast<IntConst>(rhs->val)->getVal() == 2) ? 1 : 5;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (lhs) {
|
||||
auto cl = lhs->cost();
|
||||
if (cl == -1)
|
||||
return -1;
|
||||
c += cl;
|
||||
}
|
||||
|
||||
if (rhs) {
|
||||
auto cr = rhs->cost();
|
||||
if (cr == -1)
|
||||
return -1;
|
||||
c += cr;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
std::string NumPyExpr::opstring() const {
|
||||
static const std::unordered_map<Op, std::string> m = {
|
||||
{NP_OP_NONE, "a"},
|
||||
{NP_OP_POS, "pos"},
|
||||
{NP_OP_NEG, "neg"},
|
||||
{NP_OP_INVERT, "invert"},
|
||||
{NP_OP_ABS, "abs"},
|
||||
{NP_OP_TRANSPOSE, "transpose"},
|
||||
{NP_OP_ADD, "add"},
|
||||
{NP_OP_SUB, "sub"},
|
||||
{NP_OP_MUL, "mul"},
|
||||
{NP_OP_MATMUL, "matmul"},
|
||||
{NP_OP_TRUE_DIV, "true_div"},
|
||||
{NP_OP_FLOOR_DIV, "floor_div"},
|
||||
{NP_OP_MOD, "mod"},
|
||||
{NP_OP_FMOD, "fmod"},
|
||||
{NP_OP_POW, "pow"},
|
||||
{NP_OP_LSHIFT, "lshift"},
|
||||
{NP_OP_RSHIFT, "rshift"},
|
||||
{NP_OP_AND, "and"},
|
||||
{NP_OP_OR, "or"},
|
||||
{NP_OP_XOR, "xor"},
|
||||
{NP_OP_LOGICAL_AND, "logical_and"},
|
||||
{NP_OP_LOGICAL_OR, "logical_or"},
|
||||
{NP_OP_LOGICAL_XOR, "logical_xor"},
|
||||
{NP_OP_EQ, "eq"},
|
||||
{NP_OP_NE, "ne"},
|
||||
{NP_OP_LT, "lt"},
|
||||
{NP_OP_LE, "le"},
|
||||
{NP_OP_GT, "gt"},
|
||||
{NP_OP_GE, "ge"},
|
||||
{NP_OP_MIN, "minimum"},
|
||||
{NP_OP_MAX, "maximum"},
|
||||
{NP_OP_FMIN, "fmin"},
|
||||
{NP_OP_FMAX, "fmax"},
|
||||
{NP_OP_SIN, "sin"},
|
||||
{NP_OP_COS, "cos"},
|
||||
{NP_OP_TAN, "tan"},
|
||||
{NP_OP_ARCSIN, "arcsin"},
|
||||
{NP_OP_ARCCOS, "arccos"},
|
||||
{NP_OP_ARCTAN, "arctan"},
|
||||
{NP_OP_ARCTAN2, "arctan2"},
|
||||
{NP_OP_HYPOT, "hypot"},
|
||||
{NP_OP_SINH, "sinh"},
|
||||
{NP_OP_COSH, "cosh"},
|
||||
{NP_OP_TANH, "tanh"},
|
||||
{NP_OP_ARCSINH, "arcsinh"},
|
||||
{NP_OP_ARCCOSH, "arccosh"},
|
||||
{NP_OP_ARCTANH, "arctanh"},
|
||||
{NP_OP_CONJ, "conj"},
|
||||
{NP_OP_EXP, "exp"},
|
||||
{NP_OP_EXP2, "exp2"},
|
||||
{NP_OP_LOG, "log"},
|
||||
{NP_OP_LOG2, "log2"},
|
||||
{NP_OP_LOG10, "log10"},
|
||||
{NP_OP_EXPM1, "expm1"},
|
||||
{NP_OP_LOG1P, "log1p"},
|
||||
{NP_OP_SQRT, "sqrt"},
|
||||
{NP_OP_SQUARE, "square"},
|
||||
{NP_OP_CBRT, "cbrt"},
|
||||
{NP_OP_LOGADDEXP, "logaddexp"},
|
||||
{NP_OP_LOGADDEXP2, "logaddexp2"},
|
||||
{NP_OP_RECIPROCAL, "reciprocal"},
|
||||
{NP_OP_RINT, "rint"},
|
||||
{NP_OP_FLOOR, "floor"},
|
||||
{NP_OP_CEIL, "ceil"},
|
||||
{NP_OP_TRUNC, "trunc"},
|
||||
{NP_OP_ISNAN, "isnan"},
|
||||
{NP_OP_ISINF, "isinf"},
|
||||
{NP_OP_ISFINITE, "isfinite"},
|
||||
{NP_OP_SIGN, "sign"},
|
||||
{NP_OP_SIGNBIT, "signbit"},
|
||||
{NP_OP_COPYSIGN, "copysign"},
|
||||
{NP_OP_SPACING, "spacing"},
|
||||
{NP_OP_NEXTAFTER, "nextafter"},
|
||||
{NP_OP_DEG2RAD, "deg2rad"},
|
||||
{NP_OP_RAD2DEG, "rad2deg"},
|
||||
{NP_OP_HEAVISIDE, "heaviside"},
|
||||
};
|
||||
|
||||
auto it = m.find(op);
|
||||
seqassertn(it != m.end(), "op not found");
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void NumPyExpr::dump(std::ostream &os, int level, int &leafId) const {
|
||||
auto indent = [&]() {
|
||||
for (int i = 0; i < level; i++)
|
||||
os << " ";
|
||||
};
|
||||
|
||||
indent();
|
||||
if (op == NP_OP_NONE) {
|
||||
os << "\033[1;36m" << opstring() << leafId;
|
||||
++leafId;
|
||||
} else {
|
||||
os << "\033[1;33m" << opstring();
|
||||
}
|
||||
os << "\033[0m <" << type << ">";
|
||||
if (op != NP_OP_NONE)
|
||||
os << " \033[1;35m[cost=" << cost() << "]\033[0m";
|
||||
os << "\n";
|
||||
if (lhs)
|
||||
lhs->dump(os, level + 1, leafId);
|
||||
if (rhs)
|
||||
rhs->dump(os, level + 1, leafId);
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, NumPyExpr const &expr) {
|
||||
int leafId = 0;
|
||||
expr.dump(os, 0, leafId);
|
||||
return os;
|
||||
}
|
||||
|
||||
std::string NumPyExpr::str() const {
|
||||
std::stringstream buffer;
|
||||
buffer << *this;
|
||||
return buffer.str();
|
||||
}
|
||||
|
||||
void NumPyExpr::apply(std::function<void(NumPyExpr &)> f) {
|
||||
f(*this);
|
||||
if (lhs)
|
||||
lhs->apply(f);
|
||||
if (rhs)
|
||||
rhs->apply(f);
|
||||
}
|
||||
|
||||
Value *NumPyExpr::codegenBroadcasts(CodegenContext &C) {
|
||||
auto *M = C.M;
|
||||
auto &vars = C.vars;
|
||||
|
||||
Value *targetShape = nullptr;
|
||||
Value *result = nullptr;
|
||||
|
||||
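// Compare each array leaf's shape against the first array leaf's shape and OR
// the comparisons together, yielding a runtime flag that is true if any leaf's
// shape differs from the first's (i.e. broadcasting is needed).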
apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf() && e.type.isArray()) {
|
||||
auto it = vars.find(&e);
|
||||
seqassertn(it != vars.end(),
|
||||
"NumPyExpr not found in vars map (codegen broadcasts)");
|
||||
auto *var = it->second;
|
||||
auto *shape = M->getOrRealizeFunc("_shape", {var->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(shape, "shape function not found");
|
||||
auto *leafShape = util::call(shape, {M->Nr<VarValue>(var)});
|
||||
|
||||
if (!targetShape) {
|
||||
targetShape = leafShape;
|
||||
} else {
|
||||
auto *diff = (*targetShape != *leafShape);
|
||||
if (result) {
|
||||
result = *result | *diff;
|
||||
} else {
|
||||
result = diff;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return result ? result : M->getBool(false);
|
||||
}
|
||||
|
||||
Var *NumPyExpr::codegenFusedEval(CodegenContext &C) {
|
||||
auto *M = C.M;
|
||||
auto *series = C.series;
|
||||
auto *func = C.func;
|
||||
auto &vars = C.vars;
|
||||
auto &T = C.T;
|
||||
|
||||
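// Collect all leaves of the expression tree. Array leaves will become pointer
// arguments of the generated scalar kernel; scalar leaves are packed into an
// 'extra' tuple passed through the fusion module's loop driver.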
std::vector<std::pair<NumPyExpr *, Var *>> leaves;
|
||||
apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf()) {
|
||||
auto it = vars.find(&e);
|
||||
seqassertn(it != vars.end(), "NumPyExpr not found in vars map (fused eval)");
|
||||
auto *var = it->second;
|
||||
leaves.emplace_back(&e, var);
|
||||
}
|
||||
});
|
||||
|
||||
// Arrays for scalar expression function
|
||||
std::vector<Value *> arrays;
|
||||
std::vector<std::string> scalarFuncArgNames;
|
||||
std::vector<types::Type *> scalarFuncArgTypes;
|
||||
std::unordered_map<NumPyExpr *, Var *> scalarFuncArgMap;
|
||||
|
||||
// Scalars passed through 'extra' arg of ndarray._loop()
|
||||
std::vector<Value *> extra;
|
||||
std::unordered_map<NumPyExpr *, unsigned> extraMap;
|
||||
|
||||
auto *baseType = type.getIRBaseType(T);
|
||||
scalarFuncArgNames.push_back("out");
|
||||
scalarFuncArgTypes.push_back(M->getPointerType(baseType));
|
||||
|
||||
unsigned argIdx = 0;
|
||||
unsigned extraIdx = 0;
|
||||
|
||||
for (auto &e : leaves) {
|
||||
if (e.first->type.isArray()) {
|
||||
arrays.push_back(M->Nr<VarValue>(e.second));
|
||||
scalarFuncArgNames.push_back("in" + std::to_string(argIdx++));
|
||||
scalarFuncArgTypes.push_back(M->getPointerType(e.first->type.getIRBaseType(T)));
|
||||
} else {
|
||||
extra.push_back(M->Nr<VarValue>(e.second));
|
||||
extraMap.emplace(e.first, extraIdx++);
|
||||
}
|
||||
}
|
||||
|
||||
auto *extraTuple = util::makeTuple(extra, M);
|
||||
scalarFuncArgNames.push_back("extra");
|
||||
scalarFuncArgTypes.push_back(extraTuple->getType());
|
||||
auto *scalarFuncType = M->getFuncType(M->getNoneType(), scalarFuncArgTypes);
|
||||
auto *scalarFunc = M->Nr<BodiedFunc>("__numpy_fusion_scalar_fn");
|
||||
scalarFunc->realize(scalarFuncType, scalarFuncArgNames);
|
||||
std::vector<Var *> scalarFuncArgVars(scalarFunc->arg_begin(), scalarFunc->arg_end());
|
||||
|
||||
argIdx = 1;
|
||||
for (auto &e : leaves) {
|
||||
if (e.first->type.isArray()) {
|
||||
scalarFuncArgMap.emplace(e.first, scalarFuncArgVars[argIdx++]);
|
||||
}
|
||||
}
|
||||
auto *scalarExpr =
|
||||
codegenScalarExpr(C, scalarFuncArgMap, extraMap, scalarFuncArgVars.back());
|
||||
auto *ptrsetFunc = M->getOrRealizeFunc("_ptrset", {scalarFuncArgTypes[0], baseType},
|
||||
{}, FUSION_MODULE);
|
||||
seqassertn(ptrsetFunc, "ptrset func not found");
|
||||
scalarFunc->setBody(util::series(
|
||||
util::call(ptrsetFunc, {M->Nr<VarValue>(scalarFuncArgVars[0]), scalarExpr})));
|
||||
|
||||
auto *arraysTuple = util::makeTuple(arrays);
|
||||
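// '_loop_alloc' from the fusion module drives the element-wise loop: it is
// given the input arrays, the scalar kernel and the packed scalars, and its
// return value (the result array) becomes this expression's value.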
auto *loopFunc = M->getOrRealizeFunc(
|
||||
"_loop_alloc",
|
||||
{arraysTuple->getType(), scalarFunc->getType(), extraTuple->getType()},
|
||||
{baseType}, FUSION_MODULE);
|
||||
seqassertn(loopFunc, "loop_alloc func not found");
|
||||
|
||||
auto *result = util::makeVar(
|
||||
util::call(loopFunc, {arraysTuple, M->Nr<VarValue>(scalarFunc), extraTuple}),
|
||||
series, func);
|
||||
|
||||
// Free temporary arrays
|
||||
apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf() && e.freeable) {
|
||||
auto it = vars.find(&e);
|
||||
seqassertn(it != vars.end(), "NumPyExpr not found in vars map (fused eval)");
|
||||
auto *var = it->second;
|
||||
auto *freeFunc =
|
||||
M->getOrRealizeFunc("_free", {var->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(freeFunc, "free func not found");
|
||||
series->push_back(util::call(freeFunc, {M->Nr<VarValue>(var)}));
|
||||
}
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Var *NumPyExpr::codegenSequentialEval(CodegenContext &C) {
|
||||
auto *M = C.M;
|
||||
auto *series = C.series;
|
||||
auto *func = C.func;
|
||||
auto &vars = C.vars;
|
||||
auto &T = C.T;
|
||||
|
||||
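// Evaluate this node sequentially: materialize each operand, compute the
// broadcast output shape, reuse a temporary operand as the output buffer when
// its shape and dtype already match, and otherwise allocate a fresh array.
// The operation itself is applied either through a vectorized library loop or
// a generated scalar kernel, and temporaries are freed afterwards.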
if (isLeaf()) {
|
||||
auto it = vars.find(this);
|
||||
seqassertn(it != vars.end(),
|
||||
"NumPyExpr not found in vars map (codegen sequential eval)");
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Var *lv = lhs->codegenSequentialEval(C);
|
||||
Var *rv = rhs ? rhs->codegenSequentialEval(C) : nullptr;
|
||||
Var *like = nullptr;
|
||||
Value *outShapeVal = nullptr;
|
||||
|
||||
if (rv) {
|
||||
// Can't do anything special with matmul here...
|
||||
if (op == NP_OP_MATMUL) {
|
||||
auto *matmul = M->getOrRealizeFunc("_matmul", {lv->getType(), rv->getType()}, {},
|
||||
FUSION_MODULE);
|
||||
return util::makeVar(
|
||||
util::call(matmul, {M->Nr<VarValue>(lv), M->Nr<VarValue>(rv)}), series, func);
|
||||
}
|
||||
|
||||
auto *lshape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(lshape, "shape func not found for left arg");
|
||||
auto *rshape = M->getOrRealizeFunc("_shape", {rv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(rshape, "shape func not found for right arg");
|
||||
auto *leftShape = util::call(lshape, {M->Nr<VarValue>(lv)});
|
||||
auto *rightShape = util::call(rshape, {M->Nr<VarValue>(rv)});
|
||||
auto *shape = M->getOrRealizeFunc(
|
||||
"_broadcast", {leftShape->getType(), rightShape->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(shape, "output shape func not found");
|
||||
like = rhs->type.ndim > lhs->type.ndim ? rv : lv;
|
||||
outShapeVal = util::call(shape, {leftShape, rightShape});
|
||||
} else {
|
||||
auto *shape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(shape, "shape func not found");
|
||||
like = lv;
|
||||
outShapeVal = util::call(shape, {M->Nr<VarValue>(lv)});
|
||||
}
|
||||
|
||||
auto *outShape = util::makeVar(outShapeVal, series, func);
|
||||
Var *result = nullptr;
|
||||
|
||||
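// An operand can be reused as the output buffer only if it is a temporary
// array (a freeable leaf or an intermediate result) with the same dtype and
// ndim as the output.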
bool lfreeable = lhs && lhs->type.isArray() && (lhs->freeable || !lhs->isLeaf());
|
||||
bool rfreeable = rhs && rhs->type.isArray() && (rhs->freeable || !rhs->isLeaf());
|
||||
bool ltmp = lfreeable && lhs->type.dtype == type.dtype && lhs->type.ndim == type.ndim;
|
||||
bool rtmp = rfreeable && rhs->type.dtype == type.dtype && rhs->type.ndim == type.ndim;
|
||||
|
||||
auto *t = type.getIRBaseType(T);
|
||||
auto newArray = [&]() {
|
||||
auto *create = M->getOrRealizeFunc(
|
||||
"_create", {like->getType(), outShape->getType()}, {t}, FUSION_MODULE);
|
||||
seqassertn(create, "create func not found");
|
||||
return util::call(create, {M->Nr<VarValue>(like), M->Nr<VarValue>(outShape)});
|
||||
};
|
||||
|
||||
bool freeLeftStatic = false;
|
||||
bool freeRightStatic = false;
|
||||
Var *lcond = nullptr;
|
||||
Var *rcond = nullptr;
|
||||
|
||||
if (rv) {
|
||||
if (ltmp && rhs->type.ndim == 0) {
|
||||
// We are combining the lhs temp array with a scalar or 0-dim operand, so reuse the lhs array.
|
||||
result = lv;
|
||||
} else if (rtmp && lhs->type.ndim == 0) {
|
||||
// We are combining the rhs temp array with a scalar or 0-dim operand, so reuse the rhs array.
|
||||
result = rv;
|
||||
} else if (!ltmp && !rtmp) {
|
||||
// Neither operand is a temp array, so we must allocate a new array.
|
||||
result = util::makeVar(newArray(), series, func);
|
||||
freeLeftStatic = lfreeable;
|
||||
freeRightStatic = rfreeable;
|
||||
} else if (ltmp && rtmp) {
|
||||
// We won't know until runtime if we can reuse the temp array(s) since they
|
||||
// might broadcast.
|
||||
auto *lshape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(lshape, "shape function func not found for left arg");
|
||||
auto *rshape = M->getOrRealizeFunc("_shape", {rv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(rshape, "shape function func not found for right arg");
|
||||
auto *leftShape = util::call(lshape, {M->Nr<VarValue>(lv)});
|
||||
auto *rightShape = util::call(rshape, {M->Nr<VarValue>(rv)});
|
||||
lcond = util::makeVar(*leftShape == *M->Nr<VarValue>(outShape), series, func);
|
||||
rcond = util::makeVar(*rightShape == *M->Nr<VarValue>(outShape), series, func);
|
||||
auto *arr = M->Nr<TernaryInstr>(
|
||||
M->Nr<VarValue>(lcond), M->Nr<VarValue>(lv),
|
||||
M->Nr<TernaryInstr>(M->Nr<VarValue>(rcond), M->Nr<VarValue>(rv), newArray()));
|
||||
result = util::makeVar(arr, series, func);
|
||||
} else if (ltmp && !rtmp) {
|
||||
// We won't know until runtime if we can reuse the temp array(s) since they
|
||||
// might broadcast.
|
||||
auto *lshape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(lshape, "shape function func not found for left arg");
|
||||
auto *leftShape = util::call(lshape, {M->Nr<VarValue>(lv)});
|
||||
lcond = util::makeVar(*leftShape == *M->Nr<VarValue>(outShape), series, func);
|
||||
auto *arr =
|
||||
M->Nr<TernaryInstr>(M->Nr<VarValue>(lcond), M->Nr<VarValue>(lv), newArray());
|
||||
result = util::makeVar(arr, series, func);
|
||||
freeRightStatic = rfreeable;
|
||||
} else if (!ltmp && rtmp) {
|
||||
// We won't know until runtime if we can reuse the temp array(s) since they
|
||||
// might broadcast.
|
||||
auto *rshape = M->getOrRealizeFunc("_shape", {rv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(rshape, "shape function func not found for right arg");
|
||||
auto *rightShape = util::call(rshape, {M->Nr<VarValue>(rv)});
|
||||
rcond = util::makeVar(*rightShape == *M->Nr<VarValue>(outShape), series, func);
|
||||
auto *arr =
|
||||
M->Nr<TernaryInstr>(M->Nr<VarValue>(rcond), M->Nr<VarValue>(rv), newArray());
|
||||
result = util::makeVar(arr, series, func);
|
||||
freeLeftStatic = lfreeable;
|
||||
}
|
||||
} else {
|
||||
if (ltmp) {
|
||||
result = lv;
|
||||
} else {
|
||||
result = util::makeVar(newArray(), series, func);
|
||||
freeLeftStatic = lfreeable;
|
||||
}
|
||||
}
|
||||
|
||||
auto opstr = opstring();
|
||||
|
||||
if (haveVectorizedLoop()) {
|
||||
// We have a vectorized loop available for this operation.
|
||||
if (rv) {
|
||||
auto *vecloop = M->getOrRealizeFunc(
|
||||
"_apply_vectorized_loop_binary",
|
||||
{lv->getType(), rv->getType(), result->getType()}, {opstr}, FUSION_MODULE);
|
||||
seqassertn(vecloop, "binary vec loop func not found ({})", opstr);
|
||||
series->push_back(util::call(vecloop, {M->Nr<VarValue>(lv), M->Nr<VarValue>(rv),
|
||||
M->Nr<VarValue>(result)}));
|
||||
} else {
|
||||
auto *vecloop = M->getOrRealizeFunc("_apply_vectorized_loop_unary",
|
||||
{lv->getType(), result->getType()}, {opstr},
|
||||
FUSION_MODULE);
|
||||
seqassertn(vecloop, "unary vec loop func not found ({})", opstr);
|
||||
series->push_back(
|
||||
util::call(vecloop, {M->Nr<VarValue>(lv), M->Nr<VarValue>(result)}));
|
||||
}
|
||||
} else {
|
||||
// Arrays for scalar expression function
|
||||
std::vector<Value *> arrays = {M->Nr<VarValue>(result)};
|
||||
std::vector<std::string> scalarFuncArgNames;
|
||||
std::vector<types::Type *> scalarFuncArgTypes;
|
||||
std::unordered_map<NumPyExpr *, Var *> scalarFuncArgMap;
|
||||
|
||||
// Scalars passed through 'extra' arg of ndarray._loop()
|
||||
std::vector<Value *> extra;
|
||||
|
||||
auto *baseType = type.getIRBaseType(T);
|
||||
scalarFuncArgNames.push_back("out");
|
||||
scalarFuncArgTypes.push_back(M->getPointerType(baseType));
|
||||
|
||||
if (lhs->type.isArray()) {
|
||||
if (result != lv) {
|
||||
scalarFuncArgNames.push_back("in0");
|
||||
scalarFuncArgTypes.push_back(M->getPointerType(lhs->type.getIRBaseType(T)));
|
||||
arrays.push_back(M->Nr<VarValue>(lv));
|
||||
}
|
||||
} else {
|
||||
extra.push_back(M->Nr<VarValue>(lv));
|
||||
}
|
||||
|
||||
if (rv) {
|
||||
if (rhs->type.isArray()) {
|
||||
if (result != rv) {
|
||||
scalarFuncArgNames.push_back("in1");
|
||||
scalarFuncArgTypes.push_back(M->getPointerType(rhs->type.getIRBaseType(T)));
|
||||
arrays.push_back(M->Nr<VarValue>(rv));
|
||||
}
|
||||
} else {
|
||||
extra.push_back(M->Nr<VarValue>(rv));
|
||||
}
|
||||
}
|
||||
|
||||
auto *extraTuple = util::makeTuple(extra, M);
|
||||
scalarFuncArgNames.push_back("extra");
|
||||
scalarFuncArgTypes.push_back(extraTuple->getType());
|
||||
auto *scalarFuncType = M->getFuncType(M->getNoneType(), scalarFuncArgTypes);
|
||||
auto *scalarFunc = M->Nr<BodiedFunc>("__numpy_fusion_scalar_fn");
|
||||
scalarFunc->realize(scalarFuncType, scalarFuncArgNames);
|
||||
std::vector<Var *> scalarFuncArgVars(scalarFunc->arg_begin(),
|
||||
scalarFunc->arg_end());
|
||||
auto *body = M->Nr<SeriesFlow>();
|
||||
auto name = "_" + opstr;
|
||||
|
||||
auto deref = [&](unsigned idx) {
|
||||
return (*M->Nr<VarValue>(scalarFuncArgVars[idx]))[*M->getInt(0)];
|
||||
};
|
||||
|
||||
if (rv) {
|
||||
Value *litem = nullptr;
|
||||
Value *ritem = nullptr;
|
||||
|
||||
if (lhs->type.isArray() && rhs->type.isArray()) {
|
||||
if (result == lv) {
|
||||
litem = deref(0);
|
||||
ritem = deref(1);
|
||||
} else if (result == rv) {
|
||||
litem = deref(1);
|
||||
ritem = deref(0);
|
||||
} else {
|
||||
litem = deref(1);
|
||||
ritem = deref(2);
|
||||
}
|
||||
} else if (lhs->type.isArray()) {
|
||||
if (result == lv) {
|
||||
litem = deref(0);
|
||||
} else {
|
||||
litem = deref(1);
|
||||
}
|
||||
ritem = util::tupleGet(M->Nr<VarValue>(scalarFuncArgVars.back()), 0);
|
||||
} else if (rhs->type.isArray()) {
|
||||
if (result == rv) {
|
||||
ritem = deref(0);
|
||||
} else {
|
||||
ritem = deref(1);
|
||||
}
|
||||
litem = util::tupleGet(M->Nr<VarValue>(scalarFuncArgVars.back()), 0);
|
||||
} else {
|
||||
seqassertn(false, "both lhs are rhs are scalars");
|
||||
}
|
||||
|
||||
auto *commonType = decideTypes(this, lhs->type, rhs->type, T);
|
||||
|
||||
auto *lcast =
|
||||
M->getOrRealizeFunc("_cast", {litem->getType()}, {commonType}, FUSION_MODULE);
|
||||
seqassertn(lcast, "cast func not found for left arg");
|
||||
litem = util::call(lcast, {litem});
|
||||
|
||||
auto *rcast =
|
||||
M->getOrRealizeFunc("_cast", {ritem->getType()}, {commonType}, FUSION_MODULE);
|
||||
seqassertn(rcast, "cast func not found for left arg");
|
||||
ritem = util::call(rcast, {ritem});
|
||||
|
||||
auto *op = M->getOrRealizeFunc(name, {litem->getType(), ritem->getType()}, {},
|
||||
FUSION_MODULE);
|
||||
seqassertn(op, "2-op func '{}' not found", name);
|
||||
auto *oitem = util::call(op, {litem, ritem});
|
||||
auto *ptrsetFunc = M->getOrRealizeFunc(
|
||||
"_ptrset", {scalarFuncArgTypes[0], oitem->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(ptrsetFunc, "ptrset func not found");
|
||||
body->push_back(
|
||||
util::call(ptrsetFunc, {M->Nr<VarValue>(scalarFuncArgVars[0]), oitem}));
|
||||
} else {
|
||||
auto *litem = deref(result == lv ? 0 : 1);
|
||||
auto *op = M->getOrRealizeFunc(name, {litem->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(op, "1-op func '{}' not found", name);
|
||||
auto *oitem = util::call(op, {litem});
|
||||
auto *ptrsetFunc = M->getOrRealizeFunc(
|
||||
"_ptrset", {scalarFuncArgTypes[0], oitem->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(ptrsetFunc, "ptrset func not found");
|
||||
body->push_back(
|
||||
util::call(ptrsetFunc, {M->Nr<VarValue>(scalarFuncArgVars[0]), oitem}));
|
||||
}
|
||||
|
||||
scalarFunc->setBody(body);
|
||||
auto *arraysTuple = util::makeTuple(arrays);
|
||||
auto *loopFunc = M->getOrRealizeFunc(
|
||||
"_loop_basic",
|
||||
{arraysTuple->getType(), scalarFunc->getType(), extraTuple->getType()}, {},
|
||||
FUSION_MODULE);
|
||||
seqassertn(loopFunc, "loop_basic func not found");
|
||||
series->push_back(
|
||||
util::call(loopFunc, {arraysTuple, M->Nr<VarValue>(scalarFunc), extraTuple}));
|
||||
}
|
||||
|
||||
auto freeArray = [&](Var *arr) {
|
||||
auto *freeFunc = M->getOrRealizeFunc("_free", {arr->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(freeFunc, "free func not found");
|
||||
return util::call(freeFunc, {M->Nr<VarValue>(arr)});
|
||||
};
|
||||
|
||||
seqassertn(!(freeLeftStatic && lcond), "unexpected free conditions for left arg");
|
||||
seqassertn(!(freeRightStatic && rcond), "unexpected free conditions for right arg");
|
||||
|
||||
if (lcond && rcond) {
|
||||
series->push_back(M->Nr<IfFlow>(
|
||||
M->Nr<VarValue>(lcond), util::series(freeArray(rv)),
|
||||
util::series(freeArray(lv),
|
||||
M->Nr<IfFlow>(M->Nr<VarValue>(rcond), M->Nr<SeriesFlow>(),
|
||||
util::series(freeArray(rv))))));
|
||||
} else {
|
||||
if (freeLeftStatic) {
|
||||
series->push_back(freeArray(lv));
|
||||
} else if (lcond) {
|
||||
series->push_back(M->Nr<IfFlow>(M->Nr<VarValue>(lcond), M->Nr<SeriesFlow>(),
|
||||
util::series(freeArray(lv))));
|
||||
}
|
||||
|
||||
if (freeRightStatic) {
|
||||
series->push_back(freeArray(rv));
|
||||
} else if (rcond) {
|
||||
series->push_back(M->Nr<IfFlow>(M->Nr<VarValue>(rcond), M->Nr<SeriesFlow>(),
|
||||
util::series(freeArray(rv))));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
BroadcastInfo NumPyExpr::getBroadcastInfo() {
|
||||
int64_t arrDim = -1;
|
||||
Var *varLeaf = nullptr;
|
||||
bool multipleLeafVars = false;
|
||||
int numNonVarLeafArrays = 0;
|
||||
bool definitelyBroadcasts = false;
|
||||
|
||||
apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf() && e.type.isArray()) {
|
||||
if (arrDim == -1) {
|
||||
arrDim = e.type.ndim;
|
||||
} else if (arrDim != e.type.ndim) {
|
||||
definitelyBroadcasts = true;
|
||||
}
|
||||
|
||||
if (auto *v = cast<VarValue>(e.val)) {
|
||||
if (varLeaf) {
|
||||
if (varLeaf != v->getVar())
|
||||
multipleLeafVars = true;
|
||||
} else {
|
||||
varLeaf = v->getVar();
|
||||
}
|
||||
} else {
|
||||
++numNonVarLeafArrays;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
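// Broadcasting might occur whenever the leaves involve more than one distinct
// array source: several non-variable array leaves, leaves referring to
// different variables, or a mix of both.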
bool mightBroadcast = numNonVarLeafArrays > 1 || multipleLeafVars ||
|
||||
(numNonVarLeafArrays == 1 && varLeaf);
|
||||
if (definitelyBroadcasts) {
|
||||
return BroadcastInfo::YES;
|
||||
} else if (mightBroadcast) {
|
||||
return BroadcastInfo::MAYBE;
|
||||
} else {
|
||||
return BroadcastInfo::NO;
|
||||
}
|
||||
}
|
||||
|
||||
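// Recursively emit the per-element scalar computation: operands of binary ops
// are cast to a common type and combined via the fusion module's '_<op>'
// helper; array leaves load their element through the corresponding pointer
// argument, while scalar leaves are read from the packed 'scalars' tuple.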
Value *NumPyExpr::codegenScalarExpr(
|
||||
CodegenContext &C, const std::unordered_map<NumPyExpr *, Var *> &args,
|
||||
const std::unordered_map<NumPyExpr *, unsigned> &scalarMap, Var *scalars) {
|
||||
auto *M = C.M;
|
||||
auto &T = C.T;
|
||||
|
||||
Value *lv = lhs ? lhs->codegenScalarExpr(C, args, scalarMap, scalars) : nullptr;
|
||||
Value *rv = rhs ? rhs->codegenScalarExpr(C, args, scalarMap, scalars) : nullptr;
|
||||
auto name = "_" + opstring();
|
||||
|
||||
if (lv && rv) {
|
||||
auto *t = type.getIRBaseType(T);
|
||||
auto *commonType = decideTypes(this, lhs->type, rhs->type, T);
|
||||
auto *cast1 =
|
||||
M->getOrRealizeFunc("_cast", {lv->getType()}, {commonType}, FUSION_MODULE);
|
||||
auto *cast2 =
|
||||
M->getOrRealizeFunc("_cast", {rv->getType()}, {commonType}, FUSION_MODULE);
|
||||
lv = util::call(cast1, {lv});
|
||||
rv = util::call(cast2, {rv});
|
||||
auto *f =
|
||||
M->getOrRealizeFunc(name, {lv->getType(), rv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(f, "2-op func '{}' not found", name);
|
||||
return util::call(f, {lv, rv});
|
||||
} else if (lv) {
|
||||
auto *t = type.getIRBaseType(T);
|
||||
auto *f = M->getOrRealizeFunc(name, {lv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(f, "1-op func '{}' not found", name);
|
||||
return util::call(f, {lv});
|
||||
} else {
|
||||
if (type.isArray()) {
|
||||
auto it = args.find(this);
|
||||
seqassertn(it != args.end(), "NumPyExpr not found in args map (codegen expr)");
|
||||
auto *var = it->second;
|
||||
return (*M->Nr<VarValue>(var))[*M->getInt(0)];
|
||||
} else {
|
||||
auto it = scalarMap.find(this);
|
||||
seqassertn(it != scalarMap.end(),
|
||||
"NumPyExpr not found in scalar map (codegen expr)");
|
||||
auto idx = it->second;
|
||||
return util::tupleGet(M->Nr<VarValue>(scalars), idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace numpy
|
||||
} // namespace transform
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -0,0 +1,385 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "numpy.h"
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace transform {
|
||||
namespace numpy {
|
||||
namespace {
|
||||
using CFG = analyze::dataflow::CFGraph;
|
||||
using CFBlock = analyze::dataflow::CFBlock;
|
||||
using RD = analyze::dataflow::RDInspector;
|
||||
using SE = analyze::module::SideEffectResult;
|
||||
|
||||
struct GetVars : public util::Operator {
|
||||
std::unordered_set<id_t> &vids;
|
||||
|
||||
explicit GetVars(std::unordered_set<id_t> &vids) : util::Operator(), vids(vids) {}
|
||||
|
||||
void preHook(Node *v) override {
|
||||
for (auto *var : v->getUsedVariables()) {
|
||||
if (!isA<Func>(var))
|
||||
vids.insert(var->getId());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
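// Checks whether it is safe to forward an expression past a value: forwarding
// is rejected if the value assigns to any variable used by the expression, or
// if it (or a parsed sub-expression leaf) has side effects.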
struct OkToForwardPast : public util::Operator {
|
||||
std::unordered_set<id_t> &vids;
|
||||
const std::unordered_map<id_t, NumPyExpr *> &parsedValues;
|
||||
SE *se;
|
||||
bool ok;
|
||||
|
||||
OkToForwardPast(std::unordered_set<id_t> &vids,
|
||||
const std::unordered_map<id_t, NumPyExpr *> &parsedValues, SE *se)
|
||||
: util::Operator(), vids(vids), parsedValues(parsedValues), se(se), ok(true) {}
|
||||
|
||||
void preHook(Node *v) override {
|
||||
if (!ok) {
|
||||
return;
|
||||
} else if (auto *assign = cast<AssignInstr>(v)) {
|
||||
if (vids.count(assign->getLhs()->getId()))
|
||||
ok = false;
|
||||
} else if (auto *val = cast<Value>(v)) {
|
||||
auto it = parsedValues.find(val->getId());
|
||||
if (it != parsedValues.end()) {
|
||||
it->second->apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf() && se->hasSideEffect(e.val))
|
||||
ok = false;
|
||||
});
|
||||
// Skip children since we are processing them manually above.
|
||||
for (auto *used : val->getUsedValues())
|
||||
see(used);
|
||||
} else if (se->hasSideEffect(val)) {
|
||||
ok = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct GetAllUses : public util::Operator {
|
||||
Var *var;
|
||||
std::vector<Value *> &uses;
|
||||
|
||||
GetAllUses(Var *var, std::vector<Value *> &uses)
|
||||
: util::Operator(), var(var), uses(uses) {}
|
||||
|
||||
void preHook(Node *n) override {
|
||||
if (auto *v = cast<Value>(n)) {
|
||||
auto vars = v->getUsedVariables();
|
||||
if (std::find(vars.begin(), vars.end(), var) != vars.end())
|
||||
uses.push_back(v);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
bool canForwardExpressionAlongPath(
|
||||
Value *source, Value *destination, std::unordered_set<id_t> &vids,
|
||||
const std::unordered_map<id_t, NumPyExpr *> &parsedValues, SE *se,
|
||||
const std::vector<CFBlock *> &path) {
|
||||
if (path.empty())
|
||||
return true;
|
||||
|
||||
bool go = false;
|
||||
for (auto *block : path) {
|
||||
for (const auto *value : *block) {
|
||||
// Skip things before 'source' in first block
|
||||
if (!go && block == path.front() && value == source) {
|
||||
go = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip things after 'destination' in last block
|
||||
if (go && block == path.back() && value == destination) {
|
||||
go = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!go)
|
||||
continue;
|
||||
|
||||
OkToForwardPast check(vids, parsedValues, se);
|
||||
const_cast<Value *>(value)->accept(check);
|
||||
if (!check.ok)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool canForwardExpression(NumPyOptimizationUnit *expr, Value *target,
|
||||
const std::unordered_map<id_t, NumPyExpr *> &parsedValues,
|
||||
CFG *cfg, SE *se) {
|
||||
std::unordered_set<id_t> vids;
|
||||
bool pure = true;
|
||||
|
||||
expr->expr->apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf()) {
|
||||
if (se->hasSideEffect(e.val)) {
|
||||
pure = false;
|
||||
} else {
|
||||
GetVars gv(vids);
|
||||
e.val->accept(gv);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (!pure)
|
||||
return false;
|
||||
|
||||
auto *source = expr->assign;
|
||||
auto *start = cfg->getBlock(source);
|
||||
auto *end = cfg->getBlock(target);
|
||||
seqassertn(start, "start CFG block not found");
|
||||
seqassertn(end, "end CFG block not found");
|
||||
bool ok = true;
|
||||
|
||||
std::function<void(CFBlock *, std::vector<CFBlock *> &)> dfs =
|
||||
[&](CFBlock *curr, std::vector<CFBlock *> &path) {
|
||||
path.push_back(curr);
|
||||
if (curr == end) {
|
||||
if (!canForwardExpressionAlongPath(source, target, vids, parsedValues, se,
|
||||
path))
|
||||
ok = false;
|
||||
} else {
|
||||
for (auto it = curr->successors_begin(); it != curr->successors_end(); ++it) {
|
||||
if (std::find(path.begin(), path.end(), *it) == path.end())
|
||||
dfs(*it, path);
|
||||
}
|
||||
}
|
||||
path.pop_back();
|
||||
};
|
||||
|
||||
std::vector<CFBlock *> path;
|
||||
dfs(start, path);
|
||||
return ok;
|
||||
}
|
||||
|
||||
bool canForwardVariable(AssignInstr *assign, Value *destination, BodiedFunc *func,
|
||||
RD *rd) {
|
||||
auto *var = assign->getLhs();
|
||||
|
||||
// Check 1: Only the given assignment should reach the destination.
|
||||
auto reaching = rd->getReachingDefinitions(var, destination);
|
||||
if (reaching.size() != 1 && *reaching.begin() != assign->getRhs()->getId())
|
||||
return false;
|
||||
|
||||
// Check 2: There should be no other uses of the variable that the given assignment
|
||||
// reaches.
|
||||
std::vector<Value *> uses;
|
||||
GetAllUses gu(var, uses);
|
||||
func->accept(gu);
|
||||
for (auto *use : uses) {
|
||||
if (use != destination && use->getId() != assign->getId() &&
|
||||
rd->getReachingDefinitions(var, use).count(assign->getRhs()->getId()))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
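// For every expression (the destination), look for other expressions whose
// assigned variable appears as a leaf and can be safely forwarded into it;
// each candidate substitution becomes an edge of the forwarding DAG.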
ForwardingDAG buildForwardingDAG(BodiedFunc *func, RD *rd, CFG *cfg, SE *se,
|
||||
std::vector<NumPyOptimizationUnit> &exprs) {
|
||||
std::unordered_map<id_t, NumPyExpr *> parsedValues;
|
||||
for (auto &e : exprs) {
|
||||
e.expr->apply([&](NumPyExpr &e) {
|
||||
if (e.val)
|
||||
parsedValues.emplace(e.val->getId(), &e);
|
||||
});
|
||||
}
|
||||
|
||||
ForwardingDAG dag;
|
||||
int64_t dstId = 0;
|
||||
for (auto &dst : exprs) {
|
||||
auto *target = dst.expr.get();
|
||||
auto &forwardingVec = dag[&dst];
|
||||
|
||||
std::vector<std::pair<Var *, NumPyExpr *>> vars;
|
||||
target->apply([&](NumPyExpr &e) {
|
||||
if (e.isLeaf()) {
|
||||
if (auto *v = cast<VarValue>(e.val)) {
|
||||
vars.emplace_back(v->getVar(), &e);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
for (auto &p : vars) {
|
||||
int64_t srcId = 0;
|
||||
for (auto &src : exprs) {
|
||||
if (srcId != dstId && src.assign && src.assign->getLhs() == p.first) {
|
||||
auto checkFwdVar = canForwardVariable(src.assign, p.second->val, func, rd);
|
||||
auto checkFwdExpr =
|
||||
canForwardExpression(&src, p.second->val, parsedValues, cfg, se);
|
||||
if (checkFwdVar && checkFwdExpr)
|
||||
forwardingVec.push_back({&dst, &src, p.first, p.second, dstId, srcId});
|
||||
}
|
||||
++srcId;
|
||||
}
|
||||
}
|
||||
++dstId;
|
||||
}
|
||||
|
||||
return dag;
|
||||
}
|
||||
|
||||
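// Union-find (disjoint set) with path compression and union by rank, used to
// group expressions into connected components of the forwarding DAG.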
struct UnionFind {
|
||||
std::vector<int64_t> parent;
|
||||
std::vector<int64_t> rank;
|
||||
|
||||
explicit UnionFind(int64_t n) : parent(n), rank(n) {
|
||||
for (auto i = 0; i < n; i++) {
|
||||
parent[i] = i;
|
||||
rank[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t find(int64_t u) {
|
||||
if (parent[u] != u)
|
||||
parent[u] = find(parent[u]);
|
||||
return parent[u];
|
||||
}
|
||||
|
||||
void union_(int64_t u, int64_t v) {
|
||||
auto ru = find(u);
|
||||
auto rv = find(v);
|
||||
if (ru != rv) {
|
||||
if (rank[ru] > rank[rv]) {
|
||||
parent[rv] = ru;
|
||||
} else if (rank[ru] < rank[rv]) {
|
||||
parent[ru] = rv;
|
||||
} else {
|
||||
parent[rv] = ru;
|
||||
++rank[ru];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<ForwardingDAG>
|
||||
getForwardingDAGConnectedComponents(ForwardingDAG &dag,
|
||||
std::vector<NumPyOptimizationUnit> &exprs) {
|
||||
auto n = exprs.size();
|
||||
UnionFind uf(n);
|
||||
|
||||
for (auto i = 0; i < n; i++) {
|
||||
for (auto &fwd : dag[&exprs[i]]) {
|
||||
uf.union_(i, fwd.srcId);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<NumPyOptimizationUnit *>> components(n);
|
||||
for (auto i = 0; i < n; i++) {
|
||||
auto root = uf.find(i);
|
||||
components[root].push_back(&exprs[i]);
|
||||
}
|
||||
|
||||
std::vector<ForwardingDAG> result;
|
||||
for (auto &c : components) {
|
||||
if (c.empty())
|
||||
continue;
|
||||
|
||||
ForwardingDAG d;
|
||||
for (auto *expr : c)
|
||||
d.emplace(expr, dag[expr]);
|
||||
result.push_back(d);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
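// Standard DFS cycle detection: reaching a node that is already on the
// current recursion stack indicates a cycle.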
bool hasCycleHelper(int64_t v, ForwardingDAG &dag,
|
||||
std::vector<NumPyOptimizationUnit> &exprs,
|
||||
std::vector<bool> &visited, std::vector<bool> &recStack) {
|
||||
visited[v] = true;
|
||||
recStack[v] = true;
|
||||
|
||||
for (auto &neighbor : dag[&exprs[v]]) {
|
||||
if (!visited[neighbor.srcId]) {
|
||||
if (hasCycleHelper(neighbor.srcId, dag, exprs, visited, recStack))
|
||||
return true;
|
||||
} else if (recStack[neighbor.srcId]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
recStack[v] = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool hasCycle(ForwardingDAG &dag, std::vector<NumPyOptimizationUnit> &exprs) {
|
||||
auto n = exprs.size();
|
||||
std::vector<bool> visited(n, false);
|
||||
std::vector<bool> recStack(n, false);
|
||||
|
||||
for (auto i = 0; i < n; i++) {
|
||||
if (dag.find(&exprs[i]) != dag.end() && !visited[i] &&
|
||||
hasCycleHelper(i, dag, exprs, visited, recStack))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void doForwardingHelper(ForwardingDAG &dag, NumPyOptimizationUnit *curr,
|
||||
std::unordered_set<NumPyOptimizationUnit *> &done,
|
||||
std::vector<AssignInstr *> &assignsToDelete) {
|
||||
if (done.count(curr))
|
||||
return;
|
||||
|
||||
auto forwardings = dag[curr];
|
||||
for (auto &fwd : forwardings) {
|
||||
doForwardingHelper(dag, fwd.src, done, assignsToDelete);
|
||||
// Note that order of leaves here doesn't matter since they're guaranteed to have no
|
||||
// side effects based on forwarding checks.
|
||||
fwd.dst->leaves.insert(fwd.dst->leaves.end(), fwd.src->leaves.begin(),
|
||||
fwd.src->leaves.end());
|
||||
fwd.dstLeaf->replace(*fwd.src->expr);
|
||||
assignsToDelete.push_back(fwd.src->assign);
|
||||
}
|
||||
|
||||
done.insert(curr);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::vector<ForwardingDAG>
|
||||
getForwardingDAGs(BodiedFunc *func, RD *rd, CFG *cfg, SE *se,
|
||||
std::vector<NumPyOptimizationUnit> &exprs) {
|
||||
auto dag = buildForwardingDAG(func, rd, cfg, se, exprs);
|
||||
auto dags = getForwardingDAGConnectedComponents(dag, exprs);
|
||||
dags.erase(std::remove_if(dags.begin(), dags.end(),
|
||||
[&](ForwardingDAG &dag) { return hasCycle(dag, exprs); }),
|
||||
dags.end());
|
||||
return dags;
|
||||
}
|
||||
|
||||
NumPyOptimizationUnit *doForwarding(ForwardingDAG &dag,
|
||||
std::vector<AssignInstr *> &assignsToDelete) {
|
||||
seqassertn(!dag.empty(), "empty forwarding DAG encountered");
|
||||
std::unordered_set<NumPyOptimizationUnit *> done;
|
||||
for (auto &e : dag) {
|
||||
doForwardingHelper(dag, e.first, done, assignsToDelete);
|
||||
}
|
||||
|
||||
// Find the root
|
||||
std::unordered_set<NumPyOptimizationUnit *> notRoot;
|
||||
for (auto &e : dag) {
|
||||
for (auto &f : e.second) {
|
||||
notRoot.insert(f.src);
|
||||
}
|
||||
}
|
||||
seqassertn(notRoot.size() == dag.size() - 1,
|
||||
"multiple roots found in forwarding DAG");
|
||||
|
||||
for (auto &e : dag) {
|
||||
if (notRoot.count(e.first) == 0)
|
||||
return e.first;
|
||||
}
|
||||
|
||||
seqassertn(false, "could not find root in forwarding DAG");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace numpy
|
||||
} // namespace transform
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -0,0 +1,877 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "numpy.h"
|
||||
|
||||
#include "codon/cir/analyze/dataflow/reaching.h"
|
||||
#include "codon/cir/analyze/module/global_vars.h"
|
||||
#include "codon/cir/analyze/module/side_effect.h"
|
||||
#include "codon/cir/util/cloning.h"
|
||||
#include "codon/cir/util/irtools.h"
|
||||
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <complex>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#define XLOG(c, ...) \
|
||||
do { \
|
||||
if (Verbose) \
|
||||
LOG(c, ##__VA_ARGS__); \
|
||||
} while (false)
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace transform {
|
||||
namespace numpy {
|
||||
namespace {
|
||||
llvm::cl::opt<int> AlwaysFuseCostThreshold(
|
||||
"npfuse-always", llvm::cl::desc("Expression cost below which (<=) to always fuse"),
|
||||
llvm::cl::init(10));
|
||||
|
||||
llvm::cl::opt<int> NeverFuseCostThreshold(
|
||||
"npfuse-never", llvm::cl::desc("Expression cost above which (>) to never fuse"),
|
||||
llvm::cl::init(50));
|
||||
|
||||
llvm::cl::opt<bool> Verbose("npfuse-verbose",
|
||||
llvm::cl::desc("Print information about fused expressions"),
|
||||
llvm::cl::init(false));
|
||||
|
||||
bool isArrayType(types::Type *t) {
|
||||
return t && isA<types::RecordType>(t) &&
|
||||
t->getName().rfind("std.numpy.ndarray.ndarray[", 0) == 0;
|
||||
}
|
||||
|
||||
bool isUFuncType(types::Type *t) {
|
||||
return t && (t->getName().rfind("std.numpy.ufunc.UnaryUFunc[", 0) == 0 ||
|
||||
t->getName().rfind("std.numpy.ufunc.BinaryUFunc[", 0) == 0);
|
||||
}
|
||||
|
||||
bool isNoneType(types::Type *t, NumPyPrimitiveTypes &T) {
|
||||
return t && (t->is(T.none) || t->is(T.optnone));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
const std::string FUSION_MODULE = "std.numpy.fusion";
|
||||
|
||||
NumPyPrimitiveTypes::NumPyPrimitiveTypes(Module *M)
|
||||
: none(M->getNoneType()), optnone(M->getOptionalType(none)),
|
||||
bool_(M->getBoolType()), i8(M->getIntNType(8, true)),
|
||||
u8(M->getIntNType(8, false)), i16(M->getIntNType(16, true)),
|
||||
u16(M->getIntNType(16, false)), i32(M->getIntNType(32, true)),
|
||||
u32(M->getIntNType(32, false)), i64(M->getIntType()),
|
||||
u64(M->getIntNType(64, false)), f16(M->getFloat16Type()),
|
||||
f32(M->getFloat32Type()), f64(M->getFloatType()),
|
||||
c64(M->getType("std.internal.types.complex.complex64")),
|
||||
c128(M->getType("std.internal.types.complex.complex")) {}
|
||||
|
||||
NumPyType::NumPyType(Type dtype, int64_t ndim) : dtype(dtype), ndim(ndim) {
|
||||
seqassertn(ndim >= 0, "ndim must be non-negative");
|
||||
}
|
||||
|
||||
NumPyType::NumPyType() : NumPyType(NP_TYPE_NONE) {}
|
||||
|
||||
NumPyType NumPyType::get(types::Type *t, NumPyPrimitiveTypes &T) {
|
||||
if (t->is(T.bool_))
|
||||
return {NumPyType::NP_TYPE_BOOL};
|
||||
if (t->is(T.i8))
|
||||
return {NumPyType::NP_TYPE_I8};
|
||||
if (t->is(T.u8))
|
||||
return {NumPyType::NP_TYPE_U8};
|
||||
if (t->is(T.i16))
|
||||
return {NumPyType::NP_TYPE_I16};
|
||||
if (t->is(T.u16))
|
||||
return {NumPyType::NP_TYPE_U16};
|
||||
if (t->is(T.i32))
|
||||
return {NumPyType::NP_TYPE_I32};
|
||||
if (t->is(T.u32))
|
||||
return {NumPyType::NP_TYPE_U32};
|
||||
if (t->is(T.i64))
|
||||
return {NumPyType::NP_TYPE_I64};
|
||||
if (t->is(T.u64))
|
||||
return {NumPyType::NP_TYPE_U64};
|
||||
if (t->is(T.f16))
|
||||
return {NumPyType::NP_TYPE_F16};
|
||||
if (t->is(T.f32))
|
||||
return {NumPyType::NP_TYPE_F32};
|
||||
if (t->is(T.f64))
|
||||
return {NumPyType::NP_TYPE_F64};
|
||||
if (t->is(T.c64))
|
||||
return {NumPyType::NP_TYPE_C64};
|
||||
if (t->is(T.c128))
|
||||
return {NumPyType::NP_TYPE_C128};
|
||||
if (isArrayType(t)) {
|
||||
auto generics = t->getGenerics();
|
||||
seqassertn(generics.size() == 2 && generics[0].isType() && generics[1].isStatic(),
|
||||
"unrecognized ndarray generics");
|
||||
auto *dtype = generics[0].getTypeValue();
|
||||
auto ndim = generics[1].getStaticValue();
|
||||
if (dtype->is(T.bool_))
|
||||
return {NumPyType::NP_TYPE_ARR_BOOL, ndim};
|
||||
if (dtype->is(T.i8))
|
||||
return {NumPyType::NP_TYPE_ARR_I8, ndim};
|
||||
if (dtype->is(T.u8))
|
||||
return {NumPyType::NP_TYPE_ARR_U8, ndim};
|
||||
if (dtype->is(T.i16))
|
||||
return {NumPyType::NP_TYPE_ARR_I16, ndim};
|
||||
if (dtype->is(T.u16))
|
||||
return {NumPyType::NP_TYPE_ARR_U16, ndim};
|
||||
if (dtype->is(T.i32))
|
||||
return {NumPyType::NP_TYPE_ARR_I32, ndim};
|
||||
if (dtype->is(T.u32))
|
||||
return {NumPyType::NP_TYPE_ARR_U32, ndim};
|
||||
if (dtype->is(T.i64))
|
||||
return {NumPyType::NP_TYPE_ARR_I64, ndim};
|
||||
if (dtype->is(T.u64))
|
||||
return {NumPyType::NP_TYPE_ARR_U64, ndim};
|
||||
if (dtype->is(T.f16))
|
||||
return {NumPyType::NP_TYPE_ARR_F16, ndim};
|
||||
if (dtype->is(T.f32))
|
||||
return {NumPyType::NP_TYPE_ARR_F32, ndim};
|
||||
if (dtype->is(T.f64))
|
||||
return {NumPyType::NP_TYPE_ARR_F64, ndim};
|
||||
if (dtype->is(T.c64))
|
||||
return {NumPyType::NP_TYPE_ARR_C64, ndim};
|
||||
if (dtype->is(T.c128))
|
||||
return {NumPyType::NP_TYPE_ARR_C128, ndim};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
types::Type *NumPyType::getIRBaseType(NumPyPrimitiveTypes &T) const {
|
||||
switch (dtype) {
|
||||
case NP_TYPE_NONE:
|
||||
seqassertn(false, "unexpected type code (NONE)");
|
||||
return nullptr;
|
||||
case NP_TYPE_BOOL:
|
||||
return T.bool_;
|
||||
case NP_TYPE_I8:
|
||||
return T.i8;
|
||||
case NP_TYPE_U8:
|
||||
return T.u8;
|
||||
case NP_TYPE_I16:
|
||||
return T.i16;
|
||||
case NP_TYPE_U16:
|
||||
return T.u16;
|
||||
case NP_TYPE_I32:
|
||||
return T.i32;
|
||||
case NP_TYPE_U32:
|
||||
return T.u32;
|
||||
case NP_TYPE_I64:
|
||||
return T.i64;
|
||||
case NP_TYPE_U64:
|
||||
return T.u64;
|
||||
case NP_TYPE_F16:
|
||||
return T.f16;
|
||||
case NP_TYPE_F32:
|
||||
return T.f32;
|
||||
case NP_TYPE_F64:
|
||||
return T.f64;
|
||||
case NP_TYPE_C64:
|
||||
return T.c64;
|
||||
case NP_TYPE_C128:
|
||||
return T.c128;
|
||||
case NP_TYPE_SCALAR_END:
|
||||
seqassertn(false, "unexpected type code (SCALAR_END)");
|
||||
return nullptr;
|
||||
case NP_TYPE_ARR_BOOL:
|
||||
return T.bool_;
|
||||
case NP_TYPE_ARR_I8:
|
||||
return T.i8;
|
||||
case NP_TYPE_ARR_U8:
|
||||
return T.u8;
|
||||
case NP_TYPE_ARR_I16:
|
||||
return T.i16;
|
||||
case NP_TYPE_ARR_U16:
|
||||
return T.u16;
|
||||
case NP_TYPE_ARR_I32:
|
||||
return T.i32;
|
||||
case NP_TYPE_ARR_U32:
|
||||
return T.u32;
|
||||
case NP_TYPE_ARR_I64:
|
||||
return T.i64;
|
||||
case NP_TYPE_ARR_U64:
|
||||
return T.u64;
|
||||
case NP_TYPE_ARR_F16:
|
||||
return T.f16;
|
||||
case NP_TYPE_ARR_F32:
|
||||
return T.f32;
|
||||
case NP_TYPE_ARR_F64:
|
||||
return T.f64;
|
||||
case NP_TYPE_ARR_C64:
|
||||
return T.c64;
|
||||
case NP_TYPE_ARR_C128:
|
||||
return T.c128;
|
||||
default:
|
||||
seqassertn(false, "unexpected type code (?)");
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, NumPyType const &type) {
|
||||
static const std::unordered_map<NumPyType::Type, std::string> typestrings = {
|
||||
{NumPyType::NP_TYPE_NONE, "none"}, {NumPyType::NP_TYPE_BOOL, "bool"},
|
||||
{NumPyType::NP_TYPE_I8, "i8"}, {NumPyType::NP_TYPE_U8, "u8"},
|
||||
{NumPyType::NP_TYPE_I16, "i16"}, {NumPyType::NP_TYPE_U16, "u16"},
|
||||
{NumPyType::NP_TYPE_I32, "i32"}, {NumPyType::NP_TYPE_U32, "u32"},
|
||||
{NumPyType::NP_TYPE_I64, "i64"}, {NumPyType::NP_TYPE_U64, "u64"},
|
||||
{NumPyType::NP_TYPE_F16, "f16"}, {NumPyType::NP_TYPE_F32, "f32"},
|
||||
{NumPyType::NP_TYPE_F64, "f64"}, {NumPyType::NP_TYPE_C64, "c64"},
|
||||
{NumPyType::NP_TYPE_C128, "c128"}, {NumPyType::NP_TYPE_SCALAR_END, ""},
|
||||
{NumPyType::NP_TYPE_ARR_BOOL, "bool"}, {NumPyType::NP_TYPE_ARR_I8, "i8"},
|
||||
{NumPyType::NP_TYPE_ARR_U8, "u8"}, {NumPyType::NP_TYPE_ARR_I16, "i16"},
|
||||
{NumPyType::NP_TYPE_ARR_U16, "u16"}, {NumPyType::NP_TYPE_ARR_I32, "i32"},
|
||||
{NumPyType::NP_TYPE_ARR_U32, "u32"}, {NumPyType::NP_TYPE_ARR_I64, "i64"},
|
||||
{NumPyType::NP_TYPE_ARR_U64, "u64"}, {NumPyType::NP_TYPE_ARR_F16, "f16"},
|
||||
{NumPyType::NP_TYPE_ARR_F32, "f32"}, {NumPyType::NP_TYPE_ARR_F64, "f64"},
|
||||
{NumPyType::NP_TYPE_ARR_C64, "c64"}, {NumPyType::NP_TYPE_ARR_C128, "c128"},
|
||||
};
|
||||
|
||||
auto it = typestrings.find(type.dtype);
|
||||
seqassertn(it != typestrings.end(), "type not found");
|
||||
auto s = it->second;
|
||||
if (type.isArray())
|
||||
os << "array[" << s << ", " << type.ndim << "]";
|
||||
else
|
||||
os << s;
|
||||
return os;
|
||||
}
|
||||
|
||||
std::string NumPyType::str() const {
|
||||
std::stringstream buffer;
|
||||
buffer << *this;
|
||||
return buffer.str();
|
||||
}
|
||||
|
||||
CodegenContext::CodegenContext(Module *M, SeriesFlow *series, BodiedFunc *func,
|
||||
NumPyPrimitiveTypes &T)
|
||||
: M(M), series(series), func(func), vars(), T(T) {}
|
||||
|
||||
std::unique_ptr<NumPyExpr> parse(Value *v,
|
||||
std::vector<std::pair<NumPyExpr *, Value *>> &leaves,
|
||||
NumPyPrimitiveTypes &T) {
|
||||
struct NumPyMagicMethod {
|
||||
std::string name;
|
||||
NumPyExpr::Op op;
|
||||
int args;
|
||||
bool right;
|
||||
};
|
||||
|
||||
struct NumPyUFunc {
|
||||
std::string name;
|
||||
NumPyExpr::Op op;
|
||||
int args;
|
||||
};
|
||||
|
||||
static std::vector<NumPyMagicMethod> magics = {
|
||||
{Module::POS_MAGIC_NAME, NumPyExpr::NP_OP_POS, 1, false},
|
||||
{Module::NEG_MAGIC_NAME, NumPyExpr::NP_OP_NEG, 1, false},
|
||||
{Module::INVERT_MAGIC_NAME, NumPyExpr::NP_OP_INVERT, 1, false},
|
||||
{Module::ABS_MAGIC_NAME, NumPyExpr::NP_OP_ABS, 1, false},
|
||||
|
||||
{Module::ADD_MAGIC_NAME, NumPyExpr::NP_OP_ADD, 2, false},
|
||||
{Module::SUB_MAGIC_NAME, NumPyExpr::NP_OP_SUB, 2, false},
|
||||
{Module::MUL_MAGIC_NAME, NumPyExpr::NP_OP_MUL, 2, false},
|
||||
{Module::MATMUL_MAGIC_NAME, NumPyExpr::NP_OP_MATMUL, 2, false},
|
||||
{Module::TRUE_DIV_MAGIC_NAME, NumPyExpr::NP_OP_TRUE_DIV, 2, false},
|
||||
{Module::FLOOR_DIV_MAGIC_NAME, NumPyExpr::NP_OP_FLOOR_DIV, 2, false},
|
||||
{Module::MOD_MAGIC_NAME, NumPyExpr::NP_OP_MOD, 2, false},
|
||||
{Module::POW_MAGIC_NAME, NumPyExpr::NP_OP_POW, 2, false},
|
||||
{Module::LSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_LSHIFT, 2, false},
|
||||
{Module::RSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_RSHIFT, 2, false},
|
||||
{Module::AND_MAGIC_NAME, NumPyExpr::NP_OP_AND, 2, false},
|
||||
{Module::OR_MAGIC_NAME, NumPyExpr::NP_OP_OR, 2, false},
|
||||
{Module::XOR_MAGIC_NAME, NumPyExpr::NP_OP_XOR, 2, false},
|
||||
|
||||
{Module::RADD_MAGIC_NAME, NumPyExpr::NP_OP_ADD, 2, true},
|
||||
{Module::RSUB_MAGIC_NAME, NumPyExpr::NP_OP_SUB, 2, true},
|
||||
{Module::RMUL_MAGIC_NAME, NumPyExpr::NP_OP_MUL, 2, true},
|
||||
{Module::RMATMUL_MAGIC_NAME, NumPyExpr::NP_OP_MATMUL, 2, true},
|
||||
{Module::RTRUE_DIV_MAGIC_NAME, NumPyExpr::NP_OP_TRUE_DIV, 2, true},
|
||||
{Module::RFLOOR_DIV_MAGIC_NAME, NumPyExpr::NP_OP_FLOOR_DIV, 2, true},
|
||||
{Module::RMOD_MAGIC_NAME, NumPyExpr::NP_OP_MOD, 2, true},
|
||||
{Module::RPOW_MAGIC_NAME, NumPyExpr::NP_OP_POW, 2, true},
|
||||
{Module::RLSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_LSHIFT, 2, true},
|
||||
{Module::RRSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_RSHIFT, 2, true},
|
||||
{Module::RAND_MAGIC_NAME, NumPyExpr::NP_OP_AND, 2, true},
|
||||
{Module::ROR_MAGIC_NAME, NumPyExpr::NP_OP_OR, 2, true},
|
||||
{Module::RXOR_MAGIC_NAME, NumPyExpr::NP_OP_XOR, 2, true},
|
||||
|
||||
{Module::EQ_MAGIC_NAME, NumPyExpr::NP_OP_EQ, 2, false},
|
||||
{Module::NE_MAGIC_NAME, NumPyExpr::NP_OP_NE, 2, false},
|
||||
{Module::LT_MAGIC_NAME, NumPyExpr::NP_OP_LT, 2, false},
|
||||
{Module::LE_MAGIC_NAME, NumPyExpr::NP_OP_LE, 2, false},
|
||||
{Module::GT_MAGIC_NAME, NumPyExpr::NP_OP_GT, 2, false},
|
||||
{Module::GE_MAGIC_NAME, NumPyExpr::NP_OP_GE, 2, false},
|
||||
};
|
||||
|
||||
static std::vector<NumPyUFunc> ufuncs = {
|
||||
{"positive", NumPyExpr::NP_OP_POS, 1},
|
||||
{"negative", NumPyExpr::NP_OP_NEG, 1},
|
||||
{"invert", NumPyExpr::NP_OP_INVERT, 1},
|
||||
{"abs", NumPyExpr::NP_OP_ABS, 1},
|
||||
{"absolute", NumPyExpr::NP_OP_ABS, 1},
|
||||
{"add", NumPyExpr::NP_OP_ADD, 2},
|
||||
{"subtract", NumPyExpr::NP_OP_SUB, 2},
|
||||
{"multiply", NumPyExpr::NP_OP_MUL, 2},
|
||||
{"divide", NumPyExpr::NP_OP_TRUE_DIV, 2},
|
||||
{"floor_divide", NumPyExpr::NP_OP_FLOOR_DIV, 2},
|
||||
{"remainder", NumPyExpr::NP_OP_MOD, 2},
|
||||
{"fmod", NumPyExpr::NP_OP_FMOD, 2},
|
||||
{"power", NumPyExpr::NP_OP_POW, 2},
|
||||
{"left_shift", NumPyExpr::NP_OP_LSHIFT, 2},
|
||||
{"right_shift", NumPyExpr::NP_OP_RSHIFT, 2},
|
||||
{"bitwise_and", NumPyExpr::NP_OP_AND, 2},
|
||||
{"bitwise_or", NumPyExpr::NP_OP_OR, 2},
|
||||
{"bitwise_xor", NumPyExpr::NP_OP_XOR, 2},
|
||||
{"logical_and", NumPyExpr::NP_OP_LOGICAL_AND, 2},
|
||||
{"logical_or", NumPyExpr::NP_OP_LOGICAL_OR, 2},
|
||||
{"logical_xor", NumPyExpr::NP_OP_LOGICAL_XOR, 2},
|
||||
{"equal", NumPyExpr::NP_OP_EQ, 2},
|
||||
{"not_equal", NumPyExpr::NP_OP_NE, 2},
|
||||
{"less", NumPyExpr::NP_OP_LT, 2},
|
||||
{"less_equal", NumPyExpr::NP_OP_LE, 2},
|
||||
{"greater", NumPyExpr::NP_OP_GT, 2},
|
||||
{"greater_equal", NumPyExpr::NP_OP_GE, 2},
|
||||
{"minimum", NumPyExpr::NP_OP_MIN, 2},
|
||||
{"maximum", NumPyExpr::NP_OP_MAX, 2},
|
||||
{"fmin", NumPyExpr::NP_OP_FMIN, 2},
|
||||
{"fmax", NumPyExpr::NP_OP_FMAX, 2},
|
||||
{"sin", NumPyExpr::NP_OP_SIN, 1},
|
||||
{"cos", NumPyExpr::NP_OP_COS, 1},
|
||||
{"tan", NumPyExpr::NP_OP_TAN, 1},
|
||||
{"arcsin", NumPyExpr::NP_OP_ARCSIN, 1},
|
||||
{"arccos", NumPyExpr::NP_OP_ARCCOS, 1},
|
||||
{"arctan", NumPyExpr::NP_OP_ARCTAN, 1},
|
||||
{"arctan2", NumPyExpr::NP_OP_ARCTAN2, 2},
|
||||
{"hypot", NumPyExpr::NP_OP_HYPOT, 2},
|
||||
{"sinh", NumPyExpr::NP_OP_SINH, 1},
|
||||
{"cosh", NumPyExpr::NP_OP_COSH, 1},
|
||||
{"tanh", NumPyExpr::NP_OP_TANH, 1},
|
||||
{"arcsinh", NumPyExpr::NP_OP_ARCSINH, 1},
|
||||
{"arccosh", NumPyExpr::NP_OP_ARCCOSH, 1},
|
||||
{"arctanh", NumPyExpr::NP_OP_ARCTANH, 1},
|
||||
{"conjugate", NumPyExpr::NP_OP_CONJ, 1},
|
||||
{"exp", NumPyExpr::NP_OP_EXP, 1},
|
||||
{"exp2", NumPyExpr::NP_OP_EXP2, 1},
|
||||
{"log", NumPyExpr::NP_OP_LOG, 1},
|
||||
{"log2", NumPyExpr::NP_OP_LOG2, 1},
|
||||
{"log10", NumPyExpr::NP_OP_LOG10, 1},
|
||||
{"expm1", NumPyExpr::NP_OP_EXPM1, 1},
|
||||
{"log1p", NumPyExpr::NP_OP_LOG1P, 1},
|
||||
{"sqrt", NumPyExpr::NP_OP_SQRT, 1},
|
||||
{"square", NumPyExpr::NP_OP_SQUARE, 1},
|
||||
{"cbrt", NumPyExpr::NP_OP_CBRT, 1},
|
||||
{"logaddexp", NumPyExpr::NP_OP_LOGADDEXP, 2},
|
||||
{"logaddexp2", NumPyExpr::NP_OP_LOGADDEXP2, 2},
|
||||
{"reciprocal", NumPyExpr::NP_OP_RECIPROCAL, 1},
|
||||
{"rint", NumPyExpr::NP_OP_RINT, 1},
|
||||
{"floor", NumPyExpr::NP_OP_FLOOR, 1},
|
||||
{"ceil", NumPyExpr::NP_OP_CEIL, 1},
|
||||
{"trunc", NumPyExpr::NP_OP_TRUNC, 1},
|
||||
{"isnan", NumPyExpr::NP_OP_ISNAN, 1},
|
||||
{"isinf", NumPyExpr::NP_OP_ISINF, 1},
|
||||
{"isfinite", NumPyExpr::NP_OP_ISFINITE, 1},
|
||||
{"sign", NumPyExpr::NP_OP_SIGN, 1},
|
||||
{"signbit", NumPyExpr::NP_OP_SIGNBIT, 1},
|
||||
{"copysign", NumPyExpr::NP_OP_COPYSIGN, 2},
|
||||
{"spacing", NumPyExpr::NP_OP_SPACING, 1},
|
||||
{"nextafter", NumPyExpr::NP_OP_NEXTAFTER, 2},
|
||||
{"deg2rad", NumPyExpr::NP_OP_DEG2RAD, 1},
|
||||
{"radians", NumPyExpr::NP_OP_DEG2RAD, 1},
|
||||
{"rad2deg", NumPyExpr::NP_OP_RAD2DEG, 1},
|
||||
{"degrees", NumPyExpr::NP_OP_RAD2DEG, 1},
|
||||
{"heaviside", NumPyExpr::NP_OP_HEAVISIDE, 2},
|
||||
};
|
||||
|
||||
auto getNumPyExprType = [](types::Type *t, NumPyPrimitiveTypes &T) -> NumPyType {
|
||||
if (t->is(T.bool_))
|
||||
return {NumPyType::NP_TYPE_BOOL};
|
||||
if (t->is(T.i8))
|
||||
return {NumPyType::NP_TYPE_I8};
|
||||
if (t->is(T.u8))
|
||||
return {NumPyType::NP_TYPE_U8};
|
||||
if (t->is(T.i16))
|
||||
return {NumPyType::NP_TYPE_I16};
|
||||
if (t->is(T.u16))
|
||||
return {NumPyType::NP_TYPE_U16};
|
||||
if (t->is(T.i32))
|
||||
return {NumPyType::NP_TYPE_I32};
|
||||
if (t->is(T.u32))
|
||||
return {NumPyType::NP_TYPE_U32};
|
||||
if (t->is(T.i64))
|
||||
return {NumPyType::NP_TYPE_I64};
|
||||
if (t->is(T.u64))
|
||||
return {NumPyType::NP_TYPE_U64};
|
||||
if (t->is(T.f16))
|
||||
return {NumPyType::NP_TYPE_F16};
|
||||
if (t->is(T.f32))
|
||||
return {NumPyType::NP_TYPE_F32};
|
||||
if (t->is(T.f64))
|
||||
return {NumPyType::NP_TYPE_F64};
|
||||
if (t->is(T.c64))
|
||||
return {NumPyType::NP_TYPE_C64};
|
||||
if (t->is(T.c128))
|
||||
return {NumPyType::NP_TYPE_C128};
|
||||
if (isArrayType(t)) {
|
||||
auto generics = t->getGenerics();
|
||||
seqassertn(generics.size() == 2 && generics[0].isType() && generics[1].isStatic(),
|
||||
"unrecognized ndarray generics");
|
||||
auto *dtype = generics[0].getTypeValue();
|
||||
auto ndim = generics[1].getStaticValue();
|
||||
if (dtype->is(T.bool_))
|
||||
return {NumPyType::NP_TYPE_ARR_BOOL, ndim};
|
||||
if (dtype->is(T.i8))
|
||||
return {NumPyType::NP_TYPE_ARR_I8, ndim};
|
||||
if (dtype->is(T.u8))
|
||||
return {NumPyType::NP_TYPE_ARR_U8, ndim};
|
||||
if (dtype->is(T.i16))
|
||||
return {NumPyType::NP_TYPE_ARR_I16, ndim};
|
||||
if (dtype->is(T.u16))
|
||||
return {NumPyType::NP_TYPE_ARR_U16, ndim};
|
||||
if (dtype->is(T.i32))
|
||||
return {NumPyType::NP_TYPE_ARR_I32, ndim};
|
||||
if (dtype->is(T.u32))
|
||||
return {NumPyType::NP_TYPE_ARR_U32, ndim};
|
||||
if (dtype->is(T.i64))
|
||||
return {NumPyType::NP_TYPE_ARR_I64, ndim};
|
||||
if (dtype->is(T.u64))
|
||||
return {NumPyType::NP_TYPE_ARR_U64, ndim};
|
||||
if (dtype->is(T.f16))
|
||||
return {NumPyType::NP_TYPE_ARR_F16, ndim};
|
||||
if (dtype->is(T.f32))
|
||||
return {NumPyType::NP_TYPE_ARR_F32, ndim};
|
||||
if (dtype->is(T.f64))
|
||||
return {NumPyType::NP_TYPE_ARR_F64, ndim};
|
||||
if (dtype->is(T.c64))
|
||||
return {NumPyType::NP_TYPE_ARR_C64, ndim};
|
||||
if (dtype->is(T.c128))
|
||||
return {NumPyType::NP_TYPE_ARR_C128, ndim};
|
||||
}
|
||||
return {};
|
||||
};
|
||||
|
||||
auto type = getNumPyExprType(v->getType(), T);
|
||||
if (!type)
|
||||
return {};
|
||||
|
||||
// Don't break up expressions that result in scalars or 0-dim arrays since those
|
||||
// should only be computed once
|
||||
if (type.ndim == 0) {
|
||||
auto res = std::make_unique<NumPyExpr>(type, v);
|
||||
leaves.emplace_back(res.get(), v);
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
if (auto *c = cast<CallInstr>(v)) {
|
||||
auto *f = util::getFunc(c->getCallee());
|
||||
|
||||
// Check for matmul
|
||||
if (f && c->numArgs() == 3 && isNoneType(c->back()->getType(), T) &&
|
||||
(f->getName().rfind("std.numpy.linalg_sym.matmul:0[", 0) == 0 ||
|
||||
(f->getName().rfind("std.numpy.linalg_sym.dot:0[", 0) == 0 &&
|
||||
type.ndim == 2))) {
|
||||
std::vector<Value *> args(c->begin(), c->end());
|
||||
auto op = NumPyExpr::NP_OP_MATMUL;
|
||||
auto lhs = parse(args[0], leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
auto rhs = parse(args[1], leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs), std::move(rhs));
|
||||
}
|
||||
|
||||
// Check for builtin abs()
|
||||
if (f && c->numArgs() == 1 &&
|
||||
(f->getName().rfind("std.internal.builtin.abs:0[", 0) == 0)) {
|
||||
auto op = NumPyExpr::NP_OP_ABS;
|
||||
auto lhs = parse(c->front(), leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
|
||||
}
|
||||
|
||||
// Check for transpose
|
||||
if (f && isArrayType(f->getParentType()) && c->numArgs() == 1 &&
|
||||
f->getUnmangledName() == "T") {
|
||||
auto op = NumPyExpr::NP_OP_TRANSPOSE;
|
||||
auto lhs = parse(c->front(), leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
|
||||
}
|
||||
|
||||
// Check for ufunc (e.g. "np.exp()") call
|
||||
if (f && f->getUnmangledName() == Module::CALL_MAGIC_NAME &&
|
||||
isUFuncType(f->getParentType())) {
|
||||
|
||||
auto ufuncGenerics = f->getParentType()->getGenerics();
|
||||
seqassertn(!ufuncGenerics.empty() && ufuncGenerics[0].isStaticStr(),
|
||||
"unrecognized ufunc class generics");
|
||||
auto ufunc = ufuncGenerics[0].getStaticStringValue();
|
||||
|
||||
auto callGenerics = f->getType()->getGenerics();
|
||||
seqassertn(!callGenerics.empty() && callGenerics[0].isType(),
|
||||
"unrecognized ufunc call generics");
|
||||
auto *dtype = callGenerics[0].getTypeValue();
|
||||
|
||||
if (dtype->is(T.none)) {
|
||||
for (auto &u : ufuncs) {
|
||||
if (u.name == ufunc) {
|
||||
seqassertn(u.args == 1 || u.args == 2,
|
||||
"unexpected number of arguments (ufunc)");
|
||||
|
||||
// Argument order:
|
||||
// - ufunc self
|
||||
// - operand 1
|
||||
// - (if binary) operand 2
|
||||
// - 'out'
|
||||
// - 'where'
|
||||
std::vector<Value *> args(c->begin(), c->end());
|
||||
seqassertn(args.size() == u.args + 3, "unexpected call of {}", u.name);
|
||||
auto *where = args[args.size() - 1];
|
||||
auto *out = args[args.size() - 2];
|
||||
|
||||
if (auto *whereConst = cast<BoolConst>(where)) {
|
||||
if (!whereConst->getVal())
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!isNoneType(out->getType(), T))
|
||||
break;
|
||||
|
||||
auto op = u.op;
|
||||
auto lhs = parse(args[1], leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
if (u.args == 1)
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
|
||||
|
||||
auto rhs = parse(args[2], leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
|
||||
std::move(rhs));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for magic method call
|
||||
if (f && isArrayType(f->getParentType())) {
|
||||
for (auto &m : magics) {
|
||||
if (f->getUnmangledName() == m.name && c->numArgs() == m.args) {
|
||||
seqassertn(m.args == 1 || m.args == 2,
|
||||
"unexpected number of arguments (magic)");
|
||||
std::vector<Value *> args(c->begin(), c->end());
|
||||
auto op = m.op;
|
||||
auto lhs = parse(args[0], leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
if (m.args == 1)
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
|
||||
|
||||
auto rhs = parse(args[1], leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
return m.right ? std::make_unique<NumPyExpr>(type, v, op, std::move(rhs),
|
||||
std::move(lhs))
|
||||
: std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
|
||||
std::move(rhs));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for right-hand-side magic method call
|
||||
// Right-hand-side magics (e.g. __radd__) are compiled into FlowInstr:
|
||||
// <lhs_expr> + <rhs_expr>
|
||||
// becomes:
|
||||
// { v1 = <lhs expr> ; v2 = <rhs expr> ; return rhs_class.__radd__(v2, v1) }
|
||||
// So we need to check for this to detect r-magics.
|
||||
if (auto *flow = cast<FlowInstr>(v)) {
|
||||
auto *series = cast<SeriesFlow>(flow->getFlow());
|
||||
auto *value = cast<CallInstr>(flow->getValue());
|
||||
auto *f = value ? util::getFunc(value->getCallee()) : nullptr;
|
||||
|
||||
if (series && f && value->numArgs() == 2) {
|
||||
std::vector<Value *> assignments(series->begin(), series->end());
|
||||
auto *arg1 = value->front();
|
||||
auto *arg2 = value->back();
|
||||
auto *vv1 = cast<VarValue>(arg1);
|
||||
auto *vv2 = cast<VarValue>(arg2);
|
||||
auto *arg1Var = vv1 ? vv1->getVar() : nullptr;
|
||||
auto *arg2Var = vv2 ? vv2->getVar() : nullptr;
|
||||
|
||||
for (auto &m : magics) {
|
||||
if (f->getUnmangledName() == m.name && value->numArgs() == m.args && m.right) {
|
||||
auto op = m.op;
|
||||
|
||||
if (assignments.size() == 0) {
|
||||
// Case 1: Degenerate flow instruction
|
||||
return parse(value, leaves, T);
|
||||
} else if (assignments.size() == 1) {
|
||||
// Case 2: One var -- check if it's either of the r-magic operands
|
||||
auto *a1 = cast<AssignInstr>(assignments.front());
|
||||
if (a1 && a1->getLhs() == arg1Var) {
|
||||
auto rhs = parse(a1->getRhs(), leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
auto lhs = parse(arg2, leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
|
||||
std::move(rhs));
|
||||
} else if (a1 && a1->getLhs() == arg2Var) {
|
||||
auto lhs = parse(a1->getRhs(), leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
auto rhs = parse(arg1, leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
|
||||
std::move(rhs));
|
||||
}
|
||||
} else if (assignments.size() == 2) {
|
||||
// Case 3: Two vars -- check both permutations
|
||||
auto *a1 = cast<AssignInstr>(assignments.front());
|
||||
auto *a2 = cast<AssignInstr>(assignments.back());
|
||||
|
||||
if (a1 && a2 && a1->getLhs() == arg1Var && a2->getLhs() == arg2Var) {
|
||||
auto rhs = parse(a1->getRhs(), leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
auto lhs = parse(a2->getRhs(), leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
|
||||
std::move(rhs));
|
||||
} else if (a1 && a2 && a2->getLhs() == arg1Var && a1->getLhs() == arg2Var) {
|
||||
auto lhs = parse(a1->getRhs(), leaves, T);
|
||||
if (!lhs)
|
||||
return {};
|
||||
|
||||
auto rhs = parse(a2->getRhs(), leaves, T);
|
||||
if (!rhs)
|
||||
return {};
|
||||
|
||||
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
|
||||
std::move(rhs));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto res = std::make_unique<NumPyExpr>(type, v);
|
||||
leaves.emplace_back(res.get(), v);
|
||||
return std::move(res);
|
||||
}
|
||||
|
||||
namespace {
|
||||
Var *optimizeHelper(NumPyOptimizationUnit &unit, NumPyExpr *expr, CodegenContext &C) {
|
||||
auto *M = unit.value->getModule();
|
||||
auto *series = C.series;
|
||||
|
||||
// Remove operations that cannot easily be done element-wise by optimizing them
|
||||
// separately, recursively.
|
||||
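// Illustrative (assumed) source-level example: for "d = (a @ b) + c", the "a @ b"
// subtree is evaluated up front via _matmul into a temporary and replaced by a
// leaf, so only the element-wise "tmp + c" part remains a fusion candidate.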
expr->apply([&](NumPyExpr &e) {
|
||||
if (!e.type.isArray())
|
||||
return;
|
||||
|
||||
if (e.op == NumPyExpr::NP_OP_TRANSPOSE) {
|
||||
auto *lv = optimizeHelper(unit, e.lhs.get(), C);
|
||||
auto *transposeFunc =
|
||||
M->getOrRealizeFunc("_transpose", {lv->getType()}, {}, FUSION_MODULE);
|
||||
seqassertn(transposeFunc, "transpose func not found");
|
||||
auto *var = util::makeVar(util::call(transposeFunc, {M->Nr<VarValue>(lv)}),
|
||||
C.series, C.func);
|
||||
C.vars[&e] = var;
|
||||
NumPyExpr replacement(e.type, M->Nr<VarValue>(var));
|
||||
replacement.freeable = e.lhs->freeable;
|
||||
e.replace(replacement);
|
||||
}
|
||||
|
||||
if (e.op == NumPyExpr::NP_OP_MATMUL) {
|
||||
auto *lv = optimizeHelper(unit, e.lhs.get(), C);
|
||||
auto *rv = optimizeHelper(unit, e.rhs.get(), C);
|
||||
auto *matmulFunc = M->getOrRealizeFunc("_matmul", {lv->getType(), rv->getType()},
|
||||
{}, FUSION_MODULE);
|
||||
seqassertn(matmulFunc, "matmul func not found");
|
||||
auto *var = util::makeVar(
|
||||
util::call(matmulFunc, {M->Nr<VarValue>(lv), M->Nr<VarValue>(rv)}), C.series,
|
||||
C.func);
|
||||
C.vars[&e] = var;
|
||||
NumPyExpr replacement(e.type, M->Nr<VarValue>(var));
|
||||
replacement.freeable = true;
|
||||
e.replace(replacement);
|
||||
}
|
||||
});
|
||||
|
||||
// Optimize the given expression
|
||||
bool changed;
|
||||
do {
|
||||
changed = false;
|
||||
expr->apply([&](NumPyExpr &e) {
|
||||
if (e.depth() <= 2)
|
||||
return;
|
||||
|
||||
auto cost = e.cost();
|
||||
auto bcinfo = e.getBroadcastInfo();
|
||||
Var *result = nullptr;
|
||||
|
||||
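// Fusion decision, summarized (thresholds are defined elsewhere in this pass):
//   cost <= AlwaysFuseCostThreshold                     -> fuse unconditionally
//   cost <= NeverFuseCostThreshold, definitely no bcast -> fuse unconditionally
//   cost <= NeverFuseCostThreshold, bcast not certain   -> fuse behind a runtime check
//   otherwise                                           -> leave for sequential evaluation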
if (cost <= AlwaysFuseCostThreshold ||
|
||||
(cost <= NeverFuseCostThreshold && bcinfo == BroadcastInfo::NO)) {
|
||||
// Don't care about broadcasting; just fuse.
|
||||
XLOG("-> static fuse:\n{}", e.str());
|
||||
result = e.codegenFusedEval(C);
|
||||
} else if (cost <= NeverFuseCostThreshold && bcinfo != BroadcastInfo::YES) {
|
||||
// Check at runtime if we're broadcasting and fuse conditionally.
|
||||
XLOG("-> conditional fuse:\n{}", e.str());
|
||||
auto *broadcasts = e.codegenBroadcasts(C);
|
||||
auto *seqtSeries = M->Nr<SeriesFlow>();
|
||||
auto *fuseSeries = M->Nr<SeriesFlow>();
|
||||
auto *branch = M->Nr<IfFlow>(broadcasts, seqtSeries, fuseSeries);
|
||||
|
||||
C.series = seqtSeries;
|
||||
auto *seqtResult = e.codegenSequentialEval(C);
|
||||
C.series = fuseSeries;
|
||||
auto *fuseResult = e.codegenFusedEval(C);
|
||||
seqassertn(seqtResult->getType()->is(fuseResult->getType()),
|
||||
"types are not the same: {} {}", seqtResult->getType()->getName(),
|
||||
fuseResult->getType()->getName());
|
||||
|
||||
result = M->Nr<Var>(seqtResult->getType(), false);
|
||||
unit.func->push_back(result);
|
||||
seqtSeries->push_back(M->Nr<AssignInstr>(result, M->Nr<VarValue>(seqtResult)));
|
||||
fuseSeries->push_back(M->Nr<AssignInstr>(result, M->Nr<VarValue>(fuseResult)));
|
||||
C.series = series;
|
||||
series->push_back(branch);
|
||||
}
|
||||
|
||||
if (result) {
|
||||
NumPyExpr tmp(e.type, M->Nr<VarValue>(result));
|
||||
e.replace(tmp);
|
||||
e.freeable = true;
|
||||
C.vars[&e] = result;
|
||||
changed = true;
|
||||
}
|
||||
});
|
||||
} while (changed);
|
||||
|
||||
XLOG("-> sequential eval:\n{}", expr->str());
|
||||
return expr->codegenSequentialEval(C);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
bool NumPyOptimizationUnit::optimize(NumPyPrimitiveTypes &T) {
|
||||
if (!expr->type.isArray() || expr->depth() <= 2)
|
||||
return false;
|
||||
|
||||
XLOG("Optimizing expression at {}\n{}", value->getSrcInfo(), expr->str());
|
||||
|
||||
auto *M = value->getModule();
|
||||
auto *series = M->Nr<SeriesFlow>();
|
||||
CodegenContext C(M, series, func, T);
|
||||
util::CloneVisitor cv(M);
|
||||
|
||||
for (auto &p : leaves) {
|
||||
auto *var = util::makeVar(cv.clone(p.second), series, func);
|
||||
C.vars.emplace(p.first, var);
|
||||
}
|
||||
|
||||
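// Evaluate the fully-optimized expression; the original IR value is then replaced
// by a FlowInstr that runs the generated series and yields the result variable.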
auto *result = optimizeHelper(*this, expr.get(), C);
|
||||
auto *replacement = M->Nr<FlowInstr>(C.series, M->Nr<VarValue>(result));
|
||||
value->replaceAll(replacement);
|
||||
return true;
|
||||
}
|
||||
|
||||
struct ExtractArrayExpressions : public util::Operator {
|
||||
BodiedFunc *func;
|
||||
NumPyPrimitiveTypes types;
|
||||
std::vector<NumPyOptimizationUnit> exprs;
|
||||
std::unordered_set<id_t> extracted;
|
||||
|
||||
explicit ExtractArrayExpressions(BodiedFunc *func)
|
||||
: util::Operator(), func(func), types(func->getModule()), exprs(), extracted() {}
|
||||
|
||||
void extract(Value *v, AssignInstr *assign = nullptr) {
|
||||
if (extracted.count(v->getId()))
|
||||
return;
|
||||
|
||||
std::vector<std::pair<NumPyExpr *, Value *>> leaves;
|
||||
auto expr = parse(v, leaves, types);
|
||||
if (expr) {
|
||||
int64_t numArrayNodes = 0;
|
||||
expr->apply([&](NumPyExpr &e) {
|
||||
if (e.type.isArray())
|
||||
++numArrayNodes;
|
||||
extracted.emplace(e.val->getId());
|
||||
});
|
||||
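// Keep only expressions that involve at least one array and more than a single node.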
if (numArrayNodes > 0 && expr->depth() > 1) {
|
||||
exprs.push_back({v, func, std::move(expr), std::move(leaves), assign});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void preHook(Node *n) override {
|
||||
if (auto *v = cast<AssignInstr>(n)) {
|
||||
extract(v->getRhs(), v->getLhs()->isGlobal() ? nullptr : v);
|
||||
} else if (auto *v = cast<Value>(n)) {
|
||||
extract(v);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const std::string NumPyFusionPass::KEY = "core-numpy-fusion";
|
||||
|
||||
void NumPyFusionPass::visit(BodiedFunc *func) {
|
||||
ExtractArrayExpressions extractor(func);
|
||||
func->accept(extractor);
|
||||
|
||||
if (extractor.exprs.empty())
|
||||
return;
|
||||
|
||||
auto *rdres = getAnalysisResult<analyze::dataflow::RDResult>(reachingDefKey);
|
||||
auto it = rdres->results.find(func->getId());
|
||||
if (it == rdres->results.end())
|
||||
return;
|
||||
auto *rd = it->second.get();
|
||||
auto *se = getAnalysisResult<analyze::module::SideEffectResult>(sideEffectsKey);
|
||||
auto *cfg = rdres->cfgResult->graphs.find(func->getId())->second.get();
|
||||
auto fwd = getForwardingDAGs(func, rd, cfg, se, extractor.exprs);
|
||||
|
||||
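// Each forwarding DAG links units whose intermediate results feed leaves of other
// units. doForwarding merges each DAG into a single unit; if that merged unit is
// optimized successfully, the now-redundant intermediate assignments are removed
// (replaced with empty series).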
for (auto &dag : fwd) {
|
||||
std::vector<AssignInstr *> assignsToDelete;
|
||||
auto *e = doForwarding(dag, assignsToDelete);
|
||||
if (e->optimize(extractor.types)) {
|
||||
for (auto *a : assignsToDelete)
|
||||
a->replaceAll(func->getModule()->Nr<SeriesFlow>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace numpy
|
||||
} // namespace transform
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -0,0 +1,313 @@
|
|||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "codon/cir/analyze/dataflow/reaching.h"
|
||||
#include "codon/cir/analyze/module/global_vars.h"
|
||||
#include "codon/cir/analyze/module/side_effect.h"
|
||||
#include "codon/cir/transform/pass.h"
|
||||
#include "codon/cir/types/types.h"
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace codon {
|
||||
namespace ir {
|
||||
namespace transform {
|
||||
namespace numpy {
|
||||
extern const std::string FUSION_MODULE;
|
||||
|
||||
/// NumPy operator fusion pass.
|
||||
class NumPyFusionPass : public OperatorPass {
|
||||
private:
|
||||
/// Key of the reaching definition analysis
|
||||
std::string reachingDefKey;
|
||||
/// Key of the side effect analysis
|
||||
std::string sideEffectsKey;
|
||||
|
||||
public:
|
||||
static const std::string KEY;
|
||||
|
||||
/// Constructs a NumPy fusion pass.
|
||||
/// @param reachingDefKey the reaching definition analysis' key
|
||||
/// @param sideEffectsKey the side effect analysis' key
|
||||
NumPyFusionPass(const std::string &reachingDefKey, const std::string &sideEffectsKey)
|
||||
: OperatorPass(), reachingDefKey(reachingDefKey), sideEffectsKey(sideEffectsKey) {
|
||||
}
|
||||
|
||||
std::string getKey() const override { return KEY; }
|
||||
void visit(BodiedFunc *f) override;
|
||||
};
|
||||
|
||||
struct NumPyPrimitiveTypes {
|
||||
types::Type *none;
|
||||
types::Type *optnone;
|
||||
types::Type *bool_;
|
||||
types::Type *i8;
|
||||
types::Type *u8;
|
||||
types::Type *i16;
|
||||
types::Type *u16;
|
||||
types::Type *i32;
|
||||
types::Type *u32;
|
||||
types::Type *i64;
|
||||
types::Type *u64;
|
||||
types::Type *f16;
|
||||
types::Type *f32;
|
||||
types::Type *f64;
|
||||
types::Type *c64;
|
||||
types::Type *c128;
|
||||
|
||||
explicit NumPyPrimitiveTypes(Module *M);
|
||||
};
|
||||
|
||||
struct NumPyType {
|
||||
enum Type {
|
||||
NP_TYPE_NONE = -1,
|
||||
NP_TYPE_BOOL,
|
||||
NP_TYPE_I8,
|
||||
NP_TYPE_U8,
|
||||
NP_TYPE_I16,
|
||||
NP_TYPE_U16,
|
||||
NP_TYPE_I32,
|
||||
NP_TYPE_U32,
|
||||
NP_TYPE_I64,
|
||||
NP_TYPE_U64,
|
||||
NP_TYPE_F16,
|
||||
NP_TYPE_F32,
|
||||
NP_TYPE_F64,
|
||||
NP_TYPE_C64,
|
||||
NP_TYPE_C128,
|
||||
NP_TYPE_SCALAR_END, // separator value
|
||||
NP_TYPE_ARR_BOOL,
|
||||
NP_TYPE_ARR_I8,
|
||||
NP_TYPE_ARR_U8,
|
||||
NP_TYPE_ARR_I16,
|
||||
NP_TYPE_ARR_U16,
|
||||
NP_TYPE_ARR_I32,
|
||||
NP_TYPE_ARR_U32,
|
||||
NP_TYPE_ARR_I64,
|
||||
NP_TYPE_ARR_U64,
|
||||
NP_TYPE_ARR_F16,
|
||||
NP_TYPE_ARR_F32,
|
||||
NP_TYPE_ARR_F64,
|
||||
NP_TYPE_ARR_C64,
|
||||
NP_TYPE_ARR_C128,
|
||||
} dtype;
|
||||
int64_t ndim;
|
||||
|
||||
NumPyType(Type dtype, int64_t ndim = 0);
|
||||
NumPyType();
|
||||
|
||||
static NumPyType get(types::Type *t, NumPyPrimitiveTypes &T);
|
||||
|
||||
types::Type *getIRBaseType(NumPyPrimitiveTypes &T) const;
|
||||
|
||||
operator bool() const { return dtype != NP_TYPE_NONE; }
|
||||
bool isArray() const { return dtype > NP_TYPE_SCALAR_END; }
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, NumPyType const &type);
|
||||
|
||||
std::string str() const;
|
||||
};
|
||||
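// Example encodings (illustrative): a float64 scalar is {NP_TYPE_F64, ndim = 0},
// while a 2-D float64 ndarray is {NP_TYPE_ARR_F64, ndim = 2}; isArray() is true
// exactly for the NP_TYPE_ARR_* values.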
|
||||
struct NumPyExpr;
|
||||
|
||||
struct CodegenContext {
|
||||
Module *M;
|
||||
SeriesFlow *series;
|
||||
BodiedFunc *func;
|
||||
std::unordered_map<NumPyExpr *, Var *> vars;
|
||||
NumPyPrimitiveTypes &T;
|
||||
|
||||
CodegenContext(Module *M, SeriesFlow *series, BodiedFunc *func,
|
||||
NumPyPrimitiveTypes &T);
|
||||
};
|
||||
|
||||
enum BroadcastInfo {
|
||||
UNKNOWN,
|
||||
YES,
|
||||
NO,
|
||||
MAYBE,
|
||||
};
|
||||
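// Broadcast status of an expression's operands as seen by the fusion pass: NO and
// YES are definite answers, while UNKNOWN/MAYBE require a runtime check (see the
// conditional-fuse path in optimizeHelper).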
|
||||
struct NumPyExpr {
|
||||
NumPyType type;
|
||||
Value *val;
|
||||
enum Op {
|
||||
NP_OP_NONE,
|
||||
NP_OP_POS,
|
||||
NP_OP_NEG,
|
||||
NP_OP_INVERT,
|
||||
NP_OP_ABS,
|
||||
NP_OP_TRANSPOSE,
|
||||
NP_OP_ADD,
|
||||
NP_OP_SUB,
|
||||
NP_OP_MUL,
|
||||
NP_OP_MATMUL,
|
||||
NP_OP_TRUE_DIV,
|
||||
NP_OP_FLOOR_DIV,
|
||||
NP_OP_MOD,
|
||||
NP_OP_FMOD,
|
||||
NP_OP_POW,
|
||||
NP_OP_LSHIFT,
|
||||
NP_OP_RSHIFT,
|
||||
NP_OP_AND,
|
||||
NP_OP_OR,
|
||||
NP_OP_XOR,
|
||||
NP_OP_LOGICAL_AND,
|
||||
NP_OP_LOGICAL_OR,
|
||||
NP_OP_LOGICAL_XOR,
|
||||
NP_OP_EQ,
|
||||
NP_OP_NE,
|
||||
NP_OP_LT,
|
||||
NP_OP_LE,
|
||||
NP_OP_GT,
|
||||
NP_OP_GE,
|
||||
NP_OP_MIN,
|
||||
NP_OP_MAX,
|
||||
NP_OP_FMIN,
|
||||
NP_OP_FMAX,
|
||||
NP_OP_SIN,
|
||||
NP_OP_COS,
|
||||
NP_OP_TAN,
|
||||
NP_OP_ARCSIN,
|
||||
NP_OP_ARCCOS,
|
||||
NP_OP_ARCTAN,
|
||||
NP_OP_ARCTAN2,
|
||||
NP_OP_HYPOT,
|
||||
NP_OP_SINH,
|
||||
NP_OP_COSH,
|
||||
NP_OP_TANH,
|
||||
NP_OP_ARCSINH,
|
||||
NP_OP_ARCCOSH,
|
||||
NP_OP_ARCTANH,
|
||||
NP_OP_CONJ,
|
||||
NP_OP_EXP,
|
||||
NP_OP_EXP2,
|
||||
NP_OP_LOG,
|
||||
NP_OP_LOG2,
|
||||
NP_OP_LOG10,
|
||||
NP_OP_EXPM1,
|
||||
NP_OP_LOG1P,
|
||||
NP_OP_SQRT,
|
||||
NP_OP_SQUARE,
|
||||
NP_OP_CBRT,
|
||||
NP_OP_LOGADDEXP,
|
||||
NP_OP_LOGADDEXP2,
|
||||
NP_OP_RECIPROCAL,
|
||||
NP_OP_RINT,
|
||||
NP_OP_FLOOR,
|
||||
NP_OP_CEIL,
|
||||
NP_OP_TRUNC,
|
||||
NP_OP_ISNAN,
|
||||
NP_OP_ISINF,
|
||||
NP_OP_ISFINITE,
|
||||
NP_OP_SIGN,
|
||||
NP_OP_SIGNBIT,
|
||||
NP_OP_COPYSIGN,
|
||||
NP_OP_SPACING,
|
||||
NP_OP_NEXTAFTER,
|
||||
NP_OP_DEG2RAD,
|
||||
NP_OP_RAD2DEG,
|
||||
NP_OP_HEAVISIDE,
|
||||
} op;
|
||||
std::unique_ptr<NumPyExpr> lhs;
|
||||
std::unique_ptr<NumPyExpr> rhs;
|
||||
bool freeable;
|
||||
|
||||
NumPyExpr(NumPyType type, Value *val)
|
||||
: type(std::move(type)), val(val), op(NP_OP_NONE), lhs(), rhs(), freeable(false) {
|
||||
}
|
||||
NumPyExpr(NumPyType type, Value *val, NumPyExpr::Op op,
|
||||
std::unique_ptr<NumPyExpr> lhs)
|
||||
: type(std::move(type)), val(val), op(op), lhs(std::move(lhs)), rhs(),
|
||||
freeable(false) {}
|
||||
NumPyExpr(NumPyType type, Value *val, NumPyExpr::Op op,
|
||||
std::unique_ptr<NumPyExpr> lhs, std::unique_ptr<NumPyExpr> rhs)
|
||||
: type(std::move(type)), val(val), op(op), lhs(std::move(lhs)),
|
||||
rhs(std::move(rhs)), freeable(false) {}
|
||||
|
||||
static std::unique_ptr<NumPyExpr>
|
||||
parse(Value *v, std::vector<std::pair<NumPyExpr *, Value *>> &leaves,
|
||||
NumPyPrimitiveTypes &T);
|
||||
|
||||
void replace(NumPyExpr &e);
|
||||
bool haveVectorizedLoop() const;
|
||||
|
||||
int64_t opcost() const;
|
||||
int64_t cost() const;
|
||||
|
||||
std::string opstring() const;
|
||||
void dump(std::ostream &os, int level, int &leafId) const;
|
||||
friend std::ostream &operator<<(std::ostream &os, NumPyExpr const &expr);
|
||||
std::string str() const;
|
||||
|
||||
bool isLeaf() const { return !lhs && !rhs; }
|
||||
|
||||
int depth() const {
|
||||
return std::max(lhs ? lhs->depth() : 0, rhs ? rhs->depth() : 0) + 1;
|
||||
}
|
||||
|
||||
int nodes() const { return (lhs ? lhs->nodes() : 0) + (rhs ? rhs->nodes() : 0) + 1; }
|
||||
|
||||
void apply(std::function<void(NumPyExpr &)> f);
|
||||
|
||||
Value *codegenBroadcasts(CodegenContext &C);
|
||||
|
||||
Var *codegenFusedEval(CodegenContext &C);
|
||||
|
||||
Var *codegenSequentialEval(CodegenContext &C);
|
||||
|
||||
BroadcastInfo getBroadcastInfo();
|
||||
|
||||
Value *codegenScalarExpr(CodegenContext &C,
|
||||
const std::unordered_map<NumPyExpr *, Var *> &args,
|
||||
const std::unordered_map<NumPyExpr *, unsigned> &scalarMap,
|
||||
Var *scalars);
|
||||
};
|
||||
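// Illustrative (assumed) example: the source expression "a * b + np.sqrt(c)" parses
// into an NP_OP_ADD node whose lhs is an NP_OP_MUL node over the leaves 'a' and 'b',
// and whose rhs is an NP_OP_SQRT node over the leaf 'c'; depth() of the whole tree
// is 3 and nodes() is 6.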
|
||||
std::unique_ptr<NumPyExpr> parse(Value *v,
|
||||
std::vector<std::pair<NumPyExpr *, Value *>> &leaves,
|
||||
NumPyPrimitiveTypes &T);
|
||||
|
||||
struct NumPyOptimizationUnit {
|
||||
/// Original IR value corresponding to this expression
|
||||
Value *value;
|
||||
/// Function in which the value exists
|
||||
BodiedFunc *func;
|
||||
/// Root expression
|
||||
std::unique_ptr<NumPyExpr> expr;
|
||||
/// Leaves ordered by execution in original expression
|
||||
std::vector<std::pair<NumPyExpr *, Value *>> leaves;
|
||||
/// AssignInstr whose right-hand side is represented by this expression, or null if none
|
||||
AssignInstr *assign;
|
||||
|
||||
bool optimize(NumPyPrimitiveTypes &T);
|
||||
};
|
||||
|
||||
struct Forwarding {
|
||||
NumPyOptimizationUnit *dst;
|
||||
NumPyOptimizationUnit *src;
|
||||
Var *var;
|
||||
NumPyExpr *dstLeaf;
|
||||
int64_t dstId;
|
||||
int64_t srcId;
|
||||
};
|
||||
|
||||
using ForwardingDAG =
|
||||
std::unordered_map<NumPyOptimizationUnit *, std::vector<Forwarding>>;
|
||||
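// A ForwardingDAG maps each optimization unit to the forwardings flowing into it:
// each edge records (per the Forwarding fields) that 'src' computes a variable
// consumed as 'dstLeaf' inside 'dst', so the two expressions can be merged before
// fusion.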
|
||||
NumPyOptimizationUnit *doForwarding(ForwardingDAG &dag,
|
||||
std::vector<AssignInstr *> &assignsToDelete);
|
||||
|
||||
std::vector<ForwardingDAG> getForwardingDAGs(BodiedFunc *func,
|
||||
analyze::dataflow::RDInspector *rd,
|
||||
analyze::dataflow::CFGraph *cfg,
|
||||
analyze::module::SideEffectResult *se,
|
||||
std::vector<NumPyOptimizationUnit> &exprs);
|
||||
|
||||
} // namespace numpy
|
||||
} // namespace transform
|
||||
} // namespace ir
|
||||
} // namespace codon
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "openmp.h"
|
||||
|
||||
|
@ -402,7 +402,8 @@ struct ReductionIdentifier : public util::Operator {
|
|||
static void extractAssociativeOpChain(Value *v, const std::string &op,
|
||||
types::Type *type,
|
||||
std::vector<Value *> &result) {
|
||||
if (util::isCallOf(v, op, {type, type}, type, /*method=*/true)) {
|
||||
if (util::isCallOf(v, op, {type, nullptr}, type, /*method=*/true) ||
|
||||
util::isCallOf(v, op, {nullptr, type}, type, /*method=*/true)) {
|
||||
auto *call = cast<CallInstr>(v);
|
||||
extractAssociativeOpChain(call->front(), op, type, result);
|
||||
extractAssociativeOpChain(call->back(), op, type, result);
|
||||
|
@ -450,7 +451,8 @@ struct ReductionIdentifier : public util::Operator {
|
|||
|
||||
for (auto &rf : reductionFunctions) {
|
||||
if (rf.method) {
|
||||
if (!util::isCallOf(item, rf.name, {type, type}, type, /*method=*/true))
|
||||
if (!(util::isCallOf(item, rf.name, {type, nullptr}, type, /*method=*/true) ||
|
||||
util::isCallOf(item, rf.name, {nullptr, type}, type, /*method=*/true)))
|
||||
continue;
|
||||
} else {
|
||||
if (!util::isCallOf(item, rf.name,
|
||||
|
@ -464,8 +466,7 @@ struct ReductionIdentifier : public util::Operator {
|
|||
|
||||
if (rf.method) {
|
||||
std::vector<Value *> opChain;
|
||||
extractAssociativeOpChain(callRHS, rf.name, callRHS->front()->getType(),
|
||||
opChain);
|
||||
extractAssociativeOpChain(callRHS, rf.name, type, opChain);
|
||||
if (opChain.size() < 2)
|
||||
continue;
|
||||
|
||||
|
@ -640,10 +641,11 @@ struct ParallelLoopTemplateReplacer : public LoopTemplateReplacer {
|
|||
|
||||
auto *series = M->Nr<SeriesFlow>();
|
||||
auto *tupleVal = util::makeVar(reductionTuple, series, parent);
|
||||
auto *reduceCode = util::call(
|
||||
reduceNoWait, {M->Nr<VarValue>(reductionLocRef), M->Nr<VarValue>(gtid),
|
||||
tupleVal, rawReducer, M->Nr<PointerValue>(lck)});
|
||||
auto *codeVar = util::makeVar(reduceCode, series, parent)->getVar();
|
||||
auto *reduceCode =
|
||||
util::call(reduceNoWait,
|
||||
{M->Nr<VarValue>(reductionLocRef), M->Nr<VarValue>(gtid),
|
||||
M->Nr<VarValue>(tupleVal), rawReducer, M->Nr<PointerValue>(lck)});
|
||||
auto *codeVar = util::makeVar(reduceCode, series, parent);
|
||||
seqassertn(codeVar->getType()->is(M->getIntType()), "wrong reduce code type");
|
||||
|
||||
auto *sectionNonAtomic = M->Nr<SeriesFlow>();
|
||||
|
@ -740,11 +742,11 @@ struct ImperativeLoopTemplateReplacer : public ParallelLoopTemplateReplacer {
|
|||
"unknown reduction init value");
|
||||
}
|
||||
|
||||
VarValue *newVar = util::makeVar(
|
||||
initVal, cast<SeriesFlow>(parent->getBody()), parent, /*prepend=*/true);
|
||||
sharedInfo.push_back({next, newVar->getVar(), reduction});
|
||||
auto *newVar = util::makeVar(initVal, cast<SeriesFlow>(parent->getBody()),
|
||||
parent, /*prepend=*/true);
|
||||
sharedInfo.push_back({next, newVar, reduction});
|
||||
|
||||
newArg = M->Nr<PointerValue>(newVar->getVar());
|
||||
newArg = M->Nr<PointerValue>(newVar);
|
||||
++next;
|
||||
} else {
|
||||
newArg = util::tupleGet(M->Nr<VarValue>(extras), next++);
|
||||
|
@ -918,9 +920,9 @@ struct TaskLoopRoutineStubReplacer : public ParallelLoopTemplateReplacer {
|
|||
for (auto *val : shareds) {
|
||||
if (getVarFromOutlinedArg(val)->getId() != loopVar->getId()) {
|
||||
if (auto &reduction = sharedRedux[sharedsNext]) {
|
||||
Var *newVar = util::getVar(util::makeVar(
|
||||
reduction.getInitial(), cast<SeriesFlow>(parent->getBody()), parent,
|
||||
/*prepend=*/true));
|
||||
auto *newVar = util::makeVar(reduction.getInitial(),
|
||||
cast<SeriesFlow>(parent->getBody()), parent,
|
||||
/*prepend=*/true);
|
||||
sharedInfo.push_back({sharedsNext, newVar, reduction});
|
||||
}
|
||||
}
|
||||
|
@ -1050,7 +1052,7 @@ struct TaskLoopRoutineStubReplacer : public ParallelLoopTemplateReplacer {
|
|||
seqassertn(irArrayType, "could not find 'TaskReductionInputArray' type");
|
||||
auto *taskRedInputsArray = util::makeVar(
|
||||
M->Nr<StackAllocInstr>(irArrayType, numRed), taskRedInitSeries, parent);
|
||||
array = util::getVar(taskRedInputsArray);
|
||||
array = taskRedInputsArray;
|
||||
auto *taskRedInputsArrayType = taskRedInputsArray->getType();
|
||||
|
||||
auto *taskRedSetItem = M->getOrRealizeMethod(
|
||||
|
@ -1081,7 +1083,7 @@ struct TaskLoopRoutineStubReplacer : public ParallelLoopTemplateReplacer {
|
|||
M->Nr<VarValue>(gtid),
|
||||
M->getInt(numRed), arrayPtr}),
|
||||
taskRedInitSeries, parent);
|
||||
tskgrp = util::getVar(taskRedInitResult);
|
||||
tskgrp = taskRedInitResult;
|
||||
v->replaceAll(taskRedInitSeries);
|
||||
}
|
||||
|
||||
|
@ -1345,14 +1347,13 @@ CollapseResult collapseLoop(BodiedFunc *parent, ImperativeForFlow *v, int64_t le
|
|||
for (auto *loop : loopNests) {
|
||||
LoopRange range;
|
||||
range.loop = loop;
|
||||
range.start = util::makeVar(loop->getStart(), setup, parent)->getVar();
|
||||
range.stop = util::makeVar(loop->getEnd(), setup, parent)->getVar();
|
||||
range.start = util::makeVar(loop->getStart(), setup, parent);
|
||||
range.stop = util::makeVar(loop->getEnd(), setup, parent);
|
||||
range.step = loop->getStep();
|
||||
range.len = util::makeVar(util::call(lenCalc, {M->Nr<VarValue>(range.start),
|
||||
M->Nr<VarValue>(range.stop),
|
||||
M->getInt(range.step)}),
|
||||
setup, parent)
|
||||
->getVar();
|
||||
range.len = util::makeVar(
|
||||
util::call(lenCalc, {M->Nr<VarValue>(range.start), M->Nr<VarValue>(range.stop),
|
||||
M->getInt(range.step)}),
|
||||
setup, parent);
|
||||
ranges.push_back(range);
|
||||
}
|
||||
|
||||
|
@ -1374,11 +1375,9 @@ CollapseResult collapseLoop(BodiedFunc *parent, ImperativeForFlow *v, int64_t le
|
|||
for (auto it = ranges.rbegin(); it != ranges.rend(); ++it) {
|
||||
auto *k = lastDiv ? lastDiv : collapsedVar;
|
||||
auto *div =
|
||||
util::makeVar(*M->Nr<VarValue>(k) / *M->Nr<VarValue>(it->len), body, parent)
|
||||
->getVar();
|
||||
util::makeVar(*M->Nr<VarValue>(k) / *M->Nr<VarValue>(it->len), body, parent);
|
||||
auto *mod =
|
||||
util::makeVar(*M->Nr<VarValue>(k) % *M->Nr<VarValue>(it->len), body, parent)
|
||||
->getVar();
|
||||
util::makeVar(*M->Nr<VarValue>(k) % *M->Nr<VarValue>(it->len), body, parent);
|
||||
auto *i =
|
||||
*M->Nr<VarValue>(it->start) + *(*M->Nr<VarValue>(mod) * *M->getInt(it->step));
|
||||
body->push_back(M->Nr<AssignInstr>(it->loop->getVar(), i));
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "schedule.h"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "pass.h"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "dict.h"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "generator.h"
|
||||
|
||||
|
@ -150,7 +150,7 @@ Func *genToSum(BodiedFunc *gen, types::Type *startType, types::Type *outType) {
|
|||
if (!init || !init->getType()->is(outType))
|
||||
return nullptr;
|
||||
|
||||
auto *accumulator = util::makeVar(init, body, fn, /*prepend=*/true)->getVar();
|
||||
auto *accumulator = util::makeVar(init, body, fn, /*prepend=*/true);
|
||||
GeneratorSumTransformer xgen(accumulator);
|
||||
fn->accept(xgen);
|
||||
body->push_back(M->Nr<ReturnInstr>(M->Nr<VarValue>(accumulator)));
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "io.h"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "list.h"
|
||||
|
||||
|
@ -45,7 +45,7 @@ struct ElementHandler {
|
|||
void doSetup(const std::vector<Value *> &values, SeriesFlow *block,
|
||||
BodiedFunc *parent) {
|
||||
for (auto *v : values) {
|
||||
vars.push_back(util::makeVar(v, block, parent)->getVar());
|
||||
vars.push_back(util::makeVar(v, block, parent));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -226,7 +226,7 @@ Value *optimize(BodiedFunc *parent, InspectionResult &r) {
|
|||
}
|
||||
|
||||
auto *opt = M->Nr<SeriesFlow>();
|
||||
auto *len = util::makeVar(M->getInt(0), opt, parent)->getVar();
|
||||
auto *len = util::makeVar(M->getInt(0), opt, parent);
|
||||
|
||||
for (auto &h : handlers) {
|
||||
h->setup(opt, parent);
|
||||
|
@ -238,8 +238,7 @@ Value *optimize(BodiedFunc *parent, InspectionResult &r) {
|
|||
|
||||
auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_opt_new", {M->getIntType()});
|
||||
seqassertn(fn, "could not find list new helper");
|
||||
auto *result =
|
||||
util::makeVar(util::call(fn, {M->Nr<VarValue>(len)}), opt, parent)->getVar();
|
||||
auto *result = util::makeVar(util::call(fn, {M->Nr<VarValue>(len)}), opt, parent);
|
||||
|
||||
for (auto &h : handlers) {
|
||||
opt->push_back(h->append(M->Nr<VarValue>(result)));
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
|
||||
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
||||
|
||||
#include "str.h"
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff.