Compare commits

...

21 Commits

Author SHA1 Message Date
Jonas Neubert dcb41dcfc9
codon build command: add --cir output type option () 2025-04-22 11:46:03 -04:00
A. R. Shajii c1dae7d87d Update OpenBLAS 2025-04-04 14:59:13 -04:00
A. R. Shajii 984974b40d Support CMake 4.0 2025-04-04 11:27:35 -04:00
A. R. Shajii 915cb4e9f0
Support converting bytes object to Codon str () 2025-04-03 10:41:19 -04:00
A. R. Shajii ce5c49edb5 Fix typo in docs and README 2025-04-03 10:39:45 -04:00
A. R. Shajii 59f5bbb73b Bump versions 2025-03-18 10:46:58 -04:00
A. R. Shajii 93fb3d53e3
JIT argument order fix ()
* Fix argument ordering in JIT

* Format

* Update JIT tests

* Fix JIT test
2025-03-18 10:45:34 -04:00
A. R. Shajii b3f6c12d57 Fix 0d array conversions from Python 2025-03-03 11:31:49 -05:00
A. R. Shajii b17d21513d Remove -static-libstdc++ compilation flag 2025-02-18 14:49:45 -05:00
Ibrahim Numanagić d035f1dc97
C-based Cython Backend ()
* Move to C-based Cython backend (to avoid all those C++ ABI issues with std::string)

* Fix CI
2025-02-18 10:22:03 -05:00
A. R. Shajii dc5e5ac7a6 Bump version 2025-02-11 22:04:22 -05:00
A. R. Shajii 01a7503762 Bump version 2025-02-11 17:41:16 -05:00
A. R. Shajii f1ab7116d8 Fix np.pad() casting 2025-02-11 15:49:15 -05:00
A. R. Shajii b58b1ee767 Update OpenMP reduction detection for new ops 2025-02-07 12:04:12 -05:00
A. R. Shajii 56c00d36c2 Add additional int-float operators 2025-02-06 14:11:52 -05:00
A. R. Shajii 4521182aa8 Update np.correlate() 2025-02-04 17:32:54 -05:00
A. R. Shajii 44c59c2a03 Fix artifact names 2025-01-29 20:17:05 -05:00
A. R. Shajii 15c43eb94e Publish to PyPI in workflow 2025-01-29 15:52:50 -05:00
A. R. Shajii b8c1eeed36
2025 updates ()
* 2025 updates

* Update ci.yml
2025-01-29 15:41:43 -05:00
A. R. Shajii d13d6a58e3 Fix doc subcommand if no path given 2024-11-13 11:30:00 -05:00
Ibrahim Numanagić 37ff25a907
Fix underscore float parsing ()
* Fix underscore float parsing

* Add tests

* Update float parsing

---------

Co-authored-by: A. R. Shajii <ars@ars.me>
2024-10-01 15:35:11 -04:00
422 changed files with 73619 additions and 708 deletions
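For context on the "Fix underscore float parsing" commit above: Python (PEP 515) allows underscores as digit separators in numeric literals, so a float parser has to accept forms like the following. This is a minimal illustration, not code taken from the change itself.

```python
# PEP 515-style digit separators that float parsing must accept
print(1_000.000_1)   # 1000.0001
print(1_0.5e1_0)     # 105000000000.0
```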

View File

@ -0,0 +1,3 @@
FROM quay.io/pypa/manylinux2014_aarch64
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -0,0 +1,5 @@
name: manylinux build (aarch64)
description: Builds Codon on manylinux (aarch64)
runs:
using: docker
image: Dockerfile

View File

@ -4,13 +4,12 @@ set -e
# setup
cd /github/workspace
yum -y update
yum -y install python3 python3-devel
yum -y install python3 python3-devel gcc-gfortran
# env
export PYTHONPATH=$(pwd)/test/python
export CODON_PYTHON=$(python3 test/python/find-python-library.py)
python3 -m pip install -Iv pip==21.3.1
python3 -m pip install numpy
python3 -m pip install -Iv pip==21.3.1 numpy==1.17.5
# deps
if [ ! -d ./llvm ]; then
@ -22,6 +21,7 @@ mkdir build
export CC="$(pwd)/llvm/bin/clang"
export CXX="$(pwd)/llvm/bin/clang++"
export LLVM_DIR=$(llvm/bin/llvm-config --cmakedir)
export CODON_SYSTEM_LIBRARIES=/usr/lib64
(cd build && cmake .. -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX})
@ -44,6 +44,7 @@ build/codon_test
# package
export CODON_BUILD_ARCHIVE=codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake \
codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
tar -czf ${CODON_BUILD_ARCHIVE} codon-deploy
du -sh codon-deploy

View File

@ -0,0 +1,5 @@
name: manylinux build (x86_64)
description: Builds Codon on manylinux (x86_64)
runs:
using: docker
image: Dockerfile

View File

@ -0,0 +1,50 @@
#!/bin/sh -l
set -e
# setup
cd /github/workspace
yum -y update
yum -y install python3 python3-devel gcc-gfortran
# env
export PYTHONPATH=$(pwd)/test/python
export CODON_PYTHON=$(python3 test/python/find-python-library.py)
python3 -m pip install -Iv pip==21.3.1 numpy==1.17.5
# deps
if [ ! -d ./llvm ]; then
/bin/bash scripts/deps.sh 2;
fi
# build
mkdir build
export CC="$(pwd)/llvm/bin/clang"
export CXX="$(pwd)/llvm/bin/clang++"
export LLVM_DIR=$(llvm/bin/llvm-config --cmakedir)
export CODON_SYSTEM_LIBRARIES=/usr/lib64
(cd build && cmake .. -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX})
cmake --build build --config Release -- VERBOSE=1
cmake --install build --prefix=codon-deploy
# build cython
export PATH=$PATH:$(pwd)/llvm/bin
python3 -m pip install cython wheel astunparse
(cd codon-deploy/python && python3 setup.py sdist)
CODON_DIR=$(pwd)/codon-deploy python3 -m pip install -v codon-deploy/python/dist/*.gz
python3 test/python/cython_jit.py
# test
export LD_LIBRARY_PATH=$(pwd)/build:$LD_LIBRARY_PATH
export PYTHONPATH=$(pwd):$PYTHONPATH
export CODON_PATH=$(pwd)/stdlib
ln -s build/libcodonrt.so .
build/codon_test
# package
export CODON_BUILD_ARCHIVE=codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake \
codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
tar -czf ${CODON_BUILD_ARCHIVE} codon-deploy
du -sh codon-deploy

View File

@ -1,5 +0,0 @@
name: manylinux build
description: Builds Codon on manylinux
runs:
using: docker
image: Dockerfile

View File

@ -26,7 +26,12 @@ jobs:
uses: ncipollo/release-action@v1
manylinux:
runs-on: ubuntu-latest
strategy:
matrix:
arch:
- x86_64
# - aarch64
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-arm-latest' || 'ubuntu-latest' }}
name: Codon CI (manylinux)
needs: create_release
permissions:
@ -39,10 +44,15 @@ jobs:
uses: actions/cache@v4
with:
path: llvm
key: manylinux-llvm
key: manylinux-${{ matrix.arch }}-llvm
- name: Main
uses: ./.github/actions/build-manylinux
- name: Main x86_64
if: matrix.arch == 'x86_64'
uses: ./.github/actions/build-manylinux-x86_64
- name: Main aarch64
if: matrix.arch == 'aarch64'
uses: ./.github/actions/build-manylinux-aarch64
- name: Upload Release Asset
if: contains(github.ref, 'tags/v')
@ -66,7 +76,8 @@ jobs:
matrix:
os:
- ubuntu-latest
- macos-12
- macos-latest
# - ubuntu-arm-latest
runs-on: ${{ matrix.os }}
name: Codon CI
needs: create_release
@ -79,23 +90,49 @@ jobs:
with:
python-version: '3.9'
- name: Linux Setup
if: startsWith(matrix.os, 'ubuntu')
- name: x86_64 Linux Setup
if: startsWith(matrix.os, 'ubuntu') && matrix.os != 'ubuntu-arm-latest'
run: |
sudo apt update
sudo apt install -y gfortran libgfortran5 lsb-release wget software-properties-common gnupg
wget https://apt.llvm.org/llvm.sh
sudo chmod +x llvm.sh
sudo ./llvm.sh 17
echo "LIBEXT=so" >> $GITHUB_ENV
echo "OS_NAME=linux" >> $GITHUB_ENV
echo "CODON_SYSTEM_LIBRARIES=/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
echo "CC=clang-17" >> $GITHUB_ENV
echo "CXX=clang++-17" >> $GITHUB_ENV
- name: Arm Linux Setup
if: matrix.os == 'ubuntu-arm-latest'
run: |
sudo apt update
sudo apt install -y gfortran libgfortran5 lsb-release wget software-properties-common gnupg
wget https://apt.llvm.org/llvm.sh
sudo chmod +x llvm.sh
sudo ./llvm.sh 17
echo "LIBEXT=so" >> $GITHUB_ENV
echo "OS_NAME=linux" >> $GITHUB_ENV
echo "CODON_SYSTEM_LIBRARIES=/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
echo "CC=clang-17" >> $GITHUB_ENV
echo "CXX=clang++-17" >> $GITHUB_ENV
- name: macOS Setup
if: startsWith(matrix.os, 'macos')
run: |
brew install automake
echo "LIBEXT=dylib" >> $GITHUB_ENV
echo "OS_NAME=osx" >> $GITHUB_ENV
echo "CODON_SYSTEM_LIBRARIES=$(brew --prefix gcc)/lib/gcc/current" >> $GITHUB_ENV
echo "CC=clang" >> $GITHUB_ENV
echo "CXX=clang++" >> $GITHUB_ENV
echo "FC=gfortran-12" >> $GITHUB_ENV
- name: Set up Python
run: |
python -m pip install --upgrade pip setuptools wheel
python -m pip install numpy cython wheel astunparse
python -m pip install cython wheel astunparse
python -m pip install --force-reinstall -v "numpy==1.26.4"
which python
which pip
echo "CODON_PYTHON=$(python test/python/find-python-library.py)" >> $GITHUB_ENV
@ -105,14 +142,11 @@ jobs:
uses: actions/cache@v4
with:
path: llvm
key: ${{ runner.os }}-llvm
key: ${{ runner.os }}-${{ matrix.os }}-llvm
- name: Build Dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
run: ./scripts/deps.sh 2
env:
CC: clang
CXX: clang++
- name: Build
run: |
@ -123,18 +157,12 @@ jobs:
-DCMAKE_CXX_COMPILER=${CXX})
cmake --build build --config Release -- VERBOSE=1
cmake --install build --prefix=codon-deploy
env:
CC: clang
CXX: clang++
- name: Build Cython
run: |
(cd codon-deploy/python && python3 setup.py sdist)
CODON_DIR=$(pwd)/codon-deploy python -m pip install -v codon-deploy/python/dist/*.gz
python test/python/cython_jit.py
env:
CC: clang
CXX: clang++
CODON_PATH=$(pwd)/codon-deploy/lib/codon/stdlib python test/python/cython_jit.py
- name: Test
run: |
@ -151,10 +179,15 @@ jobs:
run: |
echo "CODON_BUILD_ARCHIVE=codon-$(uname -s | awk '{print tolower($0)}')-$(uname -m).tar.gz" >> $GITHUB_ENV
- name: Codesign (macOS)
if: startsWith(matrix.os, 'macos')
run: |
codesign -f -s - codon-deploy/bin/codon codon-deploy/lib/codon/*.dylib
- name: Prepare Artifacts
run: |
cp -rf codon-deploy/python/dist .
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake codon-deploy/python/codon.egg-info codon-deploy/python/dist codon-deploy/python/build
rm -rf codon-deploy/lib/libfmt.a codon-deploy/lib/pkgconfig codon-deploy/lib/cmake codon-deploy/python/codon_jit.egg-info codon-deploy/python/build
tar -czf ${CODON_BUILD_ARCHIVE} codon-deploy
du -sh codon-deploy
@ -165,24 +198,31 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create_release.outputs.upload_url }}
asset_path: ./codon-darwin-x86_64.tar.gz
asset_name: codon-darwin-x86_64.tar.gz
asset_path: ./codon-darwin-arm64.tar.gz
asset_name: codon-darwin-arm64.tar.gz
asset_content_type: application/gzip
- name: Upload Artifacts
if: startsWith(matrix.os, 'macos')
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}-x86_64
path: codon-darwin-x86_64.tar.gz
name: ${{ matrix.os }}-arm64
path: codon-darwin-arm64.tar.gz
- name: Upload Artifacts
if: startsWith(matrix.os, 'ubuntu')
if: startsWith(matrix.os, 'ubuntu') && matrix.os != 'ubuntu-arm-latest'
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}-x86_64
path: codon-linux-x86_64.tar.gz
# - name: Publish Package
# if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') && startsWith(matrix.os, 'ubuntu')
# uses: pypa/gh-action-pypi-publish@release/v1
- name: Upload Artifacts
if: matrix.os == 'ubuntu-arm-latest'
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}-arm64
path: codon-linux-arm64.tar.gz
- name: Publish Package
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') && startsWith(matrix.os, 'ubuntu')
uses: pypa/gh-action-pypi-publish@release/v1

View File

@ -1,10 +1,10 @@
cmake_minimum_required(VERSION 3.14)
project(
Codon
VERSION "0.17.0"
VERSION "0.18.2"
HOMEPAGE_URL "https://github.com/exaloop/codon"
DESCRIPTION "high-performance, extensible Python compiler")
set(CODON_JIT_PYTHON_VERSION "0.2.0")
set(CODON_JIT_PYTHON_VERSION "0.3.2")
configure_file("${PROJECT_SOURCE_DIR}/cmake/config.h.in"
"${PROJECT_SOURCE_DIR}/codon/config/config.h")
configure_file("${PROJECT_SOURCE_DIR}/cmake/config.py.in"
@ -48,10 +48,8 @@ include(${CMAKE_SOURCE_DIR}/cmake/deps.cmake)
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
if(APPLE)
set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/../lib/codon")
set(STATIC_LIBCPP "")
else()
set(CMAKE_INSTALL_RPATH "$ORIGIN:$ORIGIN/../lib/codon")
set(STATIC_LIBCPP "-static-libstdc++")
endif()
add_executable(peg2cpp codon/util/peg2cpp.cpp)
@ -73,17 +71,72 @@ set(CODON_JUPYTER_FILES codon/util/jupyter.h codon/util/jupyter.cpp)
add_library(codon_jupyter SHARED ${CODON_JUPYTER_FILES})
# Codon runtime library
add_library(codonfloat STATIC
codon/runtime/floatlib/extenddftf2.c
codon/runtime/floatlib/fp_trunc.h
codon/runtime/floatlib/truncdfhf2.c
codon/runtime/floatlib/extendhfsf2.c
codon/runtime/floatlib/int_endianness.h
codon/runtime/floatlib/truncdfsf2.c
codon/runtime/floatlib/extendhftf2.c
codon/runtime/floatlib/int_lib.h
# codon/runtime/floatlib/truncsfbf2.c
codon/runtime/floatlib/extendsfdf2.c
codon/runtime/floatlib/int_math.h
codon/runtime/floatlib/truncsfhf2.c
codon/runtime/floatlib/extendsftf2.c
codon/runtime/floatlib/int_types.h
codon/runtime/floatlib/trunctfdf2.c
codon/runtime/floatlib/fp_extend.h
codon/runtime/floatlib/int_util.h
codon/runtime/floatlib/trunctfhf2.c
codon/runtime/floatlib/fp_lib.h
# codon/runtime/floatlib/truncdfbf2.c
codon/runtime/floatlib/trunctfsf2.c)
target_compile_options(codonfloat PRIVATE -O3)
target_compile_definitions(codonfloat PRIVATE COMPILER_RT_HAS_FLOAT16)
set(CODONRT_FILES codon/runtime/lib.h codon/runtime/lib.cpp
codon/runtime/re.cpp codon/runtime/exc.cpp
codon/runtime/gpu.cpp)
codon/runtime/gpu.cpp codon/runtime/numpy/sort.cpp
codon/runtime/numpy/loops.cpp codon/runtime/numpy/zmath.cpp)
add_library(codonrt SHARED ${CODONRT_FILES})
add_dependencies(codonrt zlibstatic gc backtrace bz2 liblzma re2 fast_float)
add_dependencies(codonrt zlibstatic gc backtrace bz2 liblzma
re2 hwy hwy_contrib fast_float codonfloat)
if(DEFINED ENV{CODON_SYSTEM_LIBRARIES})
if(APPLE)
set(copied_libgfortran "${CMAKE_BINARY_DIR}/libgfortran.5${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(copied_libquadmath "${CMAKE_BINARY_DIR}/libquadmath.0${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(copied_libgcc "${CMAKE_BINARY_DIR}/libgcc_s.1.1${CMAKE_SHARED_LIBRARY_SUFFIX}")
else()
set(copied_libgfortran "${CMAKE_BINARY_DIR}/libgfortran${CMAKE_SHARED_LIBRARY_SUFFIX}.5")
set(copied_libquadmath "${CMAKE_BINARY_DIR}/libquadmath${CMAKE_SHARED_LIBRARY_SUFFIX}.0")
set(copied_libgcc "${CMAKE_BINARY_DIR}/libgcc_s${CMAKE_SHARED_LIBRARY_SUFFIX}.1")
endif()
add_custom_command(
OUTPUT ${copied_libgfortran}
DEPENDS "${CMAKE_SOURCE_DIR}/scripts/get_system_libs.sh"
COMMAND ${CMAKE_SOURCE_DIR}/scripts/get_system_libs.sh "$ENV{CODON_SYSTEM_LIBRARIES}" ${CMAKE_BINARY_DIR}
COMMENT "Copying system libraries to build directory")
add_custom_target(copy_libraries ALL DEPENDS ${copied_libgfortran})
add_dependencies(codonrt copy_libraries)
add_library(libgfortran SHARED IMPORTED)
set_target_properties(libgfortran PROPERTIES IMPORTED_LOCATION ${copied_libgfortran})
target_link_libraries(codonrt PRIVATE libgfortran)
else()
message(FATAL_ERROR "Set 'CODON_SYSTEM_LIBRARIES' to the directory containing system libraries.")
endif()
target_include_directories(codonrt PRIVATE ${backtrace_SOURCE_DIR}
${re2_SOURCE_DIR}
${highway_SOURCE_DIR}
"${gc_SOURCE_DIR}/include"
"${fast_float_SOURCE_DIR}/include" runtime)
target_link_libraries(codonrt PRIVATE fmt omp backtrace ${STATIC_LIBCPP}
LLVMSupport)
target_link_libraries(codonrt PRIVATE fmt omp backtrace LLVMSupport)
if(APPLE)
target_link_libraries(
codonrt
@ -91,13 +144,19 @@ if(APPLE)
-Wl,-force_load,$<TARGET_FILE:gc>
-Wl,-force_load,$<TARGET_FILE:bz2>
-Wl,-force_load,$<TARGET_FILE:liblzma>
-Wl,-force_load,$<TARGET_FILE:re2>)
-Wl,-force_load,$<TARGET_FILE:re2>
-Wl,-force_load,$<TARGET_FILE:hwy>
-Wl,-force_load,$<TARGET_FILE:hwy_contrib>
-Wl,-force_load,$<TARGET_FILE:codonfloat>)
target_link_libraries(codonrt PUBLIC "-framework Accelerate")
else()
add_dependencies(codonrt openblas)
target_link_libraries(
codonrt
PRIVATE -Wl,--whole-archive $<TARGET_FILE:zlibstatic> $<TARGET_FILE:gc>
$<TARGET_FILE:bz2> $<TARGET_FILE:liblzma> $<TARGET_FILE:re2>
-Wl,--no-whole-archive)
$<TARGET_FILE:openblas> $<TARGET_FILE:hwy> $<TARGET_FILE:hwy_contrib>
$<TARGET_FILE:codonfloat> -Wl,--no-whole-archive)
endif()
if(ASAN)
target_compile_options(
@ -173,6 +232,10 @@ set(CODON_HPPFILES
codon/cir/llvm/gpu.h
codon/cir/llvm/llvisitor.h
codon/cir/llvm/llvm.h
codon/cir/llvm/native/native.h
codon/cir/llvm/native/targets/aarch64.h
codon/cir/llvm/native/targets/target.h
codon/cir/llvm/native/targets/x86.h
codon/cir/llvm/optimize.h
codon/cir/module.h
codon/cir/pyextension.h
@ -187,6 +250,7 @@ set(CODON_HPPFILES
codon/cir/transform/folding/rule.h
codon/cir/transform/lowering/imperative.h
codon/cir/transform/lowering/pipeline.h
codon/cir/transform/numpy/numpy.h
codon/cir/transform/manager.h
codon/cir/transform/parallel/openmp.h
codon/cir/transform/parallel/schedule.h
@ -283,6 +347,9 @@ set(CODON_CPPFILES
codon/cir/instr.cpp
codon/cir/llvm/gpu.cpp
codon/cir/llvm/llvisitor.cpp
codon/cir/llvm/native/native.cpp
codon/cir/llvm/native/targets/aarch64.cpp
codon/cir/llvm/native/targets/x86.cpp
codon/cir/llvm/optimize.cpp
codon/cir/module.cpp
codon/cir/transform/cleanup/canonical.cpp
@ -294,6 +361,9 @@ set(CODON_CPPFILES
codon/cir/transform/folding/folding.cpp
codon/cir/transform/lowering/imperative.cpp
codon/cir/transform/lowering/pipeline.cpp
codon/cir/transform/numpy/expr.cpp
codon/cir/transform/numpy/forward.cpp
codon/cir/transform/numpy/numpy.cpp
codon/cir/transform/manager.cpp
codon/cir/transform/parallel/openmp.cpp
codon/cir/transform/parallel/schedule.cpp
@ -362,11 +432,7 @@ llvm_map_components_to_libnames(
TransformUtils
Vectorize
Passes)
if(APPLE)
target_link_libraries(codonc PRIVATE ${LLVM_LIBS} fmt dl codonrt)
else()
target_link_libraries(codonc PRIVATE ${STATIC_LIBCPP} ${LLVM_LIBS} fmt dl codonrt)
endif()
target_link_libraries(codonc PRIVATE ${LLVM_LIBS} fmt dl codonrt)
# Gather headers
add_custom_target(
@ -399,18 +465,24 @@ add_custom_target(
COMMAND
${CMAKE_COMMAND} -E copy
"${CMAKE_BINARY_DIR}/libomp${CMAKE_SHARED_LIBRARY_SUFFIX}"
"${CMAKE_BINARY_DIR}/lib/codon")
"${CMAKE_BINARY_DIR}/lib/codon"
COMMAND
${CMAKE_COMMAND} -E copy ${copied_libgfortran} "${CMAKE_BINARY_DIR}/lib/codon"
COMMAND
${CMAKE_COMMAND} -E copy ${copied_libquadmath} "${CMAKE_BINARY_DIR}/lib/codon"
COMMAND
${CMAKE_COMMAND} -E copy ${copied_libgcc} "${CMAKE_BINARY_DIR}/lib/codon")
add_dependencies(libs codonrt codonc)
# Codon command-line tool
add_executable(codon codon/app/main.cpp)
target_link_libraries(codon PUBLIC ${STATIC_LIBCPP} fmt codonc codon_jupyter Threads::Threads)
target_link_libraries(codon PUBLIC fmt codonc codon_jupyter Threads::Threads)
# Codon test Download and unpack googletest at configure time
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
@ -442,6 +514,9 @@ target_compile_definitions(codon_test
install(TARGETS codonrt codonc codon_jupyter DESTINATION lib/codon)
install(FILES ${CMAKE_BINARY_DIR}/libomp${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION lib/codon)
install(FILES ${copied_libgfortran} DESTINATION lib/codon)
install(FILES ${copied_libquadmath} DESTINATION lib/codon)
install(FILES ${copied_libgcc} DESTINATION lib/codon)
install(TARGETS codon DESTINATION bin)
install(DIRECTORY ${CMAKE_BINARY_DIR}/include/codon DESTINATION include)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/stdlib DESTINATION lib/codon)

240
LICENSE
View File

@ -1,91 +1,201 @@
Business Source License 1.1
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
"Business Source License" is a trademark of MariaDB Corporation Ab.
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-----------------------------------------------------------------------------
1. Definitions.
Parameters
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
Licensor: Exaloop, Inc.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
Licensed Work: Codon compiler, runtime, and standard library
The Licensed Work is (c) 2022-2024 Exaloop Inc.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
Additional Use Grant: None
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
Change Date: 2028-03-01
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
Change License: Apache License, Version 2.0
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
For information about alternative licensing arrangements for the Software,
please visit: https://exaloop.io/
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
-----------------------------------------------------------------------------
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
Terms
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited
production use.
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
MariaDB hereby grants you permission to use this License's text to license
your works, and to refer to it using the trademark "Business Source License",
as long as you comply with the Covenants of Licensor below.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
Covenants of Licensor
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
In consideration of the right to use this License's text and the "Business
Source License" name and trademark, Licensor covenants to MariaDB, and to all
other recipients of the licensed work to be provided by Licensor:
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
1. To specify as the Change License the GPL Version 2.0 or any later version,
or a license that is compatible with GPL Version 2.0 or a later version,
where "compatible" means that software provided under the Change License can
be included in a program with software provided under GPL Version 2.0 or a
later version. Licensor may specify additional Change Licenses without
limitation.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
2. To either: (a) specify an additional grant of rights to use that does not
impose any additional restriction on the right granted in this License, as
the Additional Use Grant; or (b) insert the text "None".
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
3. To specify a Change Date.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
4. Not to modify this License in any other way.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

149
README.md
View File

@ -1,19 +1,19 @@
<p align="center">
<img src="docs/img/codon.png?raw=true" width="600" alt="Codon"/>
</p>
<h1 align="center">
<img src="docs/img/codon-banner.svg" alt="Codon banner"/>
</h1>
<h3 align="center">
<a href="https://docs.exaloop.io/codon" target="_blank"><b>Docs</b></a>
&nbsp;&#183;&nbsp;
<a href="https://docs.exaloop.io/codon/general/faq" target="_blank"><b>FAQ</b></a>
&nbsp;&#183;&nbsp;
<a href="https://blog.exaloop.io" target="_blank"><b>Blog</b></a>
<a href="https://exaloop.io/blog" target="_blank"><b>Blog</b></a>
&nbsp;&#183;&nbsp;
<a href="https://join.slack.com/t/exaloop/shared_invite/zt-1jusa4kc0-T3rRWrrHDk_iZ1dMS8s0JQ" target="_blank">Chat</a>
&nbsp;&#183;&nbsp;
<a href="https://docs.exaloop.io/codon/general/roadmap" target="_blank">Roadmap</a>
&nbsp;&#183;&nbsp;
<a href="https://exaloop.io/benchmarks" target="_blank">Benchmarks</a>
<a href="https://exaloop.io/#benchmarks" target="_blank">Benchmarks</a>
</h3>
<a href="https://github.com/exaloop/codon/actions/workflows/ci.yml">
@ -21,7 +21,7 @@
alt="Build Status">
</a>
## What is Codon?
# What is Codon?
Codon is a high-performance Python implementation that compiles to native machine code without
any runtime overhead. Typical speedups over vanilla Python are on the order of 10-100x or more, on
@ -32,7 +32,7 @@ higher still.
*Think of Codon as Python reimagined for static, ahead-of-time compilation, built from the ground
up with best possible performance in mind.*
### Goals
## Goals
- :bulb: **No learning curve:** Be as close to CPython as possible in terms of syntax, semantics and libraries
- :rocket: **Top-notch performance:** At *least* on par with low-level languages like C, C++ or Rust
@ -41,7 +41,7 @@ up with best possible performance in mind.*
and libraries
- :battery: **Interoperability:** Full interoperability with Python's ecosystem of packages and libraries
### Non-goals
## Non-goals
- :x: *Drop-in replacement for CPython:* Codon is not a drop-in replacement for CPython. There are some
aspects of Python that are not suitable for static compilation — we don't support these in Codon.
@ -54,55 +54,62 @@ up with best possible performance in mind.*
features as much as possible. While Codon does add some new syntax in a couple places (e.g. to express
parallelism), we try to make it as familiar and intuitive as possible.
## Install
## How it works
Pre-built binaries for Linux (x86_64) and macOS (x86_64 and arm64) are available alongside [each release](https://github.com/exaloop/codon/releases).
Download and install with:
<p align="center">
<img src="docs/img/codon-figure.svg" width="90%" alt="Codon figure"/>
</p>
# Quick start
Download and install Codon with this command:
```bash
/bin/bash -c "$(curl -fsSL https://exaloop.io/install.sh)"
```
Or you can [build from source](https://docs.exaloop.io/codon/advanced/build).
After following the prompts, the `codon` command will be available to use. For example:
## Examples
- To run a program: `codon run file.py`
- To run a program with optimizations enabled: `codon run -release file.py`
- To compile to an executable: `codon build -release file.py`
- To generate LLVM IR: `codon build -release -llvm file.py`
Codon is a Python-compatible language, and many Python programs will work with few if any modifications:
Many more options are available and described in [the docs](https://docs.exaloop.io/codon/general/intro).
Alternatively, you can [build from source](https://docs.exaloop.io/codon/advanced/build).
# Examples
## Basics
Codon supports much of Python, and many Python programs will work with few if any modifications.
Here's a simple script `fib.py` that computes the 40th Fibonacci number...
``` python
from time import time
```python
def fib(n):
a, b = 0, 1
while a < n:
print(a, end=' ')
a, b = b, a+b
print()
fib(1000)
return n if n < 2 else fib(n - 1) + fib(n - 2)
t0 = time()
ans = fib(40)
t1 = time()
print(f'Computed fib(40) = {ans} in {t1 - t0} seconds.')
```
The `codon` compiler has a number of options and modes:
... run through Python and Codon:
```bash
# compile and run the program
codon run fib.py
# 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987
# compile and run the program with optimizations enabled
codon run -release fib.py
# 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987
# compile to executable with optimizations enabled
codon build -release -exe fib.py
./fib
# 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987
# compile to LLVM IR file with optimizations enabled
codon build -release -llvm fib.py
# outputs file fib.ll
```
$ python3 fib.py
Computed fib(40) = 102334155 in 17.979357957839966 seconds.
$ codon run -release fib.py
Computed fib(40) = 102334155 in 0.275645 seconds.
```
See [the docs](https://docs.exaloop.io/codon/general/intro) for more options and examples.
## Using Python libraries
You can import and use any Python package from Codon. For example:
You can import and use any Python package from Codon via `from python import`. For example:
```python
from python import matplotlib.pyplot as plt
@ -112,11 +119,13 @@ plt.show()
```
(Just remember to set the `CODON_PYTHON` environment variable to the CPython shared library,
as explained in the [the docs](https://docs.exaloop.io/codon/interoperability/python).)
as explained in [the Python interoperability docs](https://docs.exaloop.io/codon/interoperability/python).)
This prime counting example showcases Codon's [OpenMP](https://www.openmp.org/) support, enabled
with the addition of one line. The `@par` annotation tells the compiler to parallelize the
following `for`-loop, in this case using a dynamic schedule, chunk size of 100, and 16 threads.
## Parallelism
Codon supports native multithreading via [OpenMP](https://www.openmp.org/). The `@par` annotation
in the code below tells the compiler to parallelize the following `for`-loop, in this case using
a dynamic schedule, chunk size of 100, and 16 threads.
```python
from sys import argv
@ -139,7 +148,10 @@ for i in range(2, limit):
print(total)
```
Codon supports writing and executing GPU kernels. Here's an example that computes the
Note that Codon automatically turns the `total += 1` statement in the loop body into an atomic
reduction to avoid race conditions. Learn more in the [multithreading docs](https://docs.exaloop.io/codon/advanced/parallel).
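A minimal sketch of that pattern, restated with the schedule, chunk size, and thread count spelled out (parameter names assumed; `is_prime` and `limit` are placeholders):

```python
total = 0

@par(schedule='dynamic', chunk_size=100, num_threads=16)
for i in range(2, limit):
    if is_prime(i):
        total += 1  # compiled as an atomic/OpenMP reduction rather than a racy update

print(total)
```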
Codon also supports writing and executing GPU kernels. Here's an example that computes the
[Mandelbrot set](https://en.wikipedia.org/wiki/Mandelbrot_set):
```python
@ -169,8 +181,47 @@ def mandelbrot(pixels):
mandelbrot(pixels, grid=(N*N)//1024, block=1024)
```
GPU programming can also be done using the `@par` syntax with `@par(gpu=True)`.
GPU programming can also be done using the `@par` syntax with `@par(gpu=True)`. See the
[GPU programming docs](https://docs.exaloop.io/codon/advanced/gpu) for more details.
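A minimal sketch of the `@par(gpu=True)` form, with illustrative names and sizes:

```python
N = 1024
x = [float(i) for i in range(N)]
y = [0.0] * N

@par(gpu=True)
for i in range(N):
    y[i] = x[i] * x[i]  # loop body is offloaded to the GPU
```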
## Documentation
## NumPy support
Please see [docs.exaloop.io](https://docs.exaloop.io/codon) for in-depth documentation.
Codon includes a feature-complete, fully-compiled native NumPy implementation. It uses the same
API as NumPy, but re-implements everything in Codon itself, allowing for a range of optimizations
and performance improvements.
Here's an example NumPy program that approximates $\pi$ using random numbers...
``` python
import time
import numpy as np
rng = np.random.default_rng(seed=0)
x = rng.random(500_000_000)
y = rng.random(500_000_000)
t0 = time.time()
# pi ~= 4 x (fraction of points in circle)
pi = ((x-1)**2 + (y-1)**2 < 1).sum() * (4 / len(x))
t1 = time.time()
print(f'Computed pi~={pi:.4f} in {t1 - t0:.2f} sec')
```
... run through Python and Codon:
```
$ python3 pi.py
Computed pi~=3.1417 in 2.25 sec
$ codon run -release pi.py
Computed pi~=3.1417 in 0.43 sec
```
Codon can speed up NumPy code through general-purpose and NumPy-specific compiler optimizations,
including inlining, fusion, memory allocation elision and more. Furthermore, Codon's NumPy
implementation works with its multithreading and GPU capabilities, and can even integrate with
[PyTorch](https://pytorch.org). Learn more in the [Codon-NumPy docs](https://docs.exaloop.io/codon/interoperability/numpy).
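As an illustration of the kind of code those optimizations target (an assumed example, not from the docs): a chained elementwise expression like the one below would ordinarily allocate a temporary array per operator, which fusion and allocation elision can avoid.

```python
import numpy as np

a = np.random.rand(1_000_000)
b = np.random.rand(1_000_000)
c = np.random.rand(1_000_000)

# candidate for a single fused elementwise pass instead of one pass
# (and one temporary array) per operator
d = (a + b) * c - np.sqrt(a * a + b * b)
```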
# Documentation
Please see [docs.exaloop.io](https://docs.exaloop.io) for in-depth documentation.

View File

@ -1,8 +1,8 @@
set(CPM_DOWNLOAD_VERSION 0.32.3)
set(CPM_DOWNLOAD_VERSION 0.40.8)
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
message(STATUS "Downloading CPM.cmake...")
file(DOWNLOAD https://github.com/TheLartians/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake ${CPM_DOWNLOAD_LOCATION})
file(DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake ${CPM_DOWNLOAD_LOCATION})
endif()
include(${CPM_DOWNLOAD_LOCATION})
@ -77,9 +77,9 @@ endif()
CPMAddPackage(
NAME bdwgc
GITHUB_REPOSITORY "ivmai/bdwgc"
GITHUB_REPOSITORY "exaloop/bdwgc"
VERSION 8.0.5
GIT_TAG d0ba209660ea8c663e06d9a68332ba5f42da54ba
GIT_TAG e16c67244aff26802203060422545d38305e0160
EXCLUDE_FROM_ALL YES
OPTIONS "CMAKE_POSITION_INDEPENDENT_CODE ON"
"BUILD_SHARED_LIBS OFF"
@ -163,3 +163,28 @@ CPMAddPackage(
GITHUB_REPOSITORY "fastfloat/fast_float"
GIT_TAG v6.1.1
EXCLUDE_FROM_ALL YES)
if(NOT APPLE)
enable_language(Fortran)
CPMAddPackage(
NAME openblas
GITHUB_REPOSITORY "OpenMathLib/OpenBLAS"
GIT_TAG v0.3.29
EXCLUDE_FROM_ALL YES
OPTIONS "DYNAMIC_ARCH ON"
"BUILD_TESTING OFF"
"BUILD_BENCHMARKS OFF"
"NUM_THREADS 64"
"CCOMMON_OPT -O3")
endif()
CPMAddPackage(
NAME highway
GITHUB_REPOSITORY "google/highway"
GIT_TAG 1.2.0
EXCLUDE_FROM_ALL YES
OPTIONS "HWY_ENABLE_CONTRIB ON"
"HWY_ENABLE_EXAMPLES OFF"
"HWY_ENABLE_INSTALL OFF"
"HWY_ENABLE_TESTS OFF"
"BUILD_TESTING OFF")

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include <algorithm>
#include <cstdio>
@ -11,6 +11,7 @@
#include <unordered_map>
#include <vector>
#include "codon/cir/util/format.h"
#include "codon/compiler/compiler.h"
#include "codon/compiler/error.h"
#include "codon/compiler/jit.h"
@ -87,7 +88,7 @@ void initLogFlags(const llvm::cl::opt<std::string> &log) {
codon::getLogger().parse(std::string(d));
}
enum BuildKind { LLVM, Bitcode, Object, Executable, Library, PyExtension, Detect };
enum BuildKind { LLVM, Bitcode, Object, Executable, Library, PyExtension, Detect, CIR };
enum OptMode { Debug, Release };
enum Numerics { C, Python };
} // namespace
@ -121,7 +122,8 @@ int docMode(const std::vector<const char *> &args, const std::string &argv0) {
}
};
collectPaths(args[1]);
if (args.size() > 1)
collectPaths(args[1]);
auto compiler = std::make_unique<codon::Compiler>(args[0]);
bool failed = false;
auto result = compiler->docgen(files);
@ -332,6 +334,7 @@ int buildMode(const std::vector<const char *> &args, const std::string &argv0) {
clEnumValN(Executable, "exe", "Generate executable"),
clEnumValN(Library, "lib", "Generate shared library"),
clEnumValN(PyExtension, "pyext", "Generate Python extension module"),
clEnumValN(CIR, "cir", "Generate Codon Intermediate Representation"),
clEnumValN(Detect, "detect",
"Detect output type based on output file extension")),
llvm::cl::init(Detect));
@ -371,6 +374,9 @@ int buildMode(const std::vector<const char *> &args, const std::string &argv0) {
case BuildKind::Detect:
extension = "";
break;
case BuildKind::CIR:
extension = ".cir";
break;
default:
seqassertn(0, "unknown build kind");
}
@ -400,6 +406,11 @@ int buildMode(const std::vector<const char *> &args, const std::string &argv0) {
compiler->getLLVMVisitor()->writeToPythonExtension(*compiler->getCache()->pyModule,
filename);
break;
case BuildKind::CIR: {
std::ofstream out(filename);
codon::ir::util::format(out, compiler->getModule());
break;
}
case BuildKind::Detect:
compiler->getLLVMVisitor()->compile(filename, argv0, libsVec, lflags);
break;
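Given the `cir` output kind and `.cir` extension registered above, the new flag would presumably be invoked like the other `codon build` output-type options (a hypothetical usage, mirroring `-llvm`):

```bash
codon build -release -cir fib.py
# expected to write the module's Codon Intermediate Representation to fib.cir
```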

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "analysis.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "capture.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "cfg.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "dominator.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "reaching.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "global_vars.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "side_effect.h"
@ -293,7 +293,7 @@ struct SideEfectAnalyzer : public util::ConstVisitor {
}
void visit(const CallInstr *v) override {
auto s = Status::PURE;
auto s = process(v->getCallee());
auto callStatus = Status::UNKNOWN;
for (auto *x : *v) {
s = max(s, process(x));
@ -303,7 +303,6 @@ struct SideEfectAnalyzer : public util::ConstVisitor {
s = max(s, callStatus);
} else {
// unknown function
process(v->getCallee());
s = Status::UNKNOWN;
}
set(v, s, callStatus);
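The reordering above folds the callee expression's own side-effect status into the call's status even when the callee resolves to a known function, instead of only processing it in the unknown-function branch. A minimal illustration of the case this covers, written as plain Python with hypothetical names:

```python
def f(x: int) -> int:
    return x + 1       # a pure function

def pick():
    print("choosing")  # evaluating the callee expression has a side effect
    return f

r = pick()(2)  # the call is impure even though the resolved callee (f) is pure
```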

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "attribute.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "base.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "const.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "nodes.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "flow.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "func.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "instr.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "gpu.h"
@ -6,6 +6,7 @@
#include <memory>
#include <string>
#include "codon/cir/llvm/optimize.h"
#include "codon/util/common.h"
namespace codon {
@ -204,6 +205,139 @@ llvm::Function *makeNoOp(llvm::Function *F) {
using Codegen =
std::function<void(llvm::IRBuilder<> &, const std::vector<llvm::Value *> &)>;
void codegenVectorizedUnaryLoop(llvm::IRBuilder<> &B,
const std::vector<llvm::Value *> &args,
llvm::Function *func) {
// Create IR to represent:
// p_in = in
// p_out = out
// for i in range(n):
// *p_out = func(*p_in)
// p_in += is
// p_out += os
auto &context = B.getContext();
auto *parent = B.GetInsertBlock()->getParent();
auto *ty = func->getReturnType();
auto *in = args[0];
auto *is = args[1];
auto *out = args[2];
auto *os = args[3];
auto *n = args[4];
auto *loop = llvm::BasicBlock::Create(context, "loop", parent);
auto *exit = llvm::BasicBlock::Create(context, "exit", parent);
auto *pinStore = B.CreateAlloca(B.getPtrTy());
auto *poutStore = B.CreateAlloca(B.getPtrTy());
auto *idxStore = B.CreateAlloca(B.getInt64Ty());
// p_in = in
B.CreateStore(in, pinStore);
// p_out = out
B.CreateStore(out, poutStore);
// i = 0
B.CreateStore(B.getInt64(0), idxStore);
// if n > 0: goto loop; else: goto exit
B.CreateCondBr(B.CreateICmpSGT(n, B.getInt64(0)), loop, exit);
// load pointers
B.SetInsertPoint(loop);
auto *pin = B.CreateLoad(B.getPtrTy(), pinStore);
auto *pout = B.CreateLoad(B.getPtrTy(), poutStore);
// y = func(x)
auto *x = B.CreateLoad(ty, pin);
auto *y = B.CreateCall(func, x);
B.CreateStore(y, pout);
auto *idx = B.CreateLoad(B.getInt64Ty(), idxStore);
// i += 1
B.CreateStore(B.CreateAdd(idx, B.getInt64(1)), idxStore);
// p_in += is
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pin, is), pinStore);
// p_out += os
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pout, os), poutStore);
idx = B.CreateLoad(B.getInt64Ty(), idxStore);
// if i < n: goto loop; else: goto exit
B.CreateCondBr(B.CreateICmpSLT(idx, n), loop, exit);
B.SetInsertPoint(exit);
B.CreateRet(llvm::UndefValue::get(parent->getReturnType()));
}
void codegenVectorizedBinaryLoop(llvm::IRBuilder<> &B,
const std::vector<llvm::Value *> &args,
llvm::Function *func) {
// Create IR to represent:
// p_in1 = in1
// p_in2 = in2
// p_out = out
// for i in range(n):
// *p_out = func(*p_in1, *p_in2)
// p_in1 += is1
// p_in2 += is2
// p_out += os
auto &context = B.getContext();
auto *parent = B.GetInsertBlock()->getParent();
auto *ty = func->getReturnType();
auto *in1 = args[0];
auto *is1 = args[1];
auto *in2 = args[2];
auto *is2 = args[3];
auto *out = args[4];
auto *os = args[5];
auto *n = args[6];
auto *loop = llvm::BasicBlock::Create(context, "loop", parent);
auto *exit = llvm::BasicBlock::Create(context, "exit", parent);
auto *pin1Store = B.CreateAlloca(B.getPtrTy());
auto *pin2Store = B.CreateAlloca(B.getPtrTy());
auto *poutStore = B.CreateAlloca(B.getPtrTy());
auto *idxStore = B.CreateAlloca(B.getInt64Ty());
// p_in1 = in1
B.CreateStore(in1, pin1Store);
// p_in2 = in2
B.CreateStore(in2, pin2Store);
// p_out = out
B.CreateStore(out, poutStore);
// i = 0
B.CreateStore(B.getInt64(0), idxStore);
// if n > 0: goto loop; else: goto exit
B.CreateCondBr(B.CreateICmpSGT(n, B.getInt64(0)), loop, exit);
// load pointers
B.SetInsertPoint(loop);
auto *pin1 = B.CreateLoad(B.getPtrTy(), pin1Store);
auto *pin2 = B.CreateLoad(B.getPtrTy(), pin2Store);
auto *pout = B.CreateLoad(B.getPtrTy(), poutStore);
// y = func(x1, x2)
auto *x1 = B.CreateLoad(ty, pin1);
auto *x2 = B.CreateLoad(ty, pin2);
auto *y = B.CreateCall(func, {x1, x2});
B.CreateStore(y, pout);
auto *idx = B.CreateLoad(B.getInt64Ty(), idxStore);
// i += 1
B.CreateStore(B.CreateAdd(idx, B.getInt64(1)), idxStore);
// p_in1 += is1
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pin1, is1), pin1Store);
// p_in2 += is2
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pin2, is2), pin2Store);
// p_out += os
B.CreateStore(B.CreateGEP(B.getInt8Ty(), pout, os), poutStore);
idx = B.CreateLoad(B.getInt64Ty(), idxStore);
// if i < n: goto loop; else: goto exit
B.CreateCondBr(B.CreateICmpSLT(idx, n), loop, exit);
B.SetInsertPoint(exit);
B.CreateRet(llvm::UndefValue::get(parent->getReturnType()));
}
llvm::Function *makeFillIn(llvm::Function *F, Codegen codegen) {
auto *M = F->getParent();
auto &context = M->getContext();
@ -346,6 +480,13 @@ void remapFunctions(llvm::Module *M) {
B.CreateRet(mem);
}},
{"seq_alloc_uncollectable",
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
auto *M = B.GetInsertBlock()->getModule();
llvm::Value *mem = B.CreateCall(makeMalloc(M), args[0]);
B.CreateRet(mem);
}},
{"seq_alloc_atomic",
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
auto *M = B.GetInsertBlock()->getModule();
@ -353,6 +494,13 @@ void remapFunctions(llvm::Module *M) {
B.CreateRet(mem);
}},
{"seq_alloc_atomic_uncollectable",
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
auto *M = B.GetInsertBlock()->getModule();
llvm::Value *mem = B.CreateCall(makeMalloc(M), args[0]);
B.CreateRet(mem);
}},
{"seq_realloc",
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
auto *M = B.GetInsertBlock()->getModule();
@ -396,6 +544,93 @@ void remapFunctions(llvm::Module *M) {
[](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) {
B.CreateUnreachable();
}},
#define FILLIN_VECLOOP_UNARY32(loop, func) \
{ \
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
auto *M = B.GetInsertBlock()->getModule(); \
auto f = llvm::cast<llvm::Function>( \
M->getOrInsertFunction(func, B.getFloatTy(), B.getFloatTy()).getCallee()); \
f->setWillReturn(); \
codegenVectorizedUnaryLoop(B, args, f); \
} \
}
#define FILLIN_VECLOOP_UNARY64(loop, func) \
{ \
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
auto *M = B.GetInsertBlock()->getModule(); \
auto f = llvm::cast<llvm::Function>( \
M->getOrInsertFunction(func, B.getDoubleTy(), B.getDoubleTy()).getCallee()); \
f->setWillReturn(); \
codegenVectorizedUnaryLoop(B, args, f); \
} \
}
#define FILLIN_VECLOOP_BINARY32(loop, func) \
{ \
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
auto *M = B.GetInsertBlock()->getModule(); \
auto f = llvm::cast<llvm::Function>( \
M->getOrInsertFunction(func, B.getFloatTy(), B.getFloatTy(), B.getFloatTy()) \
.getCallee()); \
f->setWillReturn(); \
codegenVectorizedBinaryLoop(B, args, f); \
} \
}
#define FILLIN_VECLOOP_BINARY64(loop, func) \
{ \
loop, [](llvm::IRBuilder<> &B, const std::vector<llvm::Value *> &args) { \
auto *M = B.GetInsertBlock()->getModule(); \
auto f = llvm::cast<llvm::Function>( \
M->getOrInsertFunction(func, B.getDoubleTy(), B.getDoubleTy(), \
B.getDoubleTy()) \
.getCallee()); \
f->setWillReturn(); \
codegenVectorizedBinaryLoop(B, args, f); \
} \
}
FILLIN_VECLOOP_UNARY64("cnp_acos_float64", "__nv_acos"),
FILLIN_VECLOOP_UNARY64("cnp_acosh_float64", "__nv_acosh"),
FILLIN_VECLOOP_UNARY64("cnp_asin_float64", "__nv_asin"),
FILLIN_VECLOOP_UNARY64("cnp_asinh_float64", "__nv_asinh"),
FILLIN_VECLOOP_UNARY64("cnp_atan_float64", "__nv_atan"),
FILLIN_VECLOOP_UNARY64("cnp_atanh_float64", "__nv_atanh"),
FILLIN_VECLOOP_BINARY64("cnp_atan2_float64", "__nv_atan2"),
FILLIN_VECLOOP_UNARY64("cnp_exp_float64", "__nv_exp"),
FILLIN_VECLOOP_UNARY64("cnp_exp2_float64", "__nv_exp2"),
FILLIN_VECLOOP_UNARY64("cnp_expm1_float64", "__nv_expm1"),
FILLIN_VECLOOP_UNARY64("cnp_log_float64", "__nv_log"),
FILLIN_VECLOOP_UNARY64("cnp_log10_float64", "__nv_log10"),
FILLIN_VECLOOP_UNARY64("cnp_log1p_float64", "__nv_log1p"),
FILLIN_VECLOOP_UNARY64("cnp_log2_float64", "__nv_log2"),
FILLIN_VECLOOP_UNARY64("cnp_sin_float64", "__nv_sin"),
FILLIN_VECLOOP_UNARY64("cnp_sinh_float64", "__nv_sinh"),
FILLIN_VECLOOP_UNARY64("cnp_tan_float64", "__nv_tan"),
FILLIN_VECLOOP_UNARY64("cnp_tanh_float64", "__nv_tanh"),
FILLIN_VECLOOP_BINARY64("cnp_hypot_float64", "__nv_hypot"),
FILLIN_VECLOOP_UNARY32("cnp_acos_float32", "__nv_acosf"),
FILLIN_VECLOOP_UNARY32("cnp_acosh_float32", "__nv_acoshf"),
FILLIN_VECLOOP_UNARY32("cnp_asin_float32", "__nv_asinf"),
FILLIN_VECLOOP_UNARY32("cnp_asinh_float32", "__nv_asinhf"),
FILLIN_VECLOOP_UNARY32("cnp_atan_float32", "__nv_atanf"),
FILLIN_VECLOOP_UNARY32("cnp_atanh_float32", "__nv_atanhf"),
FILLIN_VECLOOP_BINARY32("cnp_atan2_float32", "__nv_atan2f"),
FILLIN_VECLOOP_UNARY32("cnp_exp_float32", "__nv_expf"),
FILLIN_VECLOOP_UNARY32("cnp_exp2_float32", "__nv_exp2f"),
FILLIN_VECLOOP_UNARY32("cnp_expm1_float32", "__nv_expm1f"),
FILLIN_VECLOOP_UNARY32("cnp_log_float32", "__nv_logf"),
FILLIN_VECLOOP_UNARY32("cnp_log10_float32", "__nv_log10f"),
FILLIN_VECLOOP_UNARY32("cnp_log1p_float32", "__nv_log1pf"),
FILLIN_VECLOOP_UNARY32("cnp_log2_float32", "__nv_log2f"),
FILLIN_VECLOOP_UNARY32("cnp_sin_float32", "__nv_sinf"),
FILLIN_VECLOOP_UNARY32("cnp_sinh_float32", "__nv_sinhf"),
FILLIN_VECLOOP_UNARY32("cnp_tan_float32", "__nv_tanf"),
FILLIN_VECLOOP_UNARY32("cnp_tanh_float32", "__nv_tanhf"),
FILLIN_VECLOOP_BINARY32("cnp_hypot_float32", "__nv_hypotf"),
};
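// A hedged sketch of what each FILLIN_VECLOOP_* entry above produces on the
// GPU path: a runtime loop symbol such as "cnp_sin_float64" is given a body
// that walks its strided buffers and applies the matching libdevice routine
// ("__nv_sin") element-wise, roughly:
//
//   for (int64_t i = 0; i < n; ++i, p_in += is, p_out += os)
//     *(double *)p_out = __nv_sin(*(double *)p_in);
//
// The exact argument layout comes from codegenVectorizedUnaryLoop /
// codegenVectorizedBinaryLoop; the pointer and stride names are illustrative.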
for (auto &pair : remapping) {
@ -636,6 +871,11 @@ void applyGPUTransformations(llvm::Module *M, const std::string &ptxFilename) {
clone->setTargetTriple(llvm::Triple::normalize(GPU_TRIPLE));
clone->setDataLayout(GPU_DL);
if (isFastMathOn()) {
clone->addModuleFlag(llvm::Module::ModFlagBehavior::Override, "nvvm-reflect-ftz",
1);
}
llvm::NamedMDNode *nvvmAnno = clone->getOrInsertNamedMetadata("nvvm.annotations");
std::vector<llvm::GlobalValue *> kernels;

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "llvisitor.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -0,0 +1,115 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "native.h"
#include "codon/cir/llvm/llvm.h"
#include "codon/cir/llvm/native/targets/aarch64.h"
#include "codon/cir/llvm/native/targets/x86.h"
namespace codon {
namespace ir {
namespace {
std::unique_ptr<Target> getNativeTarget(const llvm::Triple &triple) {
std::unique_ptr<Target> result = std::unique_ptr<Target>();
switch (triple.getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
// nothing
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// nothing
break;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
// nothing
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
// nothing
break;
case llvm::Triple::systemz:
// nothing
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
result = std::make_unique<Aarch64>();
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
result = std::make_unique<X86>();
break;
case llvm::Triple::hexagon:
// nothing
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
// nothing
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
// nothing
break;
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
// nothing
break;
case llvm::Triple::msp430:
// nothing
break;
case llvm::Triple::ve:
// nothing
break;
}
return result;
}
class ArchNativePass : public llvm::PassInfoMixin<ArchNativePass> {
private:
std::string cpu;
std::string features;
public:
explicit ArchNativePass(const std::string &cpu = "", const std::string &features = "")
: cpu(cpu), features(features) {}
llvm::PreservedAnalyses run(llvm::Function &F, llvm::FunctionAnalysisManager &) {
if (!cpu.empty())
F.addFnAttr("target-cpu", cpu);
if (!features.empty())
F.addFnAttr("target-features", features);
F.addFnAttr("frame-pointer", "none");
return llvm::PreservedAnalyses::all();
}
};
} // namespace
void addNativeLLVMPasses(llvm::PassBuilder *pb) {
llvm::Triple triple = llvm::EngineBuilder().selectTarget()->getTargetTriple();
auto target = getNativeTarget(triple);
if (!target)
return;
std::string cpu = target->getCPU(triple);
std::string features = target->getFeatures(triple);
pb->registerPipelineEarlySimplificationEPCallback(
[cpu, features](llvm::ModulePassManager &pm, llvm::OptimizationLevel opt) {
pm.addPass(
llvm::createModuleToFunctionPassAdaptor(ArchNativePass(cpu, features)));
});
}
} // namespace ir
} // namespace codon
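// A minimal usage sketch of the hook this file adds (assumes the standard
// LLVM PassBuilder setup; illustrative, not part of the patch itself):
//
//   llvm::PassBuilder pb;
//   codon::ir::addNativeLLVMPasses(&pb); // no-op on unsupported architectures
//
// After the early-simplification callback fires, each function carries
// "target-cpu" / "target-features" attributes matching the host, plus
// "frame-pointer"="none".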

View File

@ -0,0 +1,13 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once
#include "codon/cir/llvm/llvm.h"
namespace codon {
namespace ir {
void addNativeLLVMPasses(llvm::PassBuilder *pb);
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,162 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "aarch64.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
namespace codon {
namespace ir {
namespace {
template <typename T> std::string join(const T &v, const std::string &delim = ",") {
std::ostringstream s;
for (const auto &i : v) {
if (&i != &v[0])
s << delim;
s << std::string(i);
}
return s.str();
}
} // namespace
std::string Aarch64::getCPU(const llvm::Triple &triple) const {
return llvm::sys::getHostCPUName().str();
}
std::string Aarch64::getFeatures(const llvm::Triple &triple) const {
std::vector<llvm::StringRef> features;
// Enable NEON by default.
features.push_back("+neon");
std::string cpu(llvm::sys::getHostCPUName());
const std::optional<llvm::AArch64::CpuInfo> cpuInfo = llvm::AArch64::parseCpu(cpu);
if (!cpuInfo)
return "";
if (cpu == "cyclone" || llvm::StringRef(cpu).startswith("apple")) {
features.push_back("+zcm");
features.push_back("+zcz");
}
auto *archInfo = &cpuInfo->Arch;
features.push_back(archInfo->ArchFeature);
uint64_t extension = cpuInfo->getImpliedExtensions();
if (!llvm::AArch64::getExtensionFeatures(extension, features))
return "";
// Handle (arch-dependent) fp16fml/fullfp16 relationship.
// FIXME: this fp16fml option handling will be reimplemented after the
// TargetParser rewrite.
const auto ItRNoFullFP16 = std::find(features.rbegin(), features.rend(), "-fullfp16");
const auto ItRFP16FML = std::find(features.rbegin(), features.rend(), "+fp16fml");
if (llvm::is_contained(features, "+v8.4a")) {
const auto ItRFullFP16 = std::find(features.rbegin(), features.rend(), "+fullfp16");
if (ItRFullFP16 < ItRNoFullFP16 && ItRFullFP16 < ItRFP16FML) {
// Only entangled feature that can be to the right of this +fullfp16 is -fp16fml.
// Only append the +fp16fml if there is no -fp16fml after the +fullfp16.
if (std::find(features.rbegin(), ItRFullFP16, "-fp16fml") == ItRFullFP16)
features.push_back("+fp16fml");
} else
goto fp16_fml_fallthrough;
} else {
fp16_fml_fallthrough:
// In both of these cases, putting the 'other' feature on the end of the vector will
// result in the same effect as placing it immediately after the current feature.
if (ItRNoFullFP16 < ItRFP16FML)
features.push_back("-fp16fml");
else if (ItRNoFullFP16 > ItRFP16FML)
features.push_back("+fullfp16");
}
// FIXME: this needs reimplementation too after the TargetParser rewrite
//
// Context sensitive meaning of Crypto:
// 1) For Arch >= ARMv8.4a: crypto = sm4 + sha3 + sha2 + aes
// 2) For Arch <= ARMv8.3a: crypto = sha2 + aes
const auto ItBegin = features.begin();
const auto ItEnd = features.end();
const auto ItRBegin = features.rbegin();
const auto ItREnd = features.rend();
const auto ItRCrypto = std::find(ItRBegin, ItREnd, "+crypto");
const auto ItRNoCrypto = std::find(ItRBegin, ItREnd, "-crypto");
const auto HasCrypto = ItRCrypto != ItREnd;
const auto HasNoCrypto = ItRNoCrypto != ItREnd;
const ptrdiff_t PosCrypto = ItRCrypto - ItRBegin;
const ptrdiff_t PosNoCrypto = ItRNoCrypto - ItRBegin;
bool NoCrypto = false;
if (HasCrypto && HasNoCrypto) {
if (PosNoCrypto < PosCrypto)
NoCrypto = true;
}
if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd) {
if (HasCrypto && !NoCrypto) {
// Check if we have NOT disabled an algorithm with something like:
// +crypto, -algorithm
// And if "-algorithm" does not occur, we enable that crypto algorithm.
const bool HasSM4 = (std::find(ItBegin, ItEnd, "-sm4") == ItEnd);
const bool HasSHA3 = (std::find(ItBegin, ItEnd, "-sha3") == ItEnd);
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "-sha2") == ItEnd);
const bool HasAES = (std::find(ItBegin, ItEnd, "-aes") == ItEnd);
if (HasSM4)
features.push_back("+sm4");
if (HasSHA3)
features.push_back("+sha3");
if (HasSHA2)
features.push_back("+sha2");
if (HasAES)
features.push_back("+aes");
} else if (HasNoCrypto) {
// Check if we have NOT enabled a crypto algorithm with something like:
// -crypto, +algorithm
// And if "+algorithm" does not occur, we disable that crypto algorithm.
const bool HasSM4 = (std::find(ItBegin, ItEnd, "+sm4") != ItEnd);
const bool HasSHA3 = (std::find(ItBegin, ItEnd, "+sha3") != ItEnd);
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "+sha2") != ItEnd);
const bool HasAES = (std::find(ItBegin, ItEnd, "+aes") != ItEnd);
if (!HasSM4)
features.push_back("-sm4");
if (!HasSHA3)
features.push_back("-sha3");
if (!HasSHA2)
features.push_back("-sha2");
if (!HasAES)
features.push_back("-aes");
}
} else {
if (HasCrypto && !NoCrypto) {
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "-sha2") == ItEnd);
const bool HasAES = (std::find(ItBegin, ItEnd, "-aes") == ItEnd);
if (HasSHA2)
features.push_back("+sha2");
if (HasAES)
features.push_back("+aes");
} else if (HasNoCrypto) {
const bool HasSHA2 = (std::find(ItBegin, ItEnd, "+sha2") != ItEnd);
const bool HasAES = (std::find(ItBegin, ItEnd, "+aes") != ItEnd);
const bool HasV82a = (std::find(ItBegin, ItEnd, "+v8.2a") != ItEnd);
const bool HasV83a = (std::find(ItBegin, ItEnd, "+v8.3a") != ItEnd);
const bool HasV84a = (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd);
if (!HasSHA2)
features.push_back("-sha2");
if (!HasAES)
features.push_back("-aes");
if (HasV82a || HasV83a || HasV84a) {
features.push_back("-sm4");
features.push_back("-sha3");
}
}
}
auto V8_6Pos = llvm::find(features, "+v8.6a");
if (V8_6Pos != std::end(features))
V8_6Pos = features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
if (triple.isOSOpenBSD())
features.push_back("+strict-align");
return join(features);
}
} // namespace ir
} // namespace codon
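// A short, hedged example of how these helpers are consumed (CPU names and
// feature strings vary by host; the values below are illustrative only):
//
//   llvm::Triple triple(llvm::sys::getDefaultTargetTriple());
//   codon::ir::Aarch64 aarch64;
//   std::string cpu = aarch64.getCPU(triple);           // e.g. "apple-m1"
//   std::string features = aarch64.getFeatures(triple); // e.g. "+neon,+zcm,+zcz,..."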

View File

@ -0,0 +1,17 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once
#include "codon/cir/llvm/native/targets/target.h"
namespace codon {
namespace ir {
class Aarch64 : public Target {
public:
std::string getCPU(const llvm::Triple &triple) const override;
std::string getFeatures(const llvm::Triple &triple) const override;
};
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,21 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once
#include <sstream>
#include <string>
#include "codon/cir/llvm/llvm.h"
namespace codon {
namespace ir {
class Target {
public:
virtual ~Target() {}
virtual std::string getCPU(const llvm::Triple &triple) const = 0;
virtual std::string getFeatures(const llvm::Triple &triple) const = 0;
};
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,108 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "x86.h"
namespace codon {
namespace ir {
namespace {
template <typename T> std::string join(const T &v, const std::string &delim = ",") {
std::ostringstream s;
for (const auto &i : v) {
if (&i != &v[0])
s << delim;
s << std::string(i);
}
return s.str();
}
} // namespace
std::string X86::getCPU(const llvm::Triple &triple) const {
auto CPU = llvm::sys::getHostCPUName();
if (!CPU.empty() && CPU != "generic")
return std::string(CPU);
// Select the default CPU if none was given (or detection failed).
if (!triple.isX86())
return ""; // This routine is only handling x86 targets.
bool is64Bit = triple.getArch() == llvm::Triple::x86_64;
// FIXME: Need target hooks.
if (triple.isOSDarwin()) {
if (triple.getArchName() == "x86_64h")
return "core-avx2";
// macosx10.12 drops support for all pre-Penryn Macs.
// Simulators can still run on 10.11 though, like Xcode.
if (triple.isMacOSX() && !triple.isOSVersionLT(10, 12))
return "penryn";
if (triple.isDriverKit())
return "nehalem";
// The oldest x86_64 Macs have core2/Merom; the oldest x86 Macs have Yonah.
return is64Bit ? "core2" : "yonah";
}
// Set up default CPU name for PS4/PS5 compilers.
if (triple.isPS4())
return "btver2";
if (triple.isPS5())
return "znver2";
// On Android use targets compatible with gcc
if (triple.isAndroid())
return is64Bit ? "x86-64" : "i686";
// Everything else goes to x86-64 in 64-bit mode.
if (is64Bit)
return "x86-64";
switch (triple.getOS()) {
case llvm::Triple::NetBSD:
return "i486";
case llvm::Triple::Haiku:
case llvm::Triple::OpenBSD:
return "i586";
case llvm::Triple::FreeBSD:
return "i686";
default:
// Fallback to p4.
return "pentium4";
}
}
std::string X86::getFeatures(const llvm::Triple &triple) const {
std::vector<std::string> features;
llvm::StringMap<bool> hostFeatures;
if (llvm::sys::getHostCPUFeatures(hostFeatures)) {
for (auto &f : hostFeatures) {
features.push_back((f.second ? "+" : "-") + f.first().str());
}
}
if (triple.getArchName() == "x86_64h") {
// x86_64h implies quite a few of the more modern subtarget features
// for Haswell class CPUs, but not all of them. Opt-out of a few.
features.push_back("-rdrnd");
features.push_back("-aes");
features.push_back("-pclmul");
features.push_back("-rtm");
features.push_back("-fsgsbase");
}
const llvm::Triple::ArchType ArchType = triple.getArch();
// Add features to be compatible with gcc for Android.
if (triple.isAndroid()) {
if (ArchType == llvm::Triple::x86_64) {
features.push_back("+sse4.2");
features.push_back("+popcnt");
features.push_back("+cx16");
} else
features.push_back("+ssse3");
}
return join(features);
}
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,17 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once
#include "codon/cir/llvm/native/targets/target.h"
namespace codon {
namespace ir {
class X86 : public Target {
public:
std::string getCPU(const llvm::Triple &triple) const override;
std::string getFeatures(const llvm::Triple &triple) const override;
};
} // namespace ir
} // namespace codon

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "optimize.h"
@ -6,12 +6,23 @@
#include <deque>
#include "codon/cir/llvm/gpu.h"
#include "codon/cir/llvm/native/native.h"
#include "codon/util/common.h"
static llvm::codegen::RegisterCodeGenFlags CFG;
namespace codon {
namespace ir {
namespace {
llvm::cl::opt<bool>
AutoFree("auto-free",
llvm::cl::desc("Insert free() calls on allocated memory automatically"),
llvm::cl::init(false), llvm::cl::Hidden);
llvm::cl::opt<bool> FastMath("fast-math",
llvm::cl::desc("Apply fastmath optimizations"),
llvm::cl::init(false));
} // namespace
std::unique_ptr<llvm::TargetMachine>
getTargetMachine(llvm::Triple triple, llvm::StringRef cpuStr,
@ -77,6 +88,27 @@ void applyDebugTransformations(llvm::Module *module, bool debug, bool jit) {
}
}
void applyFastMathTransformations(llvm::Module *module) {
if (!FastMath)
return;
for (auto &f : *module) {
for (auto &block : f) {
for (auto &inst : block) {
if (auto *binop = llvm::dyn_cast<llvm::BinaryOperator>(&inst)) {
if (binop->getType()->isFloatingPointTy())
binop->setFast(true);
}
if (auto *intrinsic = llvm::dyn_cast<llvm::IntrinsicInst>(&inst)) {
if (intrinsic->getType()->isFloatingPointTy())
intrinsic->setFast(true);
}
}
}
}
}
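// Effect sketch (illustrative): with -fast-math enabled, a floating-point
// instruction such as
//   %r = fadd double %a, %b
// becomes
//   %r = fadd fast double %a, %b
// which licenses reassociation, reciprocal math, and no-NaN/no-Inf assumptions
// in the downstream pipeline. Only FP binary operators and FP intrinsics are
// touched above.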
struct AllocInfo {
std::vector<std::string> allocators;
std::string realloc;
@ -751,6 +783,136 @@ struct AllocationHoister : public llvm::PassInfoMixin<AllocationHoister> {
}
};
struct AllocationAutoFree : public llvm::PassInfoMixin<AllocationAutoFree> {
AllocInfo info;
explicit AllocationAutoFree(
std::vector<std::string> allocators = {"seq_alloc", "seq_alloc_atomic",
"seq_alloc_uncollectable",
"seq_alloc_atomic_uncollectable"},
const std::string &realloc = "seq_realloc", const std::string &free = "seq_free")
: info(std::move(allocators), realloc, free) {}
llvm::PreservedAnalyses run(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) {
// Get the necessary analysis results.
auto &MSSA = FAM.getResult<llvm::MemorySSAAnalysis>(F);
auto &TLI = FAM.getResult<llvm::TargetLibraryAnalysis>(F);
auto &AA = FAM.getResult<llvm::AAManager>(F);
auto &DT = FAM.getResult<llvm::DominatorTreeAnalysis>(F);
auto &PDT = FAM.getResult<llvm::PostDominatorTreeAnalysis>(F);
auto &LI = FAM.getResult<llvm::LoopAnalysis>(F);
auto &CI = FAM.getResult<llvm::CycleAnalysis>(F);
bool Changed = false;
// Traverse the function to find allocs and insert corresponding frees.
for (auto &BB : F) {
for (auto &I : BB) {
if (auto *Alloc = llvm::dyn_cast<llvm::CallInst>(&I)) {
auto *Callee = Alloc->getCalledFunction();
if (!Callee || !Callee->isDeclaration())
continue;
if (info.isAlloc(Alloc)) {
if (llvm::PointerMayBeCaptured(Alloc, /*ReturnCaptures=*/true,
/*StoreCaptures=*/true))
continue;
Changed |= insertFree(Alloc, F, DT, PDT, LI, CI);
}
}
}
}
return (Changed ? llvm::PreservedAnalyses::none() : llvm::PreservedAnalyses::all());
}
bool insertFree(llvm::Instruction *Alloc, llvm::Function &F, llvm::DominatorTree &DT,
llvm::PostDominatorTree &PDT, llvm::LoopInfo &LI,
llvm::CycleInfo &CI) {
llvm::SmallVector<llvm::Value *, 8> Worklist;
llvm::SmallPtrSet<llvm::Value *, 8> Visited;
llvm::SmallVector<llvm::BasicBlock *, 8> UseBlocks;
// We need to find a basic block that:
// 1. Post-dominates the allocation block (so we always free it)
// 2. Is dominated by the allocation block (so the use is valid)
// 3. Post-dominates all uses
// Start with the original pointer.
Worklist.push_back(Alloc);
UseBlocks.push_back(Alloc->getParent());
// Track all blocks where the pointer or its derived values are used.
while (!Worklist.empty()) {
auto *CurrentPtr = Worklist.pop_back_val();
if (!Visited.insert(CurrentPtr).second)
continue;
// Traverse all users of the current pointer.
for (auto *U : CurrentPtr->users()) {
if (auto *Inst = llvm::dyn_cast<llvm::Instruction>(U)) {
if (auto *call = llvm::dyn_cast<llvm::CallBase>(Inst))
if (call->getCalledFunction() && info.isFree(call->getCalledFunction()))
return false;
if (llvm::isa<llvm::GetElementPtrInst>(Inst) ||
llvm::isa<llvm::BitCastInst>(Inst) || llvm::isa<llvm::PHINode>(Inst) ||
llvm::isa<llvm::SelectInst>(Inst)) {
Worklist.push_back(Inst);
} else {
// If this is a real use, record the block.
UseBlocks.push_back(Inst->getParent());
}
}
}
}
// Find the closest post-dominating block of all the use blocks.
llvm::BasicBlock *PostDomBlock = nullptr;
for (auto *BB : UseBlocks) {
if (!PostDomBlock) {
PostDomBlock = BB;
} else {
PostDomBlock = PDT.findNearestCommonDominator(PostDomBlock, BB);
if (!PostDomBlock) {
return false;
}
}
}
auto *allocLoop = LI.getLoopFor(Alloc->getParent());
auto *freeLoop = LI.getLoopFor(PostDomBlock);
while (allocLoop != freeLoop) {
if (!freeLoop)
return false;
PostDomBlock = freeLoop->getExitBlock();
if (!PostDomBlock)
return false;
freeLoop = LI.getLoopFor(PostDomBlock);
}
if (!DT.dominates(Alloc->getParent(), PostDomBlock)) {
return false;
}
llvm::IRBuilder<> B(PostDomBlock->getTerminator());
auto *FreeFunc = F.getParent()->getFunction(info.free);
if (!FreeFunc) {
FreeFunc = llvm::Function::Create(
llvm::FunctionType::get(B.getVoidTy(), {B.getPtrTy()}, false),
llvm::Function::ExternalLinkage, info.free, F.getParent());
FreeFunc->setWillReturn();
FreeFunc->setDoesNotThrow();
}
// Add free
B.CreateCall(FreeFunc, Alloc);
return true;
}
};
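// Placement sketch for the auto-free logic above (illustrative, not generated
// output): given
//   %p = call ptr @seq_alloc(i64 %n)          ; block A
//   ... loads/GEPs/phis derived from %p ...   ; blocks dominated by A
// a call to @seq_free(%p) is inserted at the terminator of the nearest block
// that post-dominates A and every use, provided A dominates that block, the
// pointer never escapes, and the free ends up in the same loop as the alloc.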
/// Sometimes coroutine lowering produces hard-to-analyze loops involving
/// function pointer comparisons. This pass puts them into a somewhat
/// easier-to-analyze form.
@ -826,9 +988,15 @@ struct CoroBranchSimplifier : public llvm::PassInfoMixin<CoroBranchSimplifier> {
}
};
llvm::cl::opt<bool>
DisableNative("disable-native",
llvm::cl::desc("Disable architecture-specific optimizations"),
llvm::cl::init(false));
void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit,
PluginManager *plugins) {
applyDebugTransformations(module, debug, jit);
applyFastMathTransformations(module);
llvm::LoopAnalysisManager lam;
llvm::FunctionAnalysisManager fam;
@ -860,9 +1028,14 @@ void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit,
pm.addPass(llvm::LoopSimplifyPass());
pm.addPass(llvm::LCSSAPass());
pm.addPass(AllocationHoister());
if (AutoFree)
pm.addPass(AllocationAutoFree());
}
});
if (!DisableNative)
addNativeLLVMPasses(&pb);
if (plugins) {
for (auto *plugin : *plugins) {
plugin->dsl->addLLVMPasses(&pb, debug);
@ -884,7 +1057,15 @@ void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit,
void verify(llvm::Module *module) {
const bool broken = llvm::verifyModule(*module, &llvm::errs());
seqassertn(!broken, "module broken");
if (broken) {
auto fo = fopen("_dump.ll", "w");
llvm::raw_fd_ostream fout(fileno(fo), true);
fout << *module;
fout.close();
}
seqassertn(!broken, "Generated LLVM IR is invalid and has been dumped to '_dump.ll'. "
"Please submit a bug report at https://github.com/exaloop/codon "
"including the code and generated LLVM IR.");
}
} // namespace
@ -906,5 +1087,7 @@ void optimize(llvm::Module *module, bool debug, bool jit, PluginManager *plugins
verify(module);
}
bool isFastMathOn() { return FastMath; }
} // namespace ir
} // namespace codon

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once
@ -20,5 +20,7 @@ getTargetMachine(llvm::Module *module, bool setFunctionAttributes = false,
void optimize(llvm::Module *module, bool debug, bool jit = false,
PluginManager *plugins = nullptr);
bool isFastMathOn();
} // namespace ir
} // namespace codon

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "module.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "canonical.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "dead_code.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "global_demote.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "replacer.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "const_fold.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "const_prop.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "folding.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "imperative.h"
@ -117,7 +117,7 @@ void ImperativeForFlowLowering::handle(ForFlow *v) {
// body
auto *parent = cast<BodiedFunc>(getParentFunc());
auto *series = M->N<SeriesFlow>(v->getSrcInfo());
auto *listVar = util::makeVar(list, series, parent)->getVar();
auto *listVar = util::makeVar(list, series, parent);
auto *lenVal = M->Nr<ExtractInstr>(M->Nr<VarValue>(listVar), "len");
auto *lenVar = util::makeVar(lenVal, series, parent);
auto *ptrVal = M->Nr<ExtractInstr>(
@ -129,12 +129,14 @@ void ImperativeForFlowLowering::handle(ForFlow *v) {
auto *oldLoopVar = v->getVar();
auto *newLoopVar = M->Nr<Var>(M->getIntType());
parent->push_back(newLoopVar);
auto *replacement = M->N<ImperativeForFlow>(
v->getSrcInfo(), M->getInt(0), 1, lenVar, body, newLoopVar, std::move(sched));
auto *replacement = M->N<ImperativeForFlow>(v->getSrcInfo(), M->getInt(0), 1,
M->Nr<VarValue>(lenVar), body,
newLoopVar, std::move(sched));
series->push_back(replacement);
body->insert(
body->begin(),
M->Nr<AssignInstr>(oldLoopVar, (*ptrVar)[*M->Nr<VarValue>(newLoopVar)]));
M->Nr<AssignInstr>(oldLoopVar,
(*M->Nr<VarValue>(ptrVar))[*M->Nr<VarValue>(newLoopVar)]));
v->replaceAll(series);
}
}

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "pipeline.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "manager.h"
@ -15,6 +15,7 @@
#include "codon/cir/transform/lowering/imperative.h"
#include "codon/cir/transform/lowering/pipeline.h"
#include "codon/cir/transform/manager.h"
#include "codon/cir/transform/numpy/numpy.h"
#include "codon/cir/transform/parallel/openmp.h"
#include "codon/cir/transform/pass.h"
#include "codon/cir/transform/pythonic/dict.h"
@ -196,6 +197,9 @@ void PassManager::registerStandardPasses(PassManager::Init init) {
pyNumerics),
/*insertBefore=*/"", {seKey1, rdKey, globalKey},
{seKey1, rdKey, cfgKey, globalKey, capKey});
registerPass(std::make_unique<numpy::NumPyFusionPass>(rdKey, seKey2),
/*insertBefore=*/"", {rdKey, seKey2},
{seKey1, rdKey, cfgKey, globalKey, capKey});
// parallel
registerPass(std::make_unique<parallel::OpenMPPass>(), /*insertBefore=*/"", {},

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -0,0 +1,982 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "numpy.h"
#include "codon/cir/util/irtools.h"
namespace codon {
namespace ir {
namespace transform {
namespace numpy {
namespace {
types::Type *coerceScalarArray(NumPyType &scalar, NumPyType &array,
NumPyPrimitiveTypes &T) {
auto xtype = scalar.dtype;
auto atype = array.dtype;
bool aIsInt = false;
bool xIsInt = false;
bool aIsFloat = false;
bool xIsFloat = false;
bool aIsComplex = false;
bool xIsComplex = false;
switch (atype) {
case NumPyType::NP_TYPE_ARR_BOOL:
break;
case NumPyType::NP_TYPE_ARR_I8:
case NumPyType::NP_TYPE_ARR_U8:
case NumPyType::NP_TYPE_ARR_I16:
case NumPyType::NP_TYPE_ARR_U16:
case NumPyType::NP_TYPE_ARR_I32:
case NumPyType::NP_TYPE_ARR_U32:
case NumPyType::NP_TYPE_ARR_I64:
case NumPyType::NP_TYPE_ARR_U64:
aIsInt = true;
break;
case NumPyType::NP_TYPE_ARR_F16:
case NumPyType::NP_TYPE_ARR_F32:
case NumPyType::NP_TYPE_ARR_F64:
aIsFloat = true;
break;
case NumPyType::NP_TYPE_ARR_C64:
case NumPyType::NP_TYPE_ARR_C128:
aIsComplex = true;
break;
default:
seqassertn(false, "unexpected type");
}
xIsInt = (xtype == NumPyType::NP_TYPE_BOOL || xtype == NumPyType::NP_TYPE_I64);
xIsFloat = (xtype == NumPyType::NP_TYPE_F64);
xIsComplex = (xtype == NumPyType::NP_TYPE_C128);
bool shouldCast =
((xIsInt && (aIsInt || aIsFloat || aIsComplex)) ||
(xIsFloat && (aIsFloat || aIsComplex)) || (xIsComplex && aIsComplex));
if ((atype == NumPyType::NP_TYPE_ARR_F16 || atype == NumPyType::NP_TYPE_ARR_F32) &&
xtype == NumPyType::NP_TYPE_C128)
return T.c64;
else if (shouldCast)
return array.getIRBaseType(T);
else
return scalar.getIRBaseType(T);
}
template <typename E>
types::Type *decideTypes(E *expr, NumPyType &lhs, NumPyType &rhs,
NumPyPrimitiveTypes &T) {
// Special case(s)
if (expr->op == E::NP_OP_COPYSIGN)
return expr->type.getIRBaseType(T);
if (lhs.isArray() && !rhs.isArray())
return coerceScalarArray(rhs, lhs, T);
if (!lhs.isArray() && rhs.isArray())
return coerceScalarArray(lhs, rhs, T);
auto *t1 = lhs.getIRBaseType(T);
auto *t2 = rhs.getIRBaseType(T);
auto *M = t1->getModule();
auto *coerceFunc = M->getOrRealizeFunc("_coerce", {}, {t1, t2}, FUSION_MODULE);
seqassertn(coerceFunc, "coerce func not found");
return util::getReturnType(coerceFunc);
}
} // namespace
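// Worked examples of the scalar/array coercion above (derived from the rules
// in coerceScalarArray; dtype spellings are illustrative):
//   int scalar        op float32 array  -> float32   (scalar follows the array)
//   float scalar      op int64 array    -> float64   (scalar's type wins)
//   complex128 scalar op float32 array  -> complex64 (explicit special case)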
void NumPyExpr::replace(NumPyExpr &e) {
type = e.type;
val = e.val;
op = e.op;
lhs = std::move(e.lhs);
rhs = std::move(e.rhs);
freeable = e.freeable;
e.type = {};
e.val = nullptr;
e.op = NP_OP_NONE;
e.lhs = {};
e.rhs = {};
e.freeable = false;
}
bool NumPyExpr::haveVectorizedLoop() const {
if (lhs && !(lhs->type.dtype == NumPyType::NP_TYPE_ARR_F32 ||
lhs->type.dtype == NumPyType::NP_TYPE_ARR_F64))
return false;
if (rhs && !(rhs->type.dtype == NumPyType::NP_TYPE_ARR_F32 ||
rhs->type.dtype == NumPyType::NP_TYPE_ARR_F64))
return false;
if (lhs && rhs && lhs->type.dtype != rhs->type.dtype)
return false;
// These are the loops available in the runtime library.
static const std::vector<std::string> VecLoops = {
"arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "arctan2",
"cos", "exp", "exp2", "expm1", "log", "log10", "log1p",
"log2", "sin", "sinh", "tanh", "hypot"};
return std::find(VecLoops.begin(), VecLoops.end(), opstring()) != VecLoops.end();
}
int64_t NumPyExpr::opcost() const {
switch (op) {
case NP_OP_NONE:
return 0;
case NP_OP_POS:
return 0;
case NP_OP_NEG:
return 0;
case NP_OP_INVERT:
return 0;
case NP_OP_ABS:
return 1;
case NP_OP_TRANSPOSE:
return 0;
case NP_OP_ADD:
return 1;
case NP_OP_SUB:
return 1;
case NP_OP_MUL:
return 1;
case NP_OP_MATMUL:
return 20;
case NP_OP_TRUE_DIV:
return 8;
case NP_OP_FLOOR_DIV:
return 8;
case NP_OP_MOD:
return 8;
case NP_OP_FMOD:
return 8;
case NP_OP_POW:
return 8;
case NP_OP_LSHIFT:
return 1;
case NP_OP_RSHIFT:
return 1;
case NP_OP_AND:
return 1;
case NP_OP_OR:
return 1;
case NP_OP_XOR:
return 1;
case NP_OP_LOGICAL_AND:
return 1;
case NP_OP_LOGICAL_OR:
return 1;
case NP_OP_LOGICAL_XOR:
return 1;
case NP_OP_EQ:
return 1;
case NP_OP_NE:
return 1;
case NP_OP_LT:
return 1;
case NP_OP_LE:
return 1;
case NP_OP_GT:
return 1;
case NP_OP_GE:
return 1;
case NP_OP_MIN:
return 3;
case NP_OP_MAX:
return 3;
case NP_OP_FMIN:
return 3;
case NP_OP_FMAX:
return 3;
case NP_OP_SIN:
return 10;
case NP_OP_COS:
return 10;
case NP_OP_TAN:
return 10;
case NP_OP_ARCSIN:
return 20;
case NP_OP_ARCCOS:
return 20;
case NP_OP_ARCTAN:
return 20;
case NP_OP_ARCTAN2:
return 35;
case NP_OP_HYPOT:
return 5;
case NP_OP_SINH:
return 10;
case NP_OP_COSH:
return 10;
case NP_OP_TANH:
return 10;
case NP_OP_ARCSINH:
return 10;
case NP_OP_ARCCOSH:
return 10;
case NP_OP_ARCTANH:
return 10;
case NP_OP_CONJ:
return 1;
case NP_OP_EXP:
return 5;
case NP_OP_EXP2:
return 5;
case NP_OP_LOG:
return 5;
case NP_OP_LOG2:
return 5;
case NP_OP_LOG10:
return 5;
case NP_OP_EXPM1:
return 5;
case NP_OP_LOG1P:
return 5;
case NP_OP_SQRT:
return 2;
case NP_OP_SQUARE:
return 1;
case NP_OP_CBRT:
return 5;
case NP_OP_LOGADDEXP:
return 10;
case NP_OP_LOGADDEXP2:
return 10;
case NP_OP_RECIPROCAL:
return 1;
case NP_OP_RINT:
return 1;
case NP_OP_FLOOR:
return 1;
case NP_OP_CEIL:
return 1;
case NP_OP_TRUNC:
return 1;
case NP_OP_ISNAN:
return 1;
case NP_OP_ISINF:
return 1;
case NP_OP_ISFINITE:
return 1;
case NP_OP_SIGN:
return 1;
case NP_OP_SIGNBIT:
return 1;
case NP_OP_COPYSIGN:
return 1;
case NP_OP_SPACING:
return 1;
case NP_OP_NEXTAFTER:
return 1;
case NP_OP_DEG2RAD:
return 2;
case NP_OP_RAD2DEG:
return 2;
case NP_OP_HEAVISIDE:
return 3;
}
return -1; // not reached for the ops above; cost() interprets -1 as "unknown"
}
int64_t NumPyExpr::cost() const {
auto c = opcost();
if (c == -1)
return -1;
// Account for the fact that the vectorized loops are much faster.
if (haveVectorizedLoop()) {
c *= 3;
if (lhs->type.dtype == NumPyType::NP_TYPE_ARR_F32)
c *= 2;
}
bool lhsIntConst = (lhs && lhs->isLeaf() && isA<IntConst>(lhs->val));
bool rhsIntConst = (rhs && rhs->isLeaf() && isA<IntConst>(rhs->val));
bool lhsFloatConst = (lhs && lhs->isLeaf() && isA<FloatConst>(lhs->val));
bool rhsFloatConst = (rhs && rhs->isLeaf() && isA<FloatConst>(rhs->val));
bool lhsConst = lhsIntConst || lhsFloatConst;
bool rhsConst = rhsIntConst || rhsFloatConst;
if (rhsConst || lhsConst) {
switch (op) {
case NP_OP_TRUE_DIV:
case NP_OP_FLOOR_DIV:
case NP_OP_MOD:
case NP_OP_FMOD:
c = 1;
break;
case NP_OP_POW:
if (rhsIntConst)
c = (cast<IntConst>(rhs->val)->getVal() == 2) ? 1 : 5;
break;
default:
break;
}
}
if (lhs) {
auto cl = lhs->cost();
if (cl == -1)
return -1;
c += cl;
}
if (rhs) {
auto cr = rhs->cost();
if (cr == -1)
return -1;
c += cr;
}
return c;
}
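// Worked example (illustrative): for a tree computing a * b + c over float64
// arrays, cost() = opcost(mul)=1 + opcost(add)=1 + 0 per leaf = 2; neither op
// has a vectorized runtime loop, so no scaling applies. A division with a
// constant operand drops to cost 1, and x ** 2 with a constant integer
// exponent is likewise treated as cost 1.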
std::string NumPyExpr::opstring() const {
static const std::unordered_map<Op, std::string> m = {
{NP_OP_NONE, "a"},
{NP_OP_POS, "pos"},
{NP_OP_NEG, "neg"},
{NP_OP_INVERT, "invert"},
{NP_OP_ABS, "abs"},
{NP_OP_TRANSPOSE, "transpose"},
{NP_OP_ADD, "add"},
{NP_OP_SUB, "sub"},
{NP_OP_MUL, "mul"},
{NP_OP_MATMUL, "matmul"},
{NP_OP_TRUE_DIV, "true_div"},
{NP_OP_FLOOR_DIV, "floor_div"},
{NP_OP_MOD, "mod"},
{NP_OP_FMOD, "fmod"},
{NP_OP_POW, "pow"},
{NP_OP_LSHIFT, "lshift"},
{NP_OP_RSHIFT, "rshift"},
{NP_OP_AND, "and"},
{NP_OP_OR, "or"},
{NP_OP_XOR, "xor"},
{NP_OP_LOGICAL_AND, "logical_and"},
{NP_OP_LOGICAL_OR, "logical_or"},
{NP_OP_LOGICAL_XOR, "logical_xor"},
{NP_OP_EQ, "eq"},
{NP_OP_NE, "ne"},
{NP_OP_LT, "lt"},
{NP_OP_LE, "le"},
{NP_OP_GT, "gt"},
{NP_OP_GE, "ge"},
{NP_OP_MIN, "minimum"},
{NP_OP_MAX, "maximum"},
{NP_OP_FMIN, "fmin"},
{NP_OP_FMAX, "fmax"},
{NP_OP_SIN, "sin"},
{NP_OP_COS, "cos"},
{NP_OP_TAN, "tan"},
{NP_OP_ARCSIN, "arcsin"},
{NP_OP_ARCCOS, "arccos"},
{NP_OP_ARCTAN, "arctan"},
{NP_OP_ARCTAN2, "arctan2"},
{NP_OP_HYPOT, "hypot"},
{NP_OP_SINH, "sinh"},
{NP_OP_COSH, "cosh"},
{NP_OP_TANH, "tanh"},
{NP_OP_ARCSINH, "arcsinh"},
{NP_OP_ARCCOSH, "arccosh"},
{NP_OP_ARCTANH, "arctanh"},
{NP_OP_CONJ, "conj"},
{NP_OP_EXP, "exp"},
{NP_OP_EXP2, "exp2"},
{NP_OP_LOG, "log"},
{NP_OP_LOG2, "log2"},
{NP_OP_LOG10, "log10"},
{NP_OP_EXPM1, "expm1"},
{NP_OP_LOG1P, "log1p"},
{NP_OP_SQRT, "sqrt"},
{NP_OP_SQUARE, "square"},
{NP_OP_CBRT, "cbrt"},
{NP_OP_LOGADDEXP, "logaddexp"},
{NP_OP_LOGADDEXP2, "logaddexp2"},
{NP_OP_RECIPROCAL, "reciprocal"},
{NP_OP_RINT, "rint"},
{NP_OP_FLOOR, "floor"},
{NP_OP_CEIL, "ceil"},
{NP_OP_TRUNC, "trunc"},
{NP_OP_ISNAN, "isnan"},
{NP_OP_ISINF, "isinf"},
{NP_OP_ISFINITE, "isfinite"},
{NP_OP_SIGN, "sign"},
{NP_OP_SIGNBIT, "signbit"},
{NP_OP_COPYSIGN, "copysign"},
{NP_OP_SPACING, "spacing"},
{NP_OP_NEXTAFTER, "nextafter"},
{NP_OP_DEG2RAD, "deg2rad"},
{NP_OP_RAD2DEG, "rad2deg"},
{NP_OP_HEAVISIDE, "heaviside"},
};
auto it = m.find(op);
seqassertn(it != m.end(), "op not found");
return it->second;
}
void NumPyExpr::dump(std::ostream &os, int level, int &leafId) const {
auto indent = [&]() {
for (int i = 0; i < level; i++)
os << " ";
};
indent();
if (op == NP_OP_NONE) {
os << "\033[1;36m" << opstring() << leafId;
++leafId;
} else {
os << "\033[1;33m" << opstring();
}
os << "\033[0m <" << type << ">";
if (op != NP_OP_NONE)
os << " \033[1;35m[cost=" << cost() << "]\033[0m";
os << "\n";
if (lhs)
lhs->dump(os, level + 1, leafId);
if (rhs)
rhs->dump(os, level + 1, leafId);
}
std::ostream &operator<<(std::ostream &os, NumPyExpr const &expr) {
int leafId = 0;
expr.dump(os, 0, leafId);
return os;
}
std::string NumPyExpr::str() const {
std::stringstream buffer;
buffer << *this;
return buffer.str();
}
void NumPyExpr::apply(std::function<void(NumPyExpr &)> f) {
f(*this);
if (lhs)
lhs->apply(f);
if (rhs)
rhs->apply(f);
}
Value *NumPyExpr::codegenBroadcasts(CodegenContext &C) {
auto *M = C.M;
auto &vars = C.vars;
Value *targetShape = nullptr;
Value *result = nullptr;
apply([&](NumPyExpr &e) {
if (e.isLeaf() && e.type.isArray()) {
auto it = vars.find(&e);
seqassertn(it != vars.end(),
"NumPyExpr not found in vars map (codegen broadcasts)");
auto *var = it->second;
auto *shape = M->getOrRealizeFunc("_shape", {var->getType()}, {}, FUSION_MODULE);
seqassertn(shape, "shape function not found");
auto *leafShape = util::call(shape, {M->Nr<VarValue>(var)});
if (!targetShape) {
targetShape = leafShape;
} else {
auto *diff = (*targetShape != *leafShape);
if (result) {
result = *result | *diff;
} else {
result = diff;
}
}
}
});
return result ? result : M->getBool(false);
}
Var *NumPyExpr::codegenFusedEval(CodegenContext &C) {
auto *M = C.M;
auto *series = C.series;
auto *func = C.func;
auto &vars = C.vars;
auto &T = C.T;
std::vector<std::pair<NumPyExpr *, Var *>> leaves;
apply([&](NumPyExpr &e) {
if (e.isLeaf()) {
auto it = vars.find(&e);
seqassertn(it != vars.end(), "NumPyExpr not found in vars map (fused eval)");
auto *var = it->second;
leaves.emplace_back(&e, var);
}
});
// Arrays for scalar expression function
std::vector<Value *> arrays;
std::vector<std::string> scalarFuncArgNames;
std::vector<types::Type *> scalarFuncArgTypes;
std::unordered_map<NumPyExpr *, Var *> scalarFuncArgMap;
// Scalars passed through 'extra' arg of ndarray._loop()
std::vector<Value *> extra;
std::unordered_map<NumPyExpr *, unsigned> extraMap;
auto *baseType = type.getIRBaseType(T);
scalarFuncArgNames.push_back("out");
scalarFuncArgTypes.push_back(M->getPointerType(baseType));
unsigned argIdx = 0;
unsigned extraIdx = 0;
for (auto &e : leaves) {
if (e.first->type.isArray()) {
arrays.push_back(M->Nr<VarValue>(e.second));
scalarFuncArgNames.push_back("in" + std::to_string(argIdx++));
scalarFuncArgTypes.push_back(M->getPointerType(e.first->type.getIRBaseType(T)));
} else {
extra.push_back(M->Nr<VarValue>(e.second));
extraMap.emplace(e.first, extraIdx++);
}
}
auto *extraTuple = util::makeTuple(extra, M);
scalarFuncArgNames.push_back("extra");
scalarFuncArgTypes.push_back(extraTuple->getType());
auto *scalarFuncType = M->getFuncType(M->getNoneType(), scalarFuncArgTypes);
auto *scalarFunc = M->Nr<BodiedFunc>("__numpy_fusion_scalar_fn");
scalarFunc->realize(scalarFuncType, scalarFuncArgNames);
std::vector<Var *> scalarFuncArgVars(scalarFunc->arg_begin(), scalarFunc->arg_end());
argIdx = 1;
for (auto &e : leaves) {
if (e.first->type.isArray()) {
scalarFuncArgMap.emplace(e.first, scalarFuncArgVars[argIdx++]);
}
}
auto *scalarExpr =
codegenScalarExpr(C, scalarFuncArgMap, extraMap, scalarFuncArgVars.back());
auto *ptrsetFunc = M->getOrRealizeFunc("_ptrset", {scalarFuncArgTypes[0], baseType},
{}, FUSION_MODULE);
seqassertn(ptrsetFunc, "ptrset func not found");
scalarFunc->setBody(util::series(
util::call(ptrsetFunc, {M->Nr<VarValue>(scalarFuncArgVars[0]), scalarExpr})));
auto *arraysTuple = util::makeTuple(arrays);
auto *loopFunc = M->getOrRealizeFunc(
"_loop_alloc",
{arraysTuple->getType(), scalarFunc->getType(), extraTuple->getType()},
{baseType}, FUSION_MODULE);
seqassertn(loopFunc, "loop_alloc func not found");
auto *result = util::makeVar(
util::call(loopFunc, {arraysTuple, M->Nr<VarValue>(scalarFunc), extraTuple}),
series, func);
// Free temporary arrays
apply([&](NumPyExpr &e) {
if (e.isLeaf() && e.freeable) {
auto it = vars.find(&e);
seqassertn(it != vars.end(), "NumPyExpr not found in vars map (fused eval)");
auto *var = it->second;
auto *freeFunc =
M->getOrRealizeFunc("_free", {var->getType()}, {}, FUSION_MODULE);
seqassertn(freeFunc, "free func not found");
series->push_back(util::call(freeFunc, {M->Nr<VarValue>(var)}));
}
});
return result;
}
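// Rough shape of the kernel generated above (hedged Codon-level pseudocode;
// the real function is assembled directly in IR):
//   def __numpy_fusion_scalar_fn(out: Ptr[T], in0: Ptr[T0], ..., extra):
//       out[0] = <scalar expression over in0[0], in1[0], ... and extra>
// _loop_alloc(arrays, __numpy_fusion_scalar_fn, extra) then allocates the
// output and drives this kernel across the broadcast, strided iteration
// space; leaves marked 'freeable' are released via _free afterwards.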
Var *NumPyExpr::codegenSequentialEval(CodegenContext &C) {
auto *M = C.M;
auto *series = C.series;
auto *func = C.func;
auto &vars = C.vars;
auto &T = C.T;
if (isLeaf()) {
auto it = vars.find(this);
seqassertn(it != vars.end(),
"NumPyExpr not found in vars map (codegen sequential eval)");
return it->second;
}
Var *lv = lhs->codegenSequentialEval(C);
Var *rv = rhs ? rhs->codegenSequentialEval(C) : nullptr;
Var *like = nullptr;
Value *outShapeVal = nullptr;
if (rv) {
// Can't do anything special with matmul here...
if (op == NP_OP_MATMUL) {
auto *matmul = M->getOrRealizeFunc("_matmul", {lv->getType(), rv->getType()}, {},
FUSION_MODULE);
return util::makeVar(
util::call(matmul, {M->Nr<VarValue>(lv), M->Nr<VarValue>(rv)}), series, func);
}
auto *lshape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
seqassertn(lshape, "shape func not found for left arg");
auto *rshape = M->getOrRealizeFunc("_shape", {rv->getType()}, {}, FUSION_MODULE);
seqassertn(rshape, "shape func not found for right arg");
auto *leftShape = util::call(lshape, {M->Nr<VarValue>(lv)});
auto *rightShape = util::call(rshape, {M->Nr<VarValue>(rv)});
auto *shape = M->getOrRealizeFunc(
"_broadcast", {leftShape->getType(), rightShape->getType()}, {}, FUSION_MODULE);
seqassertn(shape, "output shape func not found");
like = rhs->type.ndim > lhs->type.ndim ? rv : lv;
outShapeVal = util::call(shape, {leftShape, rightShape});
} else {
auto *shape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
seqassertn(shape, "shape func not found");
like = lv;
outShapeVal = util::call(shape, {M->Nr<VarValue>(lv)});
}
auto *outShape = util::makeVar(outShapeVal, series, func);
Var *result = nullptr;
bool lfreeable = lhs && lhs->type.isArray() && (lhs->freeable || !lhs->isLeaf());
bool rfreeable = rhs && rhs->type.isArray() && (rhs->freeable || !rhs->isLeaf());
bool ltmp = lfreeable && lhs->type.dtype == type.dtype && lhs->type.ndim == type.ndim;
bool rtmp = rfreeable && rhs->type.dtype == type.dtype && rhs->type.ndim == type.ndim;
auto *t = type.getIRBaseType(T);
auto newArray = [&]() {
auto *create = M->getOrRealizeFunc(
"_create", {like->getType(), outShape->getType()}, {t}, FUSION_MODULE);
seqassertn(create, "create func not found");
return util::call(create, {M->Nr<VarValue>(like), M->Nr<VarValue>(outShape)});
};
bool freeLeftStatic = false;
bool freeRightStatic = false;
Var *lcond = nullptr;
Var *rcond = nullptr;
if (rv) {
if (ltmp && rhs->type.ndim == 0) {
// We are adding lhs temp array to const or 0-dim array, so reuse lhs array.
result = lv;
} else if (rtmp && lhs->type.ndim == 0) {
// We are adding rhs temp array to const or 0-dim array, so reuse rhs array.
result = rv;
} else if (!ltmp && !rtmp) {
// Neither operand is a temp array, so we must allocate a new array.
result = util::makeVar(newArray(), series, func);
freeLeftStatic = lfreeable;
freeRightStatic = rfreeable;
} else if (ltmp && rtmp) {
// We won't know until runtime if we can reuse the temp array(s) since they
// might broadcast.
auto *lshape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
seqassertn(lshape, "shape function func not found for left arg");
auto *rshape = M->getOrRealizeFunc("_shape", {rv->getType()}, {}, FUSION_MODULE);
seqassertn(rshape, "shape function func not found for right arg");
auto *leftShape = util::call(lshape, {M->Nr<VarValue>(lv)});
auto *rightShape = util::call(rshape, {M->Nr<VarValue>(rv)});
lcond = util::makeVar(*leftShape == *M->Nr<VarValue>(outShape), series, func);
rcond = util::makeVar(*rightShape == *M->Nr<VarValue>(outShape), series, func);
auto *arr = M->Nr<TernaryInstr>(
M->Nr<VarValue>(lcond), M->Nr<VarValue>(lv),
M->Nr<TernaryInstr>(M->Nr<VarValue>(rcond), M->Nr<VarValue>(rv), newArray()));
result = util::makeVar(arr, series, func);
} else if (ltmp && !rtmp) {
// We won't know until runtime if we can reuse the temp array(s) since they
// might broadcast.
auto *lshape = M->getOrRealizeFunc("_shape", {lv->getType()}, {}, FUSION_MODULE);
seqassertn(lshape, "shape function func not found for left arg");
auto *leftShape = util::call(lshape, {M->Nr<VarValue>(lv)});
lcond = util::makeVar(*leftShape == *M->Nr<VarValue>(outShape), series, func);
auto *arr =
M->Nr<TernaryInstr>(M->Nr<VarValue>(lcond), M->Nr<VarValue>(lv), newArray());
result = util::makeVar(arr, series, func);
freeRightStatic = rfreeable;
} else if (!ltmp && rtmp) {
// We won't know until runtime if we can reuse the temp array(s) since they
// might broadcast.
auto *rshape = M->getOrRealizeFunc("_shape", {rv->getType()}, {}, FUSION_MODULE);
seqassertn(rshape, "shape function func not found for right arg");
auto *rightShape = util::call(rshape, {M->Nr<VarValue>(rv)});
rcond = util::makeVar(*rightShape == *M->Nr<VarValue>(outShape), series, func);
auto *arr =
M->Nr<TernaryInstr>(M->Nr<VarValue>(rcond), M->Nr<VarValue>(rv), newArray());
result = util::makeVar(arr, series, func);
freeLeftStatic = lfreeable;
}
} else {
if (ltmp) {
result = lv;
} else {
result = util::makeVar(newArray(), series, func);
freeLeftStatic = lfreeable;
}
}
auto opstr = opstring();
if (haveVectorizedLoop()) {
// We have a vectorized loop available for this operation.
if (rv) {
auto *vecloop = M->getOrRealizeFunc(
"_apply_vectorized_loop_binary",
{lv->getType(), rv->getType(), result->getType()}, {opstr}, FUSION_MODULE);
seqassertn(vecloop, "binary vec loop func not found ({})", opstr);
series->push_back(util::call(vecloop, {M->Nr<VarValue>(lv), M->Nr<VarValue>(rv),
M->Nr<VarValue>(result)}));
} else {
auto *vecloop = M->getOrRealizeFunc("_apply_vectorized_loop_unary",
{lv->getType(), result->getType()}, {opstr},
FUSION_MODULE);
seqassertn(vecloop, "unary vec loop func not found ({})", opstr);
series->push_back(
util::call(vecloop, {M->Nr<VarValue>(lv), M->Nr<VarValue>(result)}));
}
} else {
// Arrays for scalar expression function
std::vector<Value *> arrays = {M->Nr<VarValue>(result)};
std::vector<std::string> scalarFuncArgNames;
std::vector<types::Type *> scalarFuncArgTypes;
std::unordered_map<NumPyExpr *, Var *> scalarFuncArgMap;
// Scalars passed through 'extra' arg of ndarray._loop()
std::vector<Value *> extra;
auto *baseType = type.getIRBaseType(T);
scalarFuncArgNames.push_back("out");
scalarFuncArgTypes.push_back(M->getPointerType(baseType));
if (lhs->type.isArray()) {
if (result != lv) {
scalarFuncArgNames.push_back("in0");
scalarFuncArgTypes.push_back(M->getPointerType(lhs->type.getIRBaseType(T)));
arrays.push_back(M->Nr<VarValue>(lv));
}
} else {
extra.push_back(M->Nr<VarValue>(lv));
}
if (rv) {
if (rhs->type.isArray()) {
if (result != rv) {
scalarFuncArgNames.push_back("in1");
scalarFuncArgTypes.push_back(M->getPointerType(rhs->type.getIRBaseType(T)));
arrays.push_back(M->Nr<VarValue>(rv));
}
} else {
extra.push_back(M->Nr<VarValue>(rv));
}
}
auto *extraTuple = util::makeTuple(extra, M);
scalarFuncArgNames.push_back("extra");
scalarFuncArgTypes.push_back(extraTuple->getType());
auto *scalarFuncType = M->getFuncType(M->getNoneType(), scalarFuncArgTypes);
auto *scalarFunc = M->Nr<BodiedFunc>("__numpy_fusion_scalar_fn");
scalarFunc->realize(scalarFuncType, scalarFuncArgNames);
std::vector<Var *> scalarFuncArgVars(scalarFunc->arg_begin(),
scalarFunc->arg_end());
auto *body = M->Nr<SeriesFlow>();
auto name = "_" + opstr;
auto deref = [&](unsigned idx) {
return (*M->Nr<VarValue>(scalarFuncArgVars[idx]))[*M->getInt(0)];
};
if (rv) {
Value *litem = nullptr;
Value *ritem = nullptr;
if (lhs->type.isArray() && rhs->type.isArray()) {
if (result == lv) {
litem = deref(0);
ritem = deref(1);
} else if (result == rv) {
litem = deref(1);
ritem = deref(0);
} else {
litem = deref(1);
ritem = deref(2);
}
} else if (lhs->type.isArray()) {
if (result == lv) {
litem = deref(0);
} else {
litem = deref(1);
}
ritem = util::tupleGet(M->Nr<VarValue>(scalarFuncArgVars.back()), 0);
} else if (rhs->type.isArray()) {
if (result == rv) {
ritem = deref(0);
} else {
ritem = deref(1);
}
litem = util::tupleGet(M->Nr<VarValue>(scalarFuncArgVars.back()), 0);
} else {
seqassertn(false, "both lhs are rhs are scalars");
}
auto *commonType = decideTypes(this, lhs->type, rhs->type, T);
auto *lcast =
M->getOrRealizeFunc("_cast", {litem->getType()}, {commonType}, FUSION_MODULE);
seqassertn(lcast, "cast func not found for left arg");
litem = util::call(lcast, {litem});
auto *rcast =
M->getOrRealizeFunc("_cast", {ritem->getType()}, {commonType}, FUSION_MODULE);
seqassertn(rcast, "cast func not found for left arg");
ritem = util::call(rcast, {ritem});
auto *op = M->getOrRealizeFunc(name, {litem->getType(), ritem->getType()}, {},
FUSION_MODULE);
seqassertn(op, "2-op func '{}' not found", name);
auto *oitem = util::call(op, {litem, ritem});
auto *ptrsetFunc = M->getOrRealizeFunc(
"_ptrset", {scalarFuncArgTypes[0], oitem->getType()}, {}, FUSION_MODULE);
seqassertn(ptrsetFunc, "ptrset func not found");
body->push_back(
util::call(ptrsetFunc, {M->Nr<VarValue>(scalarFuncArgVars[0]), oitem}));
} else {
auto *litem = deref(result == lv ? 0 : 1);
auto *op = M->getOrRealizeFunc(name, {litem->getType()}, {}, FUSION_MODULE);
seqassertn(op, "1-op func '{}' not found", name);
auto *oitem = util::call(op, {litem});
auto *ptrsetFunc = M->getOrRealizeFunc(
"_ptrset", {scalarFuncArgTypes[0], oitem->getType()}, {}, FUSION_MODULE);
seqassertn(ptrsetFunc, "ptrset func not found");
body->push_back(
util::call(ptrsetFunc, {M->Nr<VarValue>(scalarFuncArgVars[0]), oitem}));
}
scalarFunc->setBody(body);
auto *arraysTuple = util::makeTuple(arrays);
auto *loopFunc = M->getOrRealizeFunc(
"_loop_basic",
{arraysTuple->getType(), scalarFunc->getType(), extraTuple->getType()}, {},
FUSION_MODULE);
seqassertn(loopFunc, "loop_basic func not found");
series->push_back(
util::call(loopFunc, {arraysTuple, M->Nr<VarValue>(scalarFunc), extraTuple}));
}
auto freeArray = [&](Var *arr) {
auto *freeFunc = M->getOrRealizeFunc("_free", {arr->getType()}, {}, FUSION_MODULE);
seqassertn(freeFunc, "free func not found");
return util::call(freeFunc, {M->Nr<VarValue>(arr)});
};
seqassertn(!(freeLeftStatic && lcond), "unexpected free conditions for left arg");
seqassertn(!(freeRightStatic && rcond), "unexpected free conditions for right arg");
if (lcond && rcond) {
series->push_back(M->Nr<IfFlow>(
M->Nr<VarValue>(lcond), util::series(freeArray(rv)),
util::series(freeArray(lv),
M->Nr<IfFlow>(M->Nr<VarValue>(rcond), M->Nr<SeriesFlow>(),
util::series(freeArray(rv))))));
} else {
if (freeLeftStatic) {
series->push_back(freeArray(lv));
} else if (lcond) {
series->push_back(M->Nr<IfFlow>(M->Nr<VarValue>(lcond), M->Nr<SeriesFlow>(),
util::series(freeArray(lv))));
}
if (freeRightStatic) {
series->push_back(freeArray(rv));
} else if (rcond) {
series->push_back(M->Nr<IfFlow>(M->Nr<VarValue>(rcond), M->Nr<SeriesFlow>(),
util::series(freeArray(rv))));
}
}
return result;
}
BroadcastInfo NumPyExpr::getBroadcastInfo() {
int64_t arrDim = -1;
Var *varLeaf = nullptr;
bool multipleLeafVars = false;
int numNonVarLeafArrays = 0;
bool definitelyBroadcasts = false;
apply([&](NumPyExpr &e) {
if (e.isLeaf() && e.type.isArray()) {
if (arrDim == -1) {
arrDim = e.type.ndim;
} else if (arrDim != e.type.ndim) {
definitelyBroadcasts = true;
}
if (auto *v = cast<VarValue>(e.val)) {
if (varLeaf) {
if (varLeaf != v->getVar())
multipleLeafVars = true;
} else {
varLeaf = v->getVar();
}
} else {
++numNonVarLeafArrays;
}
}
});
bool mightBroadcast = numNonVarLeafArrays > 1 || multipleLeafVars ||
(numNonVarLeafArrays == 1 && varLeaf);
if (definitelyBroadcasts) {
return BroadcastInfo::YES;
} else if (mightBroadcast) {
return BroadcastInfo::MAYBE;
} else {
return BroadcastInfo::NO;
}
}
Value *NumPyExpr::codegenScalarExpr(
CodegenContext &C, const std::unordered_map<NumPyExpr *, Var *> &args,
const std::unordered_map<NumPyExpr *, unsigned> &scalarMap, Var *scalars) {
auto *M = C.M;
auto &T = C.T;
Value *lv = lhs ? lhs->codegenScalarExpr(C, args, scalarMap, scalars) : nullptr;
Value *rv = rhs ? rhs->codegenScalarExpr(C, args, scalarMap, scalars) : nullptr;
auto name = "_" + opstring();
if (lv && rv) {
auto *t = type.getIRBaseType(T);
auto *commonType = decideTypes(this, lhs->type, rhs->type, T);
auto *cast1 =
M->getOrRealizeFunc("_cast", {lv->getType()}, {commonType}, FUSION_MODULE);
auto *cast2 =
M->getOrRealizeFunc("_cast", {rv->getType()}, {commonType}, FUSION_MODULE);
lv = util::call(cast1, {lv});
rv = util::call(cast2, {rv});
auto *f =
M->getOrRealizeFunc(name, {lv->getType(), rv->getType()}, {}, FUSION_MODULE);
seqassertn(f, "2-op func '{}' not found", name);
return util::call(f, {lv, rv});
} else if (lv) {
auto *t = type.getIRBaseType(T);
auto *f = M->getOrRealizeFunc(name, {lv->getType()}, {}, FUSION_MODULE);
seqassertn(f, "1-op func '{}' not found", name);
return util::call(f, {lv});
} else {
if (type.isArray()) {
auto it = args.find(this);
seqassertn(it != args.end(), "NumPyExpr not found in args map (codegen expr)");
auto *var = it->second;
return (*M->Nr<VarValue>(var))[*M->getInt(0)];
} else {
auto it = scalarMap.find(this);
seqassertn(it != scalarMap.end(),
"NumPyExpr not found in scalar map (codegen expr)");
auto idx = it->second;
return util::tupleGet(M->Nr<VarValue>(scalars), idx);
}
}
}
} // namespace numpy
} // namespace transform
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,385 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "numpy.h"
namespace codon {
namespace ir {
namespace transform {
namespace numpy {
namespace {
using CFG = analyze::dataflow::CFGraph;
using CFBlock = analyze::dataflow::CFBlock;
using RD = analyze::dataflow::RDInspector;
using SE = analyze::module::SideEffectResult;
struct GetVars : public util::Operator {
std::unordered_set<id_t> &vids;
explicit GetVars(std::unordered_set<id_t> &vids) : util::Operator(), vids(vids) {}
void preHook(Node *v) override {
for (auto *var : v->getUsedVariables()) {
if (!isA<Func>(var))
vids.insert(var->getId());
}
}
};
struct OkToForwardPast : public util::Operator {
std::unordered_set<id_t> &vids;
const std::unordered_map<id_t, NumPyExpr *> &parsedValues;
SE *se;
bool ok;
OkToForwardPast(std::unordered_set<id_t> &vids,
const std::unordered_map<id_t, NumPyExpr *> &parsedValues, SE *se)
: util::Operator(), vids(vids), parsedValues(parsedValues), se(se), ok(true) {}
void preHook(Node *v) override {
if (!ok) {
return;
} else if (auto *assign = cast<AssignInstr>(v)) {
if (vids.count(assign->getLhs()->getId()))
ok = false;
} else if (auto *val = cast<Value>(v)) {
auto it = parsedValues.find(val->getId());
if (it != parsedValues.end()) {
it->second->apply([&](NumPyExpr &e) {
if (e.isLeaf() && se->hasSideEffect(e.val))
ok = false;
});
// Skip children since we are processing them manually above.
for (auto *used : val->getUsedValues())
see(used);
} else if (se->hasSideEffect(val)) {
ok = false;
}
}
}
};
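// Collects every value in the function that uses 'var'.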
struct GetAllUses : public util::Operator {
Var *var;
std::vector<Value *> &uses;
GetAllUses(Var *var, std::vector<Value *> &uses)
: util::Operator(), var(var), uses(uses) {}
void preHook(Node *n) override {
if (auto *v = cast<Value>(n)) {
auto vars = v->getUsedVariables();
if (std::find(vars.begin(), vars.end(), var) != vars.end())
uses.push_back(v);
}
}
};
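// Returns true if the expression computed at 'source' remains valid along the
// given CFG path up to 'destination', i.e. no value strictly between the two
// blocks forwarding (per OkToForwardPast).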
bool canForwardExpressionAlongPath(
Value *source, Value *destination, std::unordered_set<id_t> &vids,
const std::unordered_map<id_t, NumPyExpr *> &parsedValues, SE *se,
const std::vector<CFBlock *> &path) {
if (path.empty())
return true;
bool go = false;
for (auto *block : path) {
for (const auto *value : *block) {
// Skip things before 'source' in first block
if (!go && block == path.front() && value == source) {
go = true;
continue;
}
// Skip things after 'destination' in last block
if (go && block == path.back() && value == destination) {
go = false;
break;
}
if (!go)
continue;
OkToForwardPast check(vids, parsedValues, se);
const_cast<Value *>(value)->accept(check);
if (!check.ok)
return false;
}
}
return true;
}
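// Returns true if the expression assigned by 'expr' can be forwarded to
// 'target': every leaf must be free of side effects, and every CFG path from
// the assignment to the target must pass the per-path check above.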
bool canForwardExpression(NumPyOptimizationUnit *expr, Value *target,
const std::unordered_map<id_t, NumPyExpr *> &parsedValues,
CFG *cfg, SE *se) {
std::unordered_set<id_t> vids;
bool pure = true;
expr->expr->apply([&](NumPyExpr &e) {
if (e.isLeaf()) {
if (se->hasSideEffect(e.val)) {
pure = false;
} else {
GetVars gv(vids);
e.val->accept(gv);
}
}
});
if (!pure)
return false;
auto *source = expr->assign;
auto *start = cfg->getBlock(source);
auto *end = cfg->getBlock(target);
seqassertn(start, "start CFG block not found");
seqassertn(end, "end CFG block not found");
bool ok = true;
std::function<void(CFBlock *, std::vector<CFBlock *> &)> dfs =
[&](CFBlock *curr, std::vector<CFBlock *> &path) {
path.push_back(curr);
if (curr == end) {
if (!canForwardExpressionAlongPath(source, target, vids, parsedValues, se,
path))
ok = false;
} else {
for (auto it = curr->successors_begin(); it != curr->successors_end(); ++it) {
// Recurse only into successors not already on the current path (cycle guard).
if (std::find(path.begin(), path.end(), *it) == path.end())
dfs(*it, path);
}
}
path.pop_back();
};
std::vector<CFBlock *> path;
dfs(start, path);
return ok;
}
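// Returns true if the variable defined by 'assign' can be forwarded into
// 'destination': the assignment must be the only definition reaching the
// destination, and it must reach no other use of the variable.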
bool canForwardVariable(AssignInstr *assign, Value *destination, BodiedFunc *func,
RD *rd) {
auto *var = assign->getLhs();
// Check 1: Only the given assignment should reach the destination.
auto reaching = rd->getReachingDefinitions(var, destination);
if (reaching.size() != 1 || *reaching.begin() != assign->getRhs()->getId())
return false;
// Check 2: There should be no other uses of the variable that the given assignment
// reaches.
std::vector<Value *> uses;
GetAllUses gu(var, uses);
func->accept(gu);
for (auto *use : uses) {
if (use != destination && use->getId() != assign->getId() &&
rd->getReachingDefinitions(var, use).count(assign->getRhs()->getId()))
return false;
}
return true;
}
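// Builds a DAG over the extracted expressions: an edge is added from a
// destination expression to a source expression when a variable leaf of the
// destination can safely be replaced by the expression assigned to that
// variable.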
ForwardingDAG buildForwardingDAG(BodiedFunc *func, RD *rd, CFG *cfg, SE *se,
std::vector<NumPyOptimizationUnit> &exprs) {
std::unordered_map<id_t, NumPyExpr *> parsedValues;
for (auto &e : exprs) {
e.expr->apply([&](NumPyExpr &e) {
if (e.val)
parsedValues.emplace(e.val->getId(), &e);
});
}
ForwardingDAG dag;
int64_t dstId = 0;
for (auto &dst : exprs) {
auto *target = dst.expr.get();
auto &forwardingVec = dag[&dst];
std::vector<std::pair<Var *, NumPyExpr *>> vars;
target->apply([&](NumPyExpr &e) {
if (e.isLeaf()) {
if (auto *v = cast<VarValue>(e.val)) {
vars.emplace_back(v->getVar(), &e);
}
}
});
for (auto &p : vars) {
int64_t srcId = 0;
for (auto &src : exprs) {
if (srcId != dstId && src.assign && src.assign->getLhs() == p.first) {
auto checkFwdVar = canForwardVariable(src.assign, p.second->val, func, rd);
auto checkFwdExpr =
canForwardExpression(&src, p.second->val, parsedValues, cfg, se);
if (checkFwdVar && checkFwdExpr)
forwardingVec.push_back({&dst, &src, p.first, p.second, dstId, srcId});
}
++srcId;
}
}
++dstId;
}
return dag;
}
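// Union-find (disjoint-set) structure with path compression and union by rank,
// used to group expressions into connected components.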
struct UnionFind {
std::vector<int64_t> parent;
std::vector<int64_t> rank;
explicit UnionFind(int64_t n) : parent(n), rank(n) {
for (auto i = 0; i < n; i++) {
parent[i] = i;
rank[i] = 0;
}
}
int64_t find(int64_t u) {
if (parent[u] != u)
parent[u] = find(parent[u]);
return parent[u];
}
void union_(int64_t u, int64_t v) {
auto ru = find(u);
auto rv = find(v);
if (ru != rv) {
if (rank[ru] > rank[rv]) {
parent[rv] = ru;
} else if (rank[ru] < rank[rv]) {
parent[ru] = rv;
} else {
parent[rv] = ru;
++rank[ru];
}
}
}
};
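// Splits the forwarding DAG into connected components so each group of
// inter-dependent expressions can be processed independently.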
std::vector<ForwardingDAG>
getForwardingDAGConnectedComponents(ForwardingDAG &dag,
std::vector<NumPyOptimizationUnit> &exprs) {
auto n = exprs.size();
UnionFind uf(n);
for (auto i = 0; i < n; i++) {
for (auto &fwd : dag[&exprs[i]]) {
uf.union_(i, fwd.srcId);
}
}
std::vector<std::vector<NumPyOptimizationUnit *>> components(n);
for (auto i = 0; i < n; i++) {
auto root = uf.find(i);
components[root].push_back(&exprs[i]);
}
std::vector<ForwardingDAG> result;
for (auto &c : components) {
if (c.empty())
continue;
ForwardingDAG d;
for (auto *expr : c)
d.emplace(expr, dag[expr]);
result.push_back(d);
}
return result;
}
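// DFS-based cycle detection over the forwarding DAG; cyclic components are
// discarded by getForwardingDAGs() below.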
bool hasCycleHelper(int64_t v, ForwardingDAG &dag,
std::vector<NumPyOptimizationUnit> &exprs,
std::vector<bool> &visited, std::vector<bool> &recStack) {
visited[v] = true;
recStack[v] = true;
for (auto &neighbor : dag[&exprs[v]]) {
if (!visited[neighbor.srcId]) {
if (hasCycleHelper(neighbor.srcId, dag, exprs, visited, recStack))
return true;
} else if (recStack[neighbor.srcId]) {
return true;
}
}
recStack[v] = false;
return false;
}
bool hasCycle(ForwardingDAG &dag, std::vector<NumPyOptimizationUnit> &exprs) {
auto n = exprs.size();
std::vector<bool> visited(n, false);
std::vector<bool> recStack(n, false);
for (auto i = 0; i < n; i++) {
if (dag.find(&exprs[i]) != dag.end() && !visited[i] &&
hasCycleHelper(i, dag, exprs, visited, recStack))
return true;
}
return false;
}
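// Applies forwardings bottom-up: each source expression is substituted into the
// destination leaf that referenced its variable, the source's leaves are
// appended to the destination's leaf list, and the source's assignment is
// scheduled for deletion.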
void doForwardingHelper(ForwardingDAG &dag, NumPyOptimizationUnit *curr,
std::unordered_set<NumPyOptimizationUnit *> &done,
std::vector<AssignInstr *> &assignsToDelete) {
if (done.count(curr))
return;
auto forwardings = dag[curr];
for (auto &fwd : forwardings) {
doForwardingHelper(dag, fwd.src, done, assignsToDelete);
// Note that order of leaves here doesn't matter since they're guaranteed to have no
// side effects based on forwarding checks.
fwd.dst->leaves.insert(fwd.dst->leaves.end(), fwd.src->leaves.begin(),
fwd.src->leaves.end());
fwd.dstLeaf->replace(*fwd.src->expr);
assignsToDelete.push_back(fwd.src->assign);
}
done.insert(curr);
}
} // namespace
std::vector<ForwardingDAG>
getForwardingDAGs(BodiedFunc *func, RD *rd, CFG *cfg, SE *se,
std::vector<NumPyOptimizationUnit> &exprs) {
auto dag = buildForwardingDAG(func, rd, cfg, se, exprs);
auto dags = getForwardingDAGConnectedComponents(dag, exprs);
dags.erase(std::remove_if(dags.begin(), dags.end(),
[&](ForwardingDAG &dag) { return hasCycle(dag, exprs); }),
dags.end());
return dags;
}
NumPyOptimizationUnit *doForwarding(ForwardingDAG &dag,
std::vector<AssignInstr *> &assignsToDelete) {
seqassertn(!dag.empty(), "empty forwarding DAG encountered");
std::unordered_set<NumPyOptimizationUnit *> done;
for (auto &e : dag) {
doForwardingHelper(dag, e.first, done, assignsToDelete);
}
// Find the root
std::unordered_set<NumPyOptimizationUnit *> notRoot;
for (auto &e : dag) {
for (auto &f : e.second) {
notRoot.insert(f.src);
}
}
seqassertn(notRoot.size() == dag.size() - 1,
"multiple roots found in forwarding DAG");
for (auto &e : dag) {
if (notRoot.count(e.first) == 0)
return e.first;
}
seqassertn(false, "could not find root in forwarding DAG");
return nullptr;
}
} // namespace numpy
} // namespace transform
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,877 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "numpy.h"
#include "codon/cir/analyze/dataflow/reaching.h"
#include "codon/cir/analyze/module/global_vars.h"
#include "codon/cir/analyze/module/side_effect.h"
#include "codon/cir/util/cloning.h"
#include "codon/cir/util/irtools.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <complex>
#include <sstream>
#include <utility>
#define XLOG(c, ...) \
do { \
if (Verbose) \
LOG(c, ##__VA_ARGS__); \
} while (false)
namespace codon {
namespace ir {
namespace transform {
namespace numpy {
namespace {
llvm::cl::opt<int> AlwaysFuseCostThreshold(
"npfuse-always", llvm::cl::desc("Expression cost below which (<=) to always fuse"),
llvm::cl::init(10));
llvm::cl::opt<int> NeverFuseCostThreshold(
"npfuse-never", llvm::cl::desc("Expression cost above which (>) to never fuse"),
llvm::cl::init(50));
llvm::cl::opt<bool> Verbose("npfuse-verbose",
llvm::cl::desc("Print information about fused expressions"),
llvm::cl::init(false));
bool isArrayType(types::Type *t) {
return t && isA<types::RecordType>(t) &&
t->getName().rfind("std.numpy.ndarray.ndarray[", 0) == 0;
}
bool isUFuncType(types::Type *t) {
return t && (t->getName().rfind("std.numpy.ufunc.UnaryUFunc[", 0) == 0 ||
t->getName().rfind("std.numpy.ufunc.BinaryUFunc[", 0) == 0);
}
bool isNoneType(types::Type *t, NumPyPrimitiveTypes &T) {
return t && (t->is(T.none) || t->is(T.optnone));
}
} // namespace
const std::string FUSION_MODULE = "std.numpy.fusion";
NumPyPrimitiveTypes::NumPyPrimitiveTypes(Module *M)
: none(M->getNoneType()), optnone(M->getOptionalType(none)),
bool_(M->getBoolType()), i8(M->getIntNType(8, true)),
u8(M->getIntNType(8, false)), i16(M->getIntNType(16, true)),
u16(M->getIntNType(16, false)), i32(M->getIntNType(32, true)),
u32(M->getIntNType(32, false)), i64(M->getIntType()),
u64(M->getIntNType(64, false)), f16(M->getFloat16Type()),
f32(M->getFloat32Type()), f64(M->getFloatType()),
c64(M->getType("std.internal.types.complex.complex64")),
c128(M->getType("std.internal.types.complex.complex")) {}
NumPyType::NumPyType(Type dtype, int64_t ndim) : dtype(dtype), ndim(ndim) {
seqassertn(ndim >= 0, "ndim must be non-negative");
}
NumPyType::NumPyType() : NumPyType(NP_TYPE_NONE) {}
NumPyType NumPyType::get(types::Type *t, NumPyPrimitiveTypes &T) {
if (t->is(T.bool_))
return {NumPyType::NP_TYPE_BOOL};
if (t->is(T.i8))
return {NumPyType::NP_TYPE_I8};
if (t->is(T.u8))
return {NumPyType::NP_TYPE_U8};
if (t->is(T.i16))
return {NumPyType::NP_TYPE_I16};
if (t->is(T.u16))
return {NumPyType::NP_TYPE_U16};
if (t->is(T.i32))
return {NumPyType::NP_TYPE_I32};
if (t->is(T.u32))
return {NumPyType::NP_TYPE_U32};
if (t->is(T.i64))
return {NumPyType::NP_TYPE_I64};
if (t->is(T.u64))
return {NumPyType::NP_TYPE_U64};
if (t->is(T.f16))
return {NumPyType::NP_TYPE_F16};
if (t->is(T.f32))
return {NumPyType::NP_TYPE_F32};
if (t->is(T.f64))
return {NumPyType::NP_TYPE_F64};
if (t->is(T.c64))
return {NumPyType::NP_TYPE_C64};
if (t->is(T.c128))
return {NumPyType::NP_TYPE_C128};
if (isArrayType(t)) {
auto generics = t->getGenerics();
seqassertn(generics.size() == 2 && generics[0].isType() && generics[1].isStatic(),
"unrecognized ndarray generics");
auto *dtype = generics[0].getTypeValue();
auto ndim = generics[1].getStaticValue();
if (dtype->is(T.bool_))
return {NumPyType::NP_TYPE_ARR_BOOL, ndim};
if (dtype->is(T.i8))
return {NumPyType::NP_TYPE_ARR_I8, ndim};
if (dtype->is(T.u8))
return {NumPyType::NP_TYPE_ARR_U8, ndim};
if (dtype->is(T.i16))
return {NumPyType::NP_TYPE_ARR_I16, ndim};
if (dtype->is(T.u16))
return {NumPyType::NP_TYPE_ARR_U16, ndim};
if (dtype->is(T.i32))
return {NumPyType::NP_TYPE_ARR_I32, ndim};
if (dtype->is(T.u32))
return {NumPyType::NP_TYPE_ARR_U32, ndim};
if (dtype->is(T.i64))
return {NumPyType::NP_TYPE_ARR_I64, ndim};
if (dtype->is(T.u64))
return {NumPyType::NP_TYPE_ARR_U64, ndim};
if (dtype->is(T.f16))
return {NumPyType::NP_TYPE_ARR_F16, ndim};
if (dtype->is(T.f32))
return {NumPyType::NP_TYPE_ARR_F32, ndim};
if (dtype->is(T.f64))
return {NumPyType::NP_TYPE_ARR_F64, ndim};
if (dtype->is(T.c64))
return {NumPyType::NP_TYPE_ARR_C64, ndim};
if (dtype->is(T.c128))
return {NumPyType::NP_TYPE_ARR_C128, ndim};
}
return {};
}
types::Type *NumPyType::getIRBaseType(NumPyPrimitiveTypes &T) const {
switch (dtype) {
case NP_TYPE_NONE:
seqassertn(false, "unexpected type code (NONE)");
return nullptr;
case NP_TYPE_BOOL:
return T.bool_;
case NP_TYPE_I8:
return T.i8;
case NP_TYPE_U8:
return T.u8;
case NP_TYPE_I16:
return T.i16;
case NP_TYPE_U16:
return T.u16;
case NP_TYPE_I32:
return T.i32;
case NP_TYPE_U32:
return T.u32;
case NP_TYPE_I64:
return T.i64;
case NP_TYPE_U64:
return T.u64;
case NP_TYPE_F16:
return T.f16;
case NP_TYPE_F32:
return T.f32;
case NP_TYPE_F64:
return T.f64;
case NP_TYPE_C64:
return T.c64;
case NP_TYPE_C128:
return T.c128;
case NP_TYPE_SCALAR_END:
seqassertn(false, "unexpected type code (SCALAR_END)");
return nullptr;
case NP_TYPE_ARR_BOOL:
return T.bool_;
case NP_TYPE_ARR_I8:
return T.i8;
case NP_TYPE_ARR_U8:
return T.u8;
case NP_TYPE_ARR_I16:
return T.i16;
case NP_TYPE_ARR_U16:
return T.u16;
case NP_TYPE_ARR_I32:
return T.i32;
case NP_TYPE_ARR_U32:
return T.u32;
case NP_TYPE_ARR_I64:
return T.i64;
case NP_TYPE_ARR_U64:
return T.u64;
case NP_TYPE_ARR_F16:
return T.f16;
case NP_TYPE_ARR_F32:
return T.f32;
case NP_TYPE_ARR_F64:
return T.f64;
case NP_TYPE_ARR_C64:
return T.c64;
case NP_TYPE_ARR_C128:
return T.c128;
default:
seqassertn(false, "unexpected type code (?)");
return nullptr;
}
}
std::ostream &operator<<(std::ostream &os, NumPyType const &type) {
static const std::unordered_map<NumPyType::Type, std::string> typestrings = {
{NumPyType::NP_TYPE_NONE, "none"}, {NumPyType::NP_TYPE_BOOL, "bool"},
{NumPyType::NP_TYPE_I8, "i8"}, {NumPyType::NP_TYPE_U8, "u8"},
{NumPyType::NP_TYPE_I16, "i16"}, {NumPyType::NP_TYPE_U16, "u16"},
{NumPyType::NP_TYPE_I32, "i32"}, {NumPyType::NP_TYPE_U32, "u32"},
{NumPyType::NP_TYPE_I64, "i64"}, {NumPyType::NP_TYPE_U64, "u64"},
{NumPyType::NP_TYPE_F16, "f16"}, {NumPyType::NP_TYPE_F32, "f32"},
{NumPyType::NP_TYPE_F64, "f64"}, {NumPyType::NP_TYPE_C64, "c64"},
{NumPyType::NP_TYPE_C128, "c128"}, {NumPyType::NP_TYPE_SCALAR_END, ""},
{NumPyType::NP_TYPE_ARR_BOOL, "bool"}, {NumPyType::NP_TYPE_ARR_I8, "i8"},
{NumPyType::NP_TYPE_ARR_U8, "u8"}, {NumPyType::NP_TYPE_ARR_I16, "i16"},
{NumPyType::NP_TYPE_ARR_U16, "u16"}, {NumPyType::NP_TYPE_ARR_I32, "i32"},
{NumPyType::NP_TYPE_ARR_U32, "u32"}, {NumPyType::NP_TYPE_ARR_I64, "i64"},
{NumPyType::NP_TYPE_ARR_U64, "u64"}, {NumPyType::NP_TYPE_ARR_F16, "f16"},
{NumPyType::NP_TYPE_ARR_F32, "f32"}, {NumPyType::NP_TYPE_ARR_F64, "f64"},
{NumPyType::NP_TYPE_ARR_C64, "c64"}, {NumPyType::NP_TYPE_ARR_C128, "c128"},
};
auto it = typestrings.find(type.dtype);
seqassertn(it != typestrings.end(), "type not found");
auto s = it->second;
if (type.isArray())
os << "array[" << s << ", " << type.ndim << "]";
else
os << s;
return os;
}
std::string NumPyType::str() const {
std::stringstream buffer;
buffer << *this;
return buffer.str();
}
CodegenContext::CodegenContext(Module *M, SeriesFlow *series, BodiedFunc *func,
NumPyPrimitiveTypes &T)
: M(M), series(series), func(func), vars(), T(T) {}
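// Recursively parses an IR value into a NumPyExpr tree. Recognized forms are
// matmul/dot calls, builtin abs(), transpose, ufunc calls with default
// 'out'/'where' arguments, ndarray magic-method calls and right-hand-side
// magic flows; anything unrecognized (and any scalar or 0-dim result) becomes
// a leaf recorded in 'leaves'.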
std::unique_ptr<NumPyExpr> parse(Value *v,
std::vector<std::pair<NumPyExpr *, Value *>> &leaves,
NumPyPrimitiveTypes &T) {
struct NumPyMagicMethod {
std::string name;
NumPyExpr::Op op;
int args;
bool right;
};
struct NumPyUFunc {
std::string name;
NumPyExpr::Op op;
int args;
};
static std::vector<NumPyMagicMethod> magics = {
{Module::POS_MAGIC_NAME, NumPyExpr::NP_OP_POS, 1, false},
{Module::NEG_MAGIC_NAME, NumPyExpr::NP_OP_NEG, 1, false},
{Module::INVERT_MAGIC_NAME, NumPyExpr::NP_OP_INVERT, 1, false},
{Module::ABS_MAGIC_NAME, NumPyExpr::NP_OP_ABS, 1, false},
{Module::ADD_MAGIC_NAME, NumPyExpr::NP_OP_ADD, 2, false},
{Module::SUB_MAGIC_NAME, NumPyExpr::NP_OP_SUB, 2, false},
{Module::MUL_MAGIC_NAME, NumPyExpr::NP_OP_MUL, 2, false},
{Module::MATMUL_MAGIC_NAME, NumPyExpr::NP_OP_MATMUL, 2, false},
{Module::TRUE_DIV_MAGIC_NAME, NumPyExpr::NP_OP_TRUE_DIV, 2, false},
{Module::FLOOR_DIV_MAGIC_NAME, NumPyExpr::NP_OP_FLOOR_DIV, 2, false},
{Module::MOD_MAGIC_NAME, NumPyExpr::NP_OP_MOD, 2, false},
{Module::POW_MAGIC_NAME, NumPyExpr::NP_OP_POW, 2, false},
{Module::LSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_LSHIFT, 2, false},
{Module::RSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_RSHIFT, 2, false},
{Module::AND_MAGIC_NAME, NumPyExpr::NP_OP_AND, 2, false},
{Module::OR_MAGIC_NAME, NumPyExpr::NP_OP_OR, 2, false},
{Module::XOR_MAGIC_NAME, NumPyExpr::NP_OP_XOR, 2, false},
{Module::RADD_MAGIC_NAME, NumPyExpr::NP_OP_ADD, 2, true},
{Module::RSUB_MAGIC_NAME, NumPyExpr::NP_OP_SUB, 2, true},
{Module::RMUL_MAGIC_NAME, NumPyExpr::NP_OP_MUL, 2, true},
{Module::RMATMUL_MAGIC_NAME, NumPyExpr::NP_OP_MATMUL, 2, true},
{Module::RTRUE_DIV_MAGIC_NAME, NumPyExpr::NP_OP_TRUE_DIV, 2, true},
{Module::RFLOOR_DIV_MAGIC_NAME, NumPyExpr::NP_OP_FLOOR_DIV, 2, true},
{Module::RMOD_MAGIC_NAME, NumPyExpr::NP_OP_MOD, 2, true},
{Module::RPOW_MAGIC_NAME, NumPyExpr::NP_OP_POW, 2, true},
{Module::RLSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_LSHIFT, 2, true},
{Module::RRSHIFT_MAGIC_NAME, NumPyExpr::NP_OP_RSHIFT, 2, true},
{Module::RAND_MAGIC_NAME, NumPyExpr::NP_OP_AND, 2, true},
{Module::ROR_MAGIC_NAME, NumPyExpr::NP_OP_OR, 2, true},
{Module::RXOR_MAGIC_NAME, NumPyExpr::NP_OP_XOR, 2, true},
{Module::EQ_MAGIC_NAME, NumPyExpr::NP_OP_EQ, 2, false},
{Module::NE_MAGIC_NAME, NumPyExpr::NP_OP_NE, 2, false},
{Module::LT_MAGIC_NAME, NumPyExpr::NP_OP_LT, 2, false},
{Module::LE_MAGIC_NAME, NumPyExpr::NP_OP_LE, 2, false},
{Module::GT_MAGIC_NAME, NumPyExpr::NP_OP_GT, 2, false},
{Module::GE_MAGIC_NAME, NumPyExpr::NP_OP_GE, 2, false},
};
static std::vector<NumPyUFunc> ufuncs = {
{"positive", NumPyExpr::NP_OP_POS, 1},
{"negative", NumPyExpr::NP_OP_NEG, 1},
{"invert", NumPyExpr::NP_OP_INVERT, 1},
{"abs", NumPyExpr::NP_OP_ABS, 1},
{"absolute", NumPyExpr::NP_OP_ABS, 1},
{"add", NumPyExpr::NP_OP_ADD, 2},
{"subtract", NumPyExpr::NP_OP_SUB, 2},
{"multiply", NumPyExpr::NP_OP_MUL, 2},
{"divide", NumPyExpr::NP_OP_TRUE_DIV, 2},
{"floor_divide", NumPyExpr::NP_OP_FLOOR_DIV, 2},
{"remainder", NumPyExpr::NP_OP_MOD, 2},
{"fmod", NumPyExpr::NP_OP_FMOD, 2},
{"power", NumPyExpr::NP_OP_POW, 2},
{"left_shift", NumPyExpr::NP_OP_LSHIFT, 2},
{"right_shift", NumPyExpr::NP_OP_RSHIFT, 2},
{"bitwise_and", NumPyExpr::NP_OP_AND, 2},
{"bitwise_or", NumPyExpr::NP_OP_OR, 2},
{"bitwise_xor", NumPyExpr::NP_OP_XOR, 2},
{"logical_and", NumPyExpr::NP_OP_LOGICAL_AND, 2},
{"logical_or", NumPyExpr::NP_OP_LOGICAL_OR, 2},
{"logical_xor", NumPyExpr::NP_OP_LOGICAL_XOR, 2},
{"equal", NumPyExpr::NP_OP_EQ, 2},
{"not_equal", NumPyExpr::NP_OP_NE, 2},
{"less", NumPyExpr::NP_OP_LT, 2},
{"less_equal", NumPyExpr::NP_OP_LE, 2},
{"greater", NumPyExpr::NP_OP_GT, 2},
{"greater_equal", NumPyExpr::NP_OP_GE, 2},
{"minimum", NumPyExpr::NP_OP_MIN, 2},
{"maximum", NumPyExpr::NP_OP_MAX, 2},
{"fmin", NumPyExpr::NP_OP_FMIN, 2},
{"fmax", NumPyExpr::NP_OP_FMAX, 2},
{"sin", NumPyExpr::NP_OP_SIN, 1},
{"cos", NumPyExpr::NP_OP_COS, 1},
{"tan", NumPyExpr::NP_OP_TAN, 1},
{"arcsin", NumPyExpr::NP_OP_ARCSIN, 1},
{"arccos", NumPyExpr::NP_OP_ARCCOS, 1},
{"arctan", NumPyExpr::NP_OP_ARCTAN, 1},
{"arctan2", NumPyExpr::NP_OP_ARCTAN2, 2},
{"hypot", NumPyExpr::NP_OP_HYPOT, 2},
{"sinh", NumPyExpr::NP_OP_SINH, 1},
{"cosh", NumPyExpr::NP_OP_COSH, 1},
{"tanh", NumPyExpr::NP_OP_TANH, 1},
{"arcsinh", NumPyExpr::NP_OP_ARCSINH, 1},
{"arccosh", NumPyExpr::NP_OP_ARCCOSH, 1},
{"arctanh", NumPyExpr::NP_OP_ARCTANH, 1},
{"conjugate", NumPyExpr::NP_OP_CONJ, 1},
{"exp", NumPyExpr::NP_OP_EXP, 1},
{"exp2", NumPyExpr::NP_OP_EXP2, 1},
{"log", NumPyExpr::NP_OP_LOG, 1},
{"log2", NumPyExpr::NP_OP_LOG2, 1},
{"log10", NumPyExpr::NP_OP_LOG10, 1},
{"expm1", NumPyExpr::NP_OP_EXPM1, 1},
{"log1p", NumPyExpr::NP_OP_LOG1P, 1},
{"sqrt", NumPyExpr::NP_OP_SQRT, 1},
{"square", NumPyExpr::NP_OP_SQUARE, 1},
{"cbrt", NumPyExpr::NP_OP_CBRT, 1},
{"logaddexp", NumPyExpr::NP_OP_LOGADDEXP, 2},
{"logaddexp2", NumPyExpr::NP_OP_LOGADDEXP2, 2},
{"reciprocal", NumPyExpr::NP_OP_RECIPROCAL, 1},
{"rint", NumPyExpr::NP_OP_RINT, 1},
{"floor", NumPyExpr::NP_OP_FLOOR, 1},
{"ceil", NumPyExpr::NP_OP_CEIL, 1},
{"trunc", NumPyExpr::NP_OP_TRUNC, 1},
{"isnan", NumPyExpr::NP_OP_ISNAN, 1},
{"isinf", NumPyExpr::NP_OP_ISINF, 1},
{"isfinite", NumPyExpr::NP_OP_ISFINITE, 1},
{"sign", NumPyExpr::NP_OP_SIGN, 1},
{"signbit", NumPyExpr::NP_OP_SIGNBIT, 1},
{"copysign", NumPyExpr::NP_OP_COPYSIGN, 2},
{"spacing", NumPyExpr::NP_OP_SPACING, 1},
{"nextafter", NumPyExpr::NP_OP_NEXTAFTER, 2},
{"deg2rad", NumPyExpr::NP_OP_DEG2RAD, 1},
{"radians", NumPyExpr::NP_OP_DEG2RAD, 1},
{"rad2deg", NumPyExpr::NP_OP_RAD2DEG, 1},
{"degrees", NumPyExpr::NP_OP_RAD2DEG, 1},
{"heaviside", NumPyExpr::NP_OP_HEAVISIDE, 2},
};
auto getNumPyExprType = [](types::Type *t, NumPyPrimitiveTypes &T) -> NumPyType {
if (t->is(T.bool_))
return {NumPyType::NP_TYPE_BOOL};
if (t->is(T.i8))
return {NumPyType::NP_TYPE_I8};
if (t->is(T.u8))
return {NumPyType::NP_TYPE_U8};
if (t->is(T.i16))
return {NumPyType::NP_TYPE_I16};
if (t->is(T.u16))
return {NumPyType::NP_TYPE_U16};
if (t->is(T.i32))
return {NumPyType::NP_TYPE_I32};
if (t->is(T.u32))
return {NumPyType::NP_TYPE_U32};
if (t->is(T.i64))
return {NumPyType::NP_TYPE_I64};
if (t->is(T.u64))
return {NumPyType::NP_TYPE_U64};
if (t->is(T.f16))
return {NumPyType::NP_TYPE_F16};
if (t->is(T.f32))
return {NumPyType::NP_TYPE_F32};
if (t->is(T.f64))
return {NumPyType::NP_TYPE_F64};
if (t->is(T.c64))
return {NumPyType::NP_TYPE_C64};
if (t->is(T.c128))
return {NumPyType::NP_TYPE_C128};
if (isArrayType(t)) {
auto generics = t->getGenerics();
seqassertn(generics.size() == 2 && generics[0].isType() && generics[1].isStatic(),
"unrecognized ndarray generics");
auto *dtype = generics[0].getTypeValue();
auto ndim = generics[1].getStaticValue();
if (dtype->is(T.bool_))
return {NumPyType::NP_TYPE_ARR_BOOL, ndim};
if (dtype->is(T.i8))
return {NumPyType::NP_TYPE_ARR_I8, ndim};
if (dtype->is(T.u8))
return {NumPyType::NP_TYPE_ARR_U8, ndim};
if (dtype->is(T.i16))
return {NumPyType::NP_TYPE_ARR_I16, ndim};
if (dtype->is(T.u16))
return {NumPyType::NP_TYPE_ARR_U16, ndim};
if (dtype->is(T.i32))
return {NumPyType::NP_TYPE_ARR_I32, ndim};
if (dtype->is(T.u32))
return {NumPyType::NP_TYPE_ARR_U32, ndim};
if (dtype->is(T.i64))
return {NumPyType::NP_TYPE_ARR_I64, ndim};
if (dtype->is(T.u64))
return {NumPyType::NP_TYPE_ARR_U64, ndim};
if (dtype->is(T.f16))
return {NumPyType::NP_TYPE_ARR_F16, ndim};
if (dtype->is(T.f32))
return {NumPyType::NP_TYPE_ARR_F32, ndim};
if (dtype->is(T.f64))
return {NumPyType::NP_TYPE_ARR_F64, ndim};
if (dtype->is(T.c64))
return {NumPyType::NP_TYPE_ARR_C64, ndim};
if (dtype->is(T.c128))
return {NumPyType::NP_TYPE_ARR_C128, ndim};
}
return {};
};
auto type = getNumPyExprType(v->getType(), T);
if (!type)
return {};
// Don't break up expressions that result in scalars or 0-dim arrays since those
// should only be computed once
if (type.ndim == 0) {
auto res = std::make_unique<NumPyExpr>(type, v);
leaves.emplace_back(res.get(), v);
return std::move(res);
}
if (auto *c = cast<CallInstr>(v)) {
auto *f = util::getFunc(c->getCallee());
// Check for matmul
if (f && c->numArgs() == 3 && isNoneType(c->back()->getType(), T) &&
(f->getName().rfind("std.numpy.linalg_sym.matmul:0[", 0) == 0 ||
(f->getName().rfind("std.numpy.linalg_sym.dot:0[", 0) == 0 &&
type.ndim == 2))) {
std::vector<Value *> args(c->begin(), c->end());
auto op = NumPyExpr::NP_OP_MATMUL;
auto lhs = parse(args[0], leaves, T);
if (!lhs)
return {};
auto rhs = parse(args[1], leaves, T);
if (!rhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs), std::move(rhs));
}
// Check for builtin abs()
if (f && c->numArgs() == 1 &&
(f->getName().rfind("std.internal.builtin.abs:0[", 0) == 0)) {
auto op = NumPyExpr::NP_OP_ABS;
auto lhs = parse(c->front(), leaves, T);
if (!lhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
}
// Check for transpose
if (f && isArrayType(f->getParentType()) && c->numArgs() == 1 &&
f->getUnmangledName() == "T") {
auto op = NumPyExpr::NP_OP_TRANSPOSE;
auto lhs = parse(c->front(), leaves, T);
if (!lhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
}
// Check for ufunc (e.g. "np.exp()") call
if (f && f->getUnmangledName() == Module::CALL_MAGIC_NAME &&
isUFuncType(f->getParentType())) {
auto ufuncGenerics = f->getParentType()->getGenerics();
seqassertn(!ufuncGenerics.empty() && ufuncGenerics[0].isStaticStr(),
"unrecognized ufunc class generics");
auto ufunc = ufuncGenerics[0].getStaticStringValue();
auto callGenerics = f->getType()->getGenerics();
seqassertn(!callGenerics.empty() && callGenerics[0].isType(),
"unrecognized ufunc call generics");
auto *dtype = callGenerics[0].getTypeValue();
if (dtype->is(T.none)) {
for (auto &u : ufuncs) {
if (u.name == ufunc) {
seqassertn(u.args == 1 || u.args == 2,
"unexpected number of arguments (ufunc)");
// Argument order:
// - ufunc self
// - operand 1
// - (if binary) operand 2
// - 'out'
// - 'where'
std::vector<Value *> args(c->begin(), c->end());
seqassertn(args.size() == u.args + 3, "unexpected call of {}", u.name);
auto *where = args[args.size() - 1];
auto *out = args[args.size() - 2];
if (auto *whereConst = cast<BoolConst>(where)) {
if (!whereConst->getVal())
break;
} else {
break;
}
if (!isNoneType(out->getType(), T))
break;
auto op = u.op;
auto lhs = parse(args[1], leaves, T);
if (!lhs)
return {};
if (u.args == 1)
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
auto rhs = parse(args[2], leaves, T);
if (!rhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
std::move(rhs));
}
}
}
}
// Check for magic method call
if (f && isArrayType(f->getParentType())) {
for (auto &m : magics) {
if (f->getUnmangledName() == m.name && c->numArgs() == m.args) {
seqassertn(m.args == 1 || m.args == 2,
"unexpected number of arguments (magic)");
std::vector<Value *> args(c->begin(), c->end());
auto op = m.op;
auto lhs = parse(args[0], leaves, T);
if (!lhs)
return {};
if (m.args == 1)
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs));
auto rhs = parse(args[1], leaves, T);
if (!rhs)
return {};
return m.right ? std::make_unique<NumPyExpr>(type, v, op, std::move(rhs),
std::move(lhs))
: std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
std::move(rhs));
}
}
}
}
// Check for right-hand-side magic method call
// Right-hand-side magics (e.g. __radd__) are compiled into FlowInstr:
// <lhs_expr> + <rhs_expr>
// becomes:
// { v1 = <lhs expr> ; v2 = <rhs expr> ; return rhs_class.__radd__(v2, v1) }
// So we need to check for this to detect r-magics.
if (auto *flow = cast<FlowInstr>(v)) {
auto *series = cast<SeriesFlow>(flow->getFlow());
auto *value = cast<CallInstr>(flow->getValue());
auto *f = value ? util::getFunc(value->getCallee()) : nullptr;
if (series && f && value->numArgs() == 2) {
std::vector<Value *> assignments(series->begin(), series->end());
auto *arg1 = value->front();
auto *arg2 = value->back();
auto *vv1 = cast<VarValue>(arg1);
auto *vv2 = cast<VarValue>(arg2);
auto *arg1Var = vv1 ? vv1->getVar() : nullptr;
auto *arg2Var = vv2 ? vv2->getVar() : nullptr;
for (auto &m : magics) {
if (f->getUnmangledName() == m.name && value->numArgs() == m.args && m.right) {
auto op = m.op;
if (assignments.size() == 0) {
// Case 1: Degenerate flow instruction
return parse(value, leaves, T);
} else if (assignments.size() == 1) {
// Case 2: One var -- check if it's either of the r-magic operands
auto *a1 = cast<AssignInstr>(assignments.front());
if (a1 && a1->getLhs() == arg1Var) {
auto rhs = parse(a1->getRhs(), leaves, T);
if (!rhs)
return {};
auto lhs = parse(arg2, leaves, T);
if (!lhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
std::move(rhs));
} else if (a1 && a1->getLhs() == arg2Var) {
auto lhs = parse(a1->getRhs(), leaves, T);
if (!lhs)
return {};
auto rhs = parse(arg1, leaves, T);
if (!rhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
std::move(rhs));
}
} else if (assignments.size() == 2) {
// Case 3: Two vars -- check both permutations
auto *a1 = cast<AssignInstr>(assignments.front());
auto *a2 = cast<AssignInstr>(assignments.back());
if (a1 && a2 && a1->getLhs() == arg1Var && a2->getLhs() == arg2Var) {
auto rhs = parse(a1->getRhs(), leaves, T);
if (!rhs)
return {};
auto lhs = parse(a2->getRhs(), leaves, T);
if (!lhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
std::move(rhs));
} else if (a1 && a2 && a2->getLhs() == arg1Var && a1->getLhs() == arg2Var) {
auto lhs = parse(a1->getRhs(), leaves, T);
if (!lhs)
return {};
auto rhs = parse(a2->getRhs(), leaves, T);
if (!rhs)
return {};
return std::make_unique<NumPyExpr>(type, v, op, std::move(lhs),
std::move(rhs));
}
}
break;
}
}
}
}
auto res = std::make_unique<NumPyExpr>(type, v);
leaves.emplace_back(res.get(), v);
return std::move(res);
}
namespace {
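// Optimizes a parsed expression tree in place: transpose and matmul nodes are
// first evaluated into temporaries, then sub-expressions are fused (statically
// or behind a runtime broadcast check) based on cost, and the remainder is
// evaluated sequentially; returns the variable holding the final result.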
Var *optimizeHelper(NumPyOptimizationUnit &unit, NumPyExpr *expr, CodegenContext &C) {
auto *M = unit.value->getModule();
auto *series = C.series;
// Handle operations that cannot easily be done element-wise (transpose, matmul)
// by optimizing them separately and recursively, replacing each with a
// precomputed temporary.
expr->apply([&](NumPyExpr &e) {
if (!e.type.isArray())
return;
if (e.op == NumPyExpr::NP_OP_TRANSPOSE) {
auto *lv = optimizeHelper(unit, e.lhs.get(), C);
auto *transposeFunc =
M->getOrRealizeFunc("_transpose", {lv->getType()}, {}, FUSION_MODULE);
seqassertn(transposeFunc, "transpose func not found");
auto *var = util::makeVar(util::call(transposeFunc, {M->Nr<VarValue>(lv)}),
C.series, C.func);
C.vars[&e] = var;
NumPyExpr replacement(e.type, M->Nr<VarValue>(var));
replacement.freeable = e.lhs->freeable;
e.replace(replacement);
}
if (e.op == NumPyExpr::NP_OP_MATMUL) {
auto *lv = optimizeHelper(unit, e.lhs.get(), C);
auto *rv = optimizeHelper(unit, e.rhs.get(), C);
auto *matmulFunc = M->getOrRealizeFunc("_matmul", {lv->getType(), rv->getType()},
{}, FUSION_MODULE);
seqassertn(matmulFunc, "matmul func not found");
auto *var = util::makeVar(
util::call(matmulFunc, {M->Nr<VarValue>(lv), M->Nr<VarValue>(rv)}), C.series,
C.func);
C.vars[&e] = var;
NumPyExpr replacement(e.type, M->Nr<VarValue>(var));
replacement.freeable = true;
e.replace(replacement);
}
});
// Optimize the given expression
bool changed;
do {
changed = false;
expr->apply([&](NumPyExpr &e) {
if (e.depth() <= 2)
return;
auto cost = e.cost();
auto bcinfo = e.getBroadcastInfo();
Var *result = nullptr;
if (cost <= AlwaysFuseCostThreshold ||
(cost <= NeverFuseCostThreshold && bcinfo == BroadcastInfo::NO)) {
// Don't care about broadcasting; just fuse.
XLOG("-> static fuse:\n{}", e.str());
result = e.codegenFusedEval(C);
} else if (cost <= NeverFuseCostThreshold && bcinfo != BroadcastInfo::YES) {
// Check at runtime if we're broadcasting and fuse conditionally.
XLOG("-> conditional fuse:\n{}", e.str());
auto *broadcasts = e.codegenBroadcasts(C);
auto *seqtSeries = M->Nr<SeriesFlow>();
auto *fuseSeries = M->Nr<SeriesFlow>();
auto *branch = M->Nr<IfFlow>(broadcasts, seqtSeries, fuseSeries);
C.series = seqtSeries;
auto *seqtResult = e.codegenSequentialEval(C);
C.series = fuseSeries;
auto *fuseResult = e.codegenFusedEval(C);
seqassertn(seqtResult->getType()->is(fuseResult->getType()),
"types are not the same: {} {}", seqtResult->getType()->getName(),
fuseResult->getType()->getName());
result = M->Nr<Var>(seqtResult->getType(), false);
unit.func->push_back(result);
seqtSeries->push_back(M->Nr<AssignInstr>(result, M->Nr<VarValue>(seqtResult)));
fuseSeries->push_back(M->Nr<AssignInstr>(result, M->Nr<VarValue>(fuseResult)));
C.series = series;
series->push_back(branch);
}
if (result) {
NumPyExpr tmp(e.type, M->Nr<VarValue>(result));
e.replace(tmp);
e.freeable = true;
C.vars[&e] = result;
changed = true;
}
});
} while (changed);
XLOG("-> sequential eval:\n{}", expr->str());
return expr->codegenSequentialEval(C);
}
} // namespace
bool NumPyOptimizationUnit::optimize(NumPyPrimitiveTypes &T) {
if (!expr->type.isArray() || expr->depth() <= 2)
return false;
XLOG("Optimizing expression at {}\n{}", value->getSrcInfo(), expr->str());
auto *M = value->getModule();
auto *series = M->Nr<SeriesFlow>();
CodegenContext C(M, series, func, T);
util::CloneVisitor cv(M);
for (auto &p : leaves) {
auto *var = util::makeVar(cv.clone(p.second), series, func);
C.vars.emplace(p.first, var);
}
auto *result = optimizeHelper(*this, expr.get(), C);
auto *replacement = M->Nr<FlowInstr>(C.series, M->Nr<VarValue>(result));
value->replaceAll(replacement);
return true;
}
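// Collects candidate NumPy expressions from a function body: each assignment or
// standalone value is parsed into a NumPyExpr tree, and trees containing at
// least one array node with depth greater than one are kept as optimization
// units. Sub-values of an extracted expression are not extracted again.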
struct ExtractArrayExpressions : public util::Operator {
BodiedFunc *func;
NumPyPrimitiveTypes types;
std::vector<NumPyOptimizationUnit> exprs;
std::unordered_set<id_t> extracted;
explicit ExtractArrayExpressions(BodiedFunc *func)
: util::Operator(), func(func), types(func->getModule()), exprs(), extracted() {}
void extract(Value *v, AssignInstr *assign = nullptr) {
if (extracted.count(v->getId()))
return;
std::vector<std::pair<NumPyExpr *, Value *>> leaves;
auto expr = parse(v, leaves, types);
if (expr) {
int64_t numArrayNodes = 0;
expr->apply([&](NumPyExpr &e) {
if (e.type.isArray())
++numArrayNodes;
extracted.emplace(e.val->getId());
});
if (numArrayNodes > 0 && expr->depth() > 1) {
exprs.push_back({v, func, std::move(expr), std::move(leaves), assign});
}
}
}
void preHook(Node *n) override {
if (auto *v = cast<AssignInstr>(n)) {
extract(v->getRhs(), v->getLhs()->isGlobal() ? nullptr : v);
} else if (auto *v = cast<Value>(n)) {
extract(v);
}
}
};
const std::string NumPyFusionPass::KEY = "core-numpy-fusion";
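// Pass entry point: extract candidate expressions, consult the reaching-
// definition and side-effect analyses, forward intermediate results between
// expressions where safe, then optimize each root expression and erase the
// assignments that were forwarded away.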
void NumPyFusionPass::visit(BodiedFunc *func) {
ExtractArrayExpressions extractor(func);
func->accept(extractor);
if (extractor.exprs.empty())
return;
auto *rdres = getAnalysisResult<analyze::dataflow::RDResult>(reachingDefKey);
auto it = rdres->results.find(func->getId());
if (it == rdres->results.end())
return;
auto *rd = it->second.get();
auto *se = getAnalysisResult<analyze::module::SideEffectResult>(sideEffectsKey);
auto *cfg = rdres->cfgResult->graphs.find(func->getId())->second.get();
auto fwd = getForwardingDAGs(func, rd, cfg, se, extractor.exprs);
for (auto &dag : fwd) {
std::vector<AssignInstr *> assignsToDelete;
auto *e = doForwarding(dag, assignsToDelete);
if (e->optimize(extractor.types)) {
for (auto *a : assignsToDelete)
a->replaceAll(func->getModule()->Nr<SeriesFlow>());
}
}
}
} // namespace numpy
} // namespace transform
} // namespace ir
} // namespace codon

View File

@ -0,0 +1,313 @@
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once
#include "codon/cir/analyze/dataflow/reaching.h"
#include "codon/cir/analyze/module/global_vars.h"
#include "codon/cir/analyze/module/side_effect.h"
#include "codon/cir/transform/pass.h"
#include "codon/cir/types/types.h"
#include <functional>
#include <memory>
#include <vector>
namespace codon {
namespace ir {
namespace transform {
namespace numpy {
extern const std::string FUSION_MODULE;
/// NumPy operator fusion pass.
class NumPyFusionPass : public OperatorPass {
private:
/// Key of the reaching definition analysis
std::string reachingDefKey;
/// Key of the side effect analysis
std::string sideEffectsKey;
public:
static const std::string KEY;
/// Constructs a NumPy fusion pass.
/// @param reachingDefKey the reaching definition analysis' key
/// @param sideEffectsKey the side effect analysis' key
NumPyFusionPass(const std::string &reachingDefKey, const std::string &sideEffectsKey)
: OperatorPass(), reachingDefKey(reachingDefKey), sideEffectsKey(sideEffectsKey) {
}
std::string getKey() const override { return KEY; }
void visit(BodiedFunc *f) override;
};
struct NumPyPrimitiveTypes {
types::Type *none;
types::Type *optnone;
types::Type *bool_;
types::Type *i8;
types::Type *u8;
types::Type *i16;
types::Type *u16;
types::Type *i32;
types::Type *u32;
types::Type *i64;
types::Type *u64;
types::Type *f16;
types::Type *f32;
types::Type *f64;
types::Type *c64;
types::Type *c128;
explicit NumPyPrimitiveTypes(Module *M);
};
struct NumPyType {
enum Type {
NP_TYPE_NONE = -1,
NP_TYPE_BOOL,
NP_TYPE_I8,
NP_TYPE_U8,
NP_TYPE_I16,
NP_TYPE_U16,
NP_TYPE_I32,
NP_TYPE_U32,
NP_TYPE_I64,
NP_TYPE_U64,
NP_TYPE_F16,
NP_TYPE_F32,
NP_TYPE_F64,
NP_TYPE_C64,
NP_TYPE_C128,
NP_TYPE_SCALAR_END, // separator value
NP_TYPE_ARR_BOOL,
NP_TYPE_ARR_I8,
NP_TYPE_ARR_U8,
NP_TYPE_ARR_I16,
NP_TYPE_ARR_U16,
NP_TYPE_ARR_I32,
NP_TYPE_ARR_U32,
NP_TYPE_ARR_I64,
NP_TYPE_ARR_U64,
NP_TYPE_ARR_F16,
NP_TYPE_ARR_F32,
NP_TYPE_ARR_F64,
NP_TYPE_ARR_C64,
NP_TYPE_ARR_C128,
} dtype;
int64_t ndim;
NumPyType(Type dtype, int64_t ndim = 0);
NumPyType();
static NumPyType get(types::Type *t, NumPyPrimitiveTypes &T);
types::Type *getIRBaseType(NumPyPrimitiveTypes &T) const;
operator bool() const { return dtype != NP_TYPE_NONE; }
bool isArray() const { return dtype > NP_TYPE_SCALAR_END; }
friend std::ostream &operator<<(std::ostream &os, NumPyType const &type);
std::string str() const;
};
struct NumPyExpr;
struct CodegenContext {
Module *M;
SeriesFlow *series;
BodiedFunc *func;
std::unordered_map<NumPyExpr *, Var *> vars;
NumPyPrimitiveTypes &T;
CodegenContext(Module *M, SeriesFlow *series, BodiedFunc *func,
NumPyPrimitiveTypes &T);
};
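/// Broadcasting classification returned by NumPyExpr::getBroadcastInfo().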
enum BroadcastInfo {
UNKNOWN,
YES,
NO,
MAYBE,
};
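/// A parsed NumPy expression tree: leaves wrap IR values (arrays or scalars)
/// and interior nodes represent element-wise or special (matmul, transpose)
/// operations.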
struct NumPyExpr {
NumPyType type;
Value *val;
enum Op {
NP_OP_NONE,
NP_OP_POS,
NP_OP_NEG,
NP_OP_INVERT,
NP_OP_ABS,
NP_OP_TRANSPOSE,
NP_OP_ADD,
NP_OP_SUB,
NP_OP_MUL,
NP_OP_MATMUL,
NP_OP_TRUE_DIV,
NP_OP_FLOOR_DIV,
NP_OP_MOD,
NP_OP_FMOD,
NP_OP_POW,
NP_OP_LSHIFT,
NP_OP_RSHIFT,
NP_OP_AND,
NP_OP_OR,
NP_OP_XOR,
NP_OP_LOGICAL_AND,
NP_OP_LOGICAL_OR,
NP_OP_LOGICAL_XOR,
NP_OP_EQ,
NP_OP_NE,
NP_OP_LT,
NP_OP_LE,
NP_OP_GT,
NP_OP_GE,
NP_OP_MIN,
NP_OP_MAX,
NP_OP_FMIN,
NP_OP_FMAX,
NP_OP_SIN,
NP_OP_COS,
NP_OP_TAN,
NP_OP_ARCSIN,
NP_OP_ARCCOS,
NP_OP_ARCTAN,
NP_OP_ARCTAN2,
NP_OP_HYPOT,
NP_OP_SINH,
NP_OP_COSH,
NP_OP_TANH,
NP_OP_ARCSINH,
NP_OP_ARCCOSH,
NP_OP_ARCTANH,
NP_OP_CONJ,
NP_OP_EXP,
NP_OP_EXP2,
NP_OP_LOG,
NP_OP_LOG2,
NP_OP_LOG10,
NP_OP_EXPM1,
NP_OP_LOG1P,
NP_OP_SQRT,
NP_OP_SQUARE,
NP_OP_CBRT,
NP_OP_LOGADDEXP,
NP_OP_LOGADDEXP2,
NP_OP_RECIPROCAL,
NP_OP_RINT,
NP_OP_FLOOR,
NP_OP_CEIL,
NP_OP_TRUNC,
NP_OP_ISNAN,
NP_OP_ISINF,
NP_OP_ISFINITE,
NP_OP_SIGN,
NP_OP_SIGNBIT,
NP_OP_COPYSIGN,
NP_OP_SPACING,
NP_OP_NEXTAFTER,
NP_OP_DEG2RAD,
NP_OP_RAD2DEG,
NP_OP_HEAVISIDE,
} op;
std::unique_ptr<NumPyExpr> lhs;
std::unique_ptr<NumPyExpr> rhs;
bool freeable;
NumPyExpr(NumPyType type, Value *val)
: type(std::move(type)), val(val), op(NP_OP_NONE), lhs(), rhs(), freeable(false) {
}
NumPyExpr(NumPyType type, Value *val, NumPyExpr::Op op,
std::unique_ptr<NumPyExpr> lhs)
: type(std::move(type)), val(val), op(op), lhs(std::move(lhs)), rhs(),
freeable(false) {}
NumPyExpr(NumPyType type, Value *val, NumPyExpr::Op op,
std::unique_ptr<NumPyExpr> lhs, std::unique_ptr<NumPyExpr> rhs)
: type(std::move(type)), val(val), op(op), lhs(std::move(lhs)),
rhs(std::move(rhs)), freeable(false) {}
static std::unique_ptr<NumPyExpr>
parse(Value *v, std::vector<std::pair<NumPyExpr *, Value *>> &leaves,
NumPyPrimitiveTypes &T);
void replace(NumPyExpr &e);
bool haveVectorizedLoop() const;
int64_t opcost() const;
int64_t cost() const;
std::string opstring() const;
void dump(std::ostream &os, int level, int &leafId) const;
friend std::ostream &operator<<(std::ostream &os, NumPyExpr const &expr);
std::string str() const;
bool isLeaf() const { return !lhs && !rhs; }
int depth() const {
return std::max(lhs ? lhs->depth() : 0, rhs ? rhs->depth() : 0) + 1;
}
int nodes() const { return (lhs ? lhs->nodes() : 0) + (rhs ? rhs->nodes() : 0) + 1; }
void apply(std::function<void(NumPyExpr &)> f);
Value *codegenBroadcasts(CodegenContext &C);
Var *codegenFusedEval(CodegenContext &C);
Var *codegenSequentialEval(CodegenContext &C);
BroadcastInfo getBroadcastInfo();
Value *codegenScalarExpr(CodegenContext &C,
const std::unordered_map<NumPyExpr *, Var *> &args,
const std::unordered_map<NumPyExpr *, unsigned> &scalarMap,
Var *scalars);
};
std::unique_ptr<NumPyExpr> parse(Value *v,
std::vector<std::pair<NumPyExpr *, Value *>> &leaves,
NumPyPrimitiveTypes &T);
struct NumPyOptimizationUnit {
/// Original IR value corresponding to this expression
Value *value;
/// Function in which the value exists
BodiedFunc *func;
/// Root expression
std::unique_ptr<NumPyExpr> expr;
/// Leaves ordered by execution in original expression
std::vector<std::pair<NumPyExpr *, Value *>> leaves;
/// AssignInstr whose right-hand side is this expression, or null if none
AssignInstr *assign;
bool optimize(NumPyPrimitiveTypes &T);
};
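/// Records that the expression of 'src' (assigned to 'var') can replace the
/// leaf 'dstLeaf' of 'dst'; 'dstId' and 'srcId' index the original expression
/// list.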
struct Forwarding {
NumPyOptimizationUnit *dst;
NumPyOptimizationUnit *src;
Var *var;
NumPyExpr *dstLeaf;
int64_t dstId;
int64_t srcId;
};
using ForwardingDAG =
std::unordered_map<NumPyOptimizationUnit *, std::vector<Forwarding>>;
NumPyOptimizationUnit *doForwarding(ForwardingDAG &dag,
std::vector<AssignInstr *> &assignsToDelete);
std::vector<ForwardingDAG> getForwardingDAGs(BodiedFunc *func,
analyze::dataflow::RDInspector *rd,
analyze::dataflow::CFGraph *cfg,
analyze::module::SideEffectResult *se,
std::vector<NumPyOptimizationUnit> &exprs);
} // namespace numpy
} // namespace transform
} // namespace ir
} // namespace codon

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "openmp.h"
@ -402,7 +402,8 @@ struct ReductionIdentifier : public util::Operator {
static void extractAssociativeOpChain(Value *v, const std::string &op,
types::Type *type,
std::vector<Value *> &result) {
if (util::isCallOf(v, op, {type, type}, type, /*method=*/true)) {
if (util::isCallOf(v, op, {type, nullptr}, type, /*method=*/true) ||
util::isCallOf(v, op, {nullptr, type}, type, /*method=*/true)) {
auto *call = cast<CallInstr>(v);
extractAssociativeOpChain(call->front(), op, type, result);
extractAssociativeOpChain(call->back(), op, type, result);
@ -450,7 +451,8 @@ struct ReductionIdentifier : public util::Operator {
for (auto &rf : reductionFunctions) {
if (rf.method) {
if (!util::isCallOf(item, rf.name, {type, type}, type, /*method=*/true))
if (!(util::isCallOf(item, rf.name, {type, nullptr}, type, /*method=*/true) ||
util::isCallOf(item, rf.name, {nullptr, type}, type, /*method=*/true)))
continue;
} else {
if (!util::isCallOf(item, rf.name,
@ -464,8 +466,7 @@ struct ReductionIdentifier : public util::Operator {
if (rf.method) {
std::vector<Value *> opChain;
extractAssociativeOpChain(callRHS, rf.name, callRHS->front()->getType(),
opChain);
extractAssociativeOpChain(callRHS, rf.name, type, opChain);
if (opChain.size() < 2)
continue;
@ -640,10 +641,11 @@ struct ParallelLoopTemplateReplacer : public LoopTemplateReplacer {
auto *series = M->Nr<SeriesFlow>();
auto *tupleVal = util::makeVar(reductionTuple, series, parent);
auto *reduceCode = util::call(
reduceNoWait, {M->Nr<VarValue>(reductionLocRef), M->Nr<VarValue>(gtid),
tupleVal, rawReducer, M->Nr<PointerValue>(lck)});
auto *codeVar = util::makeVar(reduceCode, series, parent)->getVar();
auto *reduceCode =
util::call(reduceNoWait,
{M->Nr<VarValue>(reductionLocRef), M->Nr<VarValue>(gtid),
M->Nr<VarValue>(tupleVal), rawReducer, M->Nr<PointerValue>(lck)});
auto *codeVar = util::makeVar(reduceCode, series, parent);
seqassertn(codeVar->getType()->is(M->getIntType()), "wrong reduce code type");
auto *sectionNonAtomic = M->Nr<SeriesFlow>();
@ -740,11 +742,11 @@ struct ImperativeLoopTemplateReplacer : public ParallelLoopTemplateReplacer {
"unknown reduction init value");
}
VarValue *newVar = util::makeVar(
initVal, cast<SeriesFlow>(parent->getBody()), parent, /*prepend=*/true);
sharedInfo.push_back({next, newVar->getVar(), reduction});
auto *newVar = util::makeVar(initVal, cast<SeriesFlow>(parent->getBody()),
parent, /*prepend=*/true);
sharedInfo.push_back({next, newVar, reduction});
newArg = M->Nr<PointerValue>(newVar->getVar());
newArg = M->Nr<PointerValue>(newVar);
++next;
} else {
newArg = util::tupleGet(M->Nr<VarValue>(extras), next++);
@ -918,9 +920,9 @@ struct TaskLoopRoutineStubReplacer : public ParallelLoopTemplateReplacer {
for (auto *val : shareds) {
if (getVarFromOutlinedArg(val)->getId() != loopVar->getId()) {
if (auto &reduction = sharedRedux[sharedsNext]) {
Var *newVar = util::getVar(util::makeVar(
reduction.getInitial(), cast<SeriesFlow>(parent->getBody()), parent,
/*prepend=*/true));
auto *newVar = util::makeVar(reduction.getInitial(),
cast<SeriesFlow>(parent->getBody()), parent,
/*prepend=*/true);
sharedInfo.push_back({sharedsNext, newVar, reduction});
}
}
@ -1050,7 +1052,7 @@ struct TaskLoopRoutineStubReplacer : public ParallelLoopTemplateReplacer {
seqassertn(irArrayType, "could not find 'TaskReductionInputArray' type");
auto *taskRedInputsArray = util::makeVar(
M->Nr<StackAllocInstr>(irArrayType, numRed), taskRedInitSeries, parent);
array = util::getVar(taskRedInputsArray);
array = taskRedInputsArray;
auto *taskRedInputsArrayType = taskRedInputsArray->getType();
auto *taskRedSetItem = M->getOrRealizeMethod(
@ -1081,7 +1083,7 @@ struct TaskLoopRoutineStubReplacer : public ParallelLoopTemplateReplacer {
M->Nr<VarValue>(gtid),
M->getInt(numRed), arrayPtr}),
taskRedInitSeries, parent);
tskgrp = util::getVar(taskRedInitResult);
tskgrp = taskRedInitResult;
v->replaceAll(taskRedInitSeries);
}
@ -1345,14 +1347,13 @@ CollapseResult collapseLoop(BodiedFunc *parent, ImperativeForFlow *v, int64_t le
for (auto *loop : loopNests) {
LoopRange range;
range.loop = loop;
range.start = util::makeVar(loop->getStart(), setup, parent)->getVar();
range.stop = util::makeVar(loop->getEnd(), setup, parent)->getVar();
range.start = util::makeVar(loop->getStart(), setup, parent);
range.stop = util::makeVar(loop->getEnd(), setup, parent);
range.step = loop->getStep();
range.len = util::makeVar(util::call(lenCalc, {M->Nr<VarValue>(range.start),
M->Nr<VarValue>(range.stop),
M->getInt(range.step)}),
setup, parent)
->getVar();
range.len = util::makeVar(
util::call(lenCalc, {M->Nr<VarValue>(range.start), M->Nr<VarValue>(range.stop),
M->getInt(range.step)}),
setup, parent);
ranges.push_back(range);
}
@ -1374,11 +1375,9 @@ CollapseResult collapseLoop(BodiedFunc *parent, ImperativeForFlow *v, int64_t le
for (auto it = ranges.rbegin(); it != ranges.rend(); ++it) {
auto *k = lastDiv ? lastDiv : collapsedVar;
auto *div =
util::makeVar(*M->Nr<VarValue>(k) / *M->Nr<VarValue>(it->len), body, parent)
->getVar();
util::makeVar(*M->Nr<VarValue>(k) / *M->Nr<VarValue>(it->len), body, parent);
auto *mod =
util::makeVar(*M->Nr<VarValue>(k) % *M->Nr<VarValue>(it->len), body, parent)
->getVar();
util::makeVar(*M->Nr<VarValue>(k) % *M->Nr<VarValue>(it->len), body, parent);
auto *i =
*M->Nr<VarValue>(it->start) + *(*M->Nr<VarValue>(mod) * *M->getInt(it->step));
body->push_back(M->Nr<AssignInstr>(it->loop->getVar(), i));

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "schedule.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "pass.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "dict.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "generator.h"
@ -150,7 +150,7 @@ Func *genToSum(BodiedFunc *gen, types::Type *startType, types::Type *outType) {
if (!init || !init->getType()->is(outType))
return nullptr;
auto *accumulator = util::makeVar(init, body, fn, /*prepend=*/true)->getVar();
auto *accumulator = util::makeVar(init, body, fn, /*prepend=*/true);
GeneratorSumTransformer xgen(accumulator);
fn->accept(xgen);
body->push_back(M->Nr<ReturnInstr>(M->Nr<VarValue>(accumulator)));

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "io.h"

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "list.h"
@ -45,7 +45,7 @@ struct ElementHandler {
void doSetup(const std::vector<Value *> &values, SeriesFlow *block,
BodiedFunc *parent) {
for (auto *v : values) {
vars.push_back(util::makeVar(v, block, parent)->getVar());
vars.push_back(util::makeVar(v, block, parent));
}
}
@ -226,7 +226,7 @@ Value *optimize(BodiedFunc *parent, InspectionResult &r) {
}
auto *opt = M->Nr<SeriesFlow>();
auto *len = util::makeVar(M->getInt(0), opt, parent)->getVar();
auto *len = util::makeVar(M->getInt(0), opt, parent);
for (auto &h : handlers) {
h->setup(opt, parent);
@ -238,8 +238,7 @@ Value *optimize(BodiedFunc *parent, InspectionResult &r) {
auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_opt_new", {M->getIntType()});
seqassertn(fn, "could not find list new helper");
auto *result =
util::makeVar(util::call(fn, {M->Nr<VarValue>(len)}), opt, parent)->getVar();
auto *result = util::makeVar(util::call(fn, {M->Nr<VarValue>(len)}), opt, parent);
for (auto &h : handlers) {
opt->push_back(h->append(M->Nr<VarValue>(result)));

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#pragma once

View File

@ -1,4 +1,4 @@
// Copyright (C) 2022-2024 Exaloop Inc. <https://exaloop.io>
// Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
#include "str.h"

Some files were not shown because too many files have changed in this diff.