From fd43d67f28af893010e8532d8f5c8501eecb4303 Mon Sep 17 00:00:00 2001 From: "A. R. Shajii" Date: Fri, 30 Dec 2022 23:04:29 -0500 Subject: [PATCH] Optimize list additions (#143) * Optimize list additions * Fix helper bug * Add tests * Add more magic name constants * Minor API cleanup * Format * Slightly improve appends --- CMakeLists.txt | 2 + codon/sir/module.cpp | 28 ++ codon/sir/module.h | 28 ++ codon/sir/transform/manager.cpp | 2 + codon/sir/transform/pythonic/list.cpp | 271 +++++++++++++++++++ codon/sir/transform/pythonic/list.h | 24 ++ codon/sir/transform/pythonic/str.cpp | 7 +- stdlib/internal/types/collections/list.codon | 44 +++ test/core/containers.codon | 1 + test/main.cpp | 1 + test/transform/list_opt.codon | 42 +++ 11 files changed, 446 insertions(+), 4 deletions(-) create mode 100644 codon/sir/transform/pythonic/list.cpp create mode 100644 codon/sir/transform/pythonic/list.h create mode 100644 test/transform/list_opt.codon diff --git a/CMakeLists.txt b/CMakeLists.txt index 45bfee24..0b80feaa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,6 +198,7 @@ set(CODON_HPPFILES codon/sir/transform/pass.h codon/sir/transform/pythonic/dict.h codon/sir/transform/pythonic/io.h + codon/sir/transform/pythonic/list.h codon/sir/transform/pythonic/str.h codon/sir/transform/rewrite.h codon/sir/types/types.h @@ -304,6 +305,7 @@ set(CODON_CPPFILES codon/sir/transform/pass.cpp codon/sir/transform/pythonic/dict.cpp codon/sir/transform/pythonic/io.cpp + codon/sir/transform/pythonic/list.cpp codon/sir/transform/pythonic/str.cpp codon/sir/types/types.cpp codon/sir/util/cloning.cpp diff --git a/codon/sir/module.cpp b/codon/sir/module.cpp index 33ca5ae4..6e013282 100644 --- a/codon/sir/module.cpp +++ b/codon/sir/module.cpp @@ -89,6 +89,34 @@ const std::string Module::AND_MAGIC_NAME = "__and__"; const std::string Module::OR_MAGIC_NAME = "__or__"; const std::string Module::XOR_MAGIC_NAME = "__xor__"; +const std::string Module::IADD_MAGIC_NAME = "__iadd__"; +const 
std::string Module::ISUB_MAGIC_NAME = "__isub__"; +const std::string Module::IMUL_MAGIC_NAME = "__imul__"; +const std::string Module::IMATMUL_MAGIC_NAME = "__imatmul__"; +const std::string Module::ITRUE_DIV_MAGIC_NAME = "__itruediv__"; +const std::string Module::IFLOOR_DIV_MAGIC_NAME = "__ifloordiv__"; +const std::string Module::IMOD_MAGIC_NAME = "__imod__"; +const std::string Module::IPOW_MAGIC_NAME = "__ipow__"; +const std::string Module::ILSHIFT_MAGIC_NAME = "__ilshift__"; +const std::string Module::IRSHIFT_MAGIC_NAME = "__irshift__"; +const std::string Module::IAND_MAGIC_NAME = "__iand__"; +const std::string Module::IOR_MAGIC_NAME = "__ior__"; +const std::string Module::IXOR_MAGIC_NAME = "__ixor__"; + +const std::string Module::RADD_MAGIC_NAME = "__radd__"; +const std::string Module::RSUB_MAGIC_NAME = "__rsub__"; +const std::string Module::RMUL_MAGIC_NAME = "__rmul__"; +const std::string Module::RMATMUL_MAGIC_NAME = "__rmatmul__"; +const std::string Module::RTRUE_DIV_MAGIC_NAME = "__rtruediv__"; +const std::string Module::RFLOOR_DIV_MAGIC_NAME = "__rfloordiv__"; +const std::string Module::RMOD_MAGIC_NAME = "__rmod__"; +const std::string Module::RPOW_MAGIC_NAME = "__rpow__"; +const std::string Module::RLSHIFT_MAGIC_NAME = "__rlshift__"; +const std::string Module::RRSHIFT_MAGIC_NAME = "__rrshift__"; +const std::string Module::RAND_MAGIC_NAME = "__rand__"; +const std::string Module::ROR_MAGIC_NAME = "__ror__"; +const std::string Module::RXOR_MAGIC_NAME = "__rxor__"; + const std::string Module::INT_MAGIC_NAME = "__int__"; const std::string Module::FLOAT_MAGIC_NAME = "__float__"; const std::string Module::BOOL_MAGIC_NAME = "__bool__"; diff --git a/codon/sir/module.h b/codon/sir/module.h index 21cc33c4..a7e83de0 100644 --- a/codon/sir/module.h +++ b/codon/sir/module.h @@ -61,6 +61,34 @@ public: static const std::string OR_MAGIC_NAME; static const std::string XOR_MAGIC_NAME; + static const std::string IADD_MAGIC_NAME; + static const std::string ISUB_MAGIC_NAME; + 
static const std::string IMUL_MAGIC_NAME; + static const std::string IMATMUL_MAGIC_NAME; + static const std::string ITRUE_DIV_MAGIC_NAME; + static const std::string IFLOOR_DIV_MAGIC_NAME; + static const std::string IMOD_MAGIC_NAME; + static const std::string IPOW_MAGIC_NAME; + static const std::string ILSHIFT_MAGIC_NAME; + static const std::string IRSHIFT_MAGIC_NAME; + static const std::string IAND_MAGIC_NAME; + static const std::string IOR_MAGIC_NAME; + static const std::string IXOR_MAGIC_NAME; + + static const std::string RADD_MAGIC_NAME; + static const std::string RSUB_MAGIC_NAME; + static const std::string RMUL_MAGIC_NAME; + static const std::string RMATMUL_MAGIC_NAME; + static const std::string RTRUE_DIV_MAGIC_NAME; + static const std::string RFLOOR_DIV_MAGIC_NAME; + static const std::string RMOD_MAGIC_NAME; + static const std::string RPOW_MAGIC_NAME; + static const std::string RLSHIFT_MAGIC_NAME; + static const std::string RRSHIFT_MAGIC_NAME; + static const std::string RAND_MAGIC_NAME; + static const std::string ROR_MAGIC_NAME; + static const std::string RXOR_MAGIC_NAME; + static const std::string INT_MAGIC_NAME; static const std::string FLOAT_MAGIC_NAME; static const std::string BOOL_MAGIC_NAME; diff --git a/codon/sir/transform/manager.cpp b/codon/sir/transform/manager.cpp index 514b7869..0fe26aa9 100644 --- a/codon/sir/transform/manager.cpp +++ b/codon/sir/transform/manager.cpp @@ -19,6 +19,7 @@ #include "codon/sir/transform/pass.h" #include "codon/sir/transform/pythonic/dict.h" #include "codon/sir/transform/pythonic/io.h" +#include "codon/sir/transform/pythonic/list.h" #include "codon/sir/transform/pythonic/str.h" #include "codon/util/common.h" @@ -159,6 +160,7 @@ void PassManager::registerStandardPasses(PassManager::Init init) { case Init::JIT: { // Pythonic registerPass(std::make_unique()); + registerPass(std::make_unique()); registerPass(std::make_unique()); registerPass(std::make_unique()); diff --git a/codon/sir/transform/pythonic/list.cpp 
b/codon/sir/transform/pythonic/list.cpp new file mode 100644 index 00000000..d987478d --- /dev/null +++ b/codon/sir/transform/pythonic/list.cpp @@ -0,0 +1,271 @@ +// Copyright (C) 2022 Exaloop Inc. + +#include "list.h" + +#include + +#include "codon/sir/util/cloning.h" +#include "codon/sir/util/irtools.h" + +namespace codon { +namespace ir { +namespace transform { +namespace pythonic { +namespace { + +static const std::string LIST = "std.internal.types.ptr.List"; +static const std::string SLICE = "std.internal.types.slice.Slice"; + +bool isList(Value *v) { return v->getType()->getName().rfind(LIST + "[", 0) == 0; } +bool isSlice(Value *v) { return v->getType()->getName() == SLICE; } + +// The following "handlers" account for the possible sub-expressions we might +// see when optimizing list1 + list2 + ... listN. Currently, we optimize: +// - Slices: x[a:b:c] (avoid constructing the temporary sliced list) +// - Literals: [a, b, c] (just append elements directly) +// - Default: (append by iterating over the list) +// It is easy to handle new sub-expression types by adding new handlers. +// There are three stages in the optimized code: +// - Setup: assign all the relevant expressions to variables, making +// sure they're evaluated in the same order as before +// - Count: figure out the total length of the resulting list +// - Create: initialize a new list with the appropriate capacity and +// append all the elements +// The handlers have virtual functions to generate IR for each of these steps. 
+ +struct ElementHandler { + std::vector vars; + + ElementHandler() : vars() {} + virtual ~ElementHandler() {} + virtual void setup(SeriesFlow *block, BodiedFunc *parent) = 0; + virtual Value *length(Module *M) = 0; + virtual Value *append(Value *result) = 0; + + void doSetup(const std::vector &values, SeriesFlow *block, + BodiedFunc *parent) { + for (auto *v : values) { + vars.push_back(util::makeVar(v, block, parent)->getVar()); + } + } + + static std::unique_ptr get(Value *v, types::Type *ty); +}; + +struct DefaultHandler : public ElementHandler { + Value *element; + + DefaultHandler(Value *element) : ElementHandler(), element(element) {} + + void setup(SeriesFlow *block, BodiedFunc *parent) override { + doSetup({element}, block, parent); + } + + Value *length(Module *M) override { + auto *e = M->Nr(vars[0]); + auto *ty = element->getType(); + auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_default_len", {ty}); + seqassertn(fn, "could not find default list length helper"); + return util::call(fn, {e}); + } + + Value *append(Value *result) override { + auto *M = result->getModule(); + auto *e = M->Nr(vars[0]); + auto *ty = result->getType(); + auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_default_append", {ty, ty}); + seqassertn(fn, "could not find default list append helper"); + return util::call(fn, {result, e}); + } + + static std::unique_ptr get(Value *v, types::Type *ty) { + if (!v->getType()->is(ty)) + return {}; + return std::make_unique(v); + } +}; + +struct SliceHandler : public ElementHandler { + Value *element; + Value *slice; + + SliceHandler(Value *element, Value *slice) + : ElementHandler(), element(element), slice(slice) {} + + void setup(SeriesFlow *block, BodiedFunc *parent) override { + doSetup({element, slice}, block, parent); + } + + Value *length(Module *M) override { + auto *e = M->Nr(vars[0]); + auto *s = M->Nr(vars[1]); + auto *ty = element->getType(); + auto *fn = + M->getOrRealizeMethod(ty, "_list_add_opt_slice_len", {ty, 
slice->getType()}); + seqassertn(fn, "could not find slice list length helper"); + return util::call(fn, {e, s}); + } + + Value *append(Value *result) override { + auto *M = result->getModule(); + auto *e = M->Nr(vars[0]); + auto *s = M->Nr(vars[1]); + auto *ty = result->getType(); + auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_slice_append", + {ty, ty, slice->getType()}); + seqassertn(fn, "could not find slice list append helper"); + return util::call(fn, {result, e, s}); + } + + static std::unique_ptr get(Value *v, types::Type *ty) { + if (!v->getType()->is(ty)) + return {}; + + if (auto *c = cast(v)) { + auto *func = util::getFunc(c->getCallee()); + if (func && func->getUnmangledName() == Module::GETITEM_MAGIC_NAME && + c->numArgs() == 2 && isList(c->front()) && + isSlice(c->back())) { + return std::make_unique(c->front(), c->back()); + } + } + + return {}; + } +}; + +struct LiteralHandler : public ElementHandler { + std::vector elements; + + LiteralHandler(std::vector elements) + : ElementHandler(), elements(std::move(elements)) {} + + void setup(SeriesFlow *block, BodiedFunc *parent) override { + doSetup(elements, block, parent); + } + + Value *length(Module *M) override { return M->getInt(elements.size()); } + + Value *append(Value *result) override { + auto *M = result->getModule(); + auto *ty = result->getType(); + auto *block = M->Nr(); + if (vars.empty()) + return block; + auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_literal_append", + {ty, elements[0]->getType()}); + seqassertn(fn, "could not find literal list append helper"); + for (auto *var : vars) { + block->push_back(util::call(fn, {result, M->Nr(var)})); + } + return block; + } + + static std::unique_ptr get(Value *v, types::Type *ty) { + if (!v->getType()->is(ty)) + return {}; + + if (auto *attr = v->getAttribute()) { + std::vector elements; + for (auto &element : attr->elements) { + if (element.star) + return {}; + elements.push_back(element.value); + } + 
return std::make_unique(std::move(elements)); + } + + return {}; + } +}; + +std::unique_ptr ElementHandler::get(Value *v, types::Type *ty) { + if (auto h = SliceHandler::get(v, ty)) + return std::move(h); + + if (auto h = LiteralHandler::get(v, ty)) + return std::move(h); + + return DefaultHandler::get(v, ty); +} + +struct InspectionResult { + bool valid = true; + std::vector args; +}; + +void inspect(Value *v, InspectionResult &r) { + // check if add first then go from there + if (isList(v)) { + if (auto *c = cast(v)) { + auto *func = util::getFunc(c->getCallee()); + if (func && func->getUnmangledName() == Module::ADD_MAGIC_NAME && + c->numArgs() == 2 && isList(c->front()) && isList(c->back())) { + inspect(c->front(), r); + inspect(c->back(), r); + return; + } + } + r.args.push_back(v); + } else { + r.valid = false; + } +} + +Value *optimize(BodiedFunc *parent, InspectionResult &r) { + if (!parent || !r.valid || r.args.size() <= 1) // parent is null if the cast in handle() failed + return nullptr; + + auto *M = parent->getModule(); + auto *ty = r.args[0]->getType(); + util::CloneVisitor cv(M); + std::vector> handlers; + + for (auto *v : r.args) { + if (!v->getType()->is(ty)) return nullptr; // every handler's get() is null on a type mismatch + handlers.push_back(ElementHandler::get(cv.clone(v), ty)); + } + + auto *opt = M->Nr(); + auto *len = util::makeVar(M->getInt(0), opt, parent)->getVar(); + + for (auto &h : handlers) { + h->setup(opt, parent); + } + + for (auto &h : handlers) { + opt->push_back(M->Nr(len, *M->Nr(len) + *h->length(M))); + } + + auto *fn = M->getOrRealizeMethod(ty, "_list_add_opt_opt_new", {M->getIntType()}); + seqassertn(fn, "could not find list new helper"); + auto *result = + util::makeVar(util::call(fn, {M->Nr(len)}), opt, parent)->getVar(); + + for (auto &h : handlers) { + opt->push_back(h->append(M->Nr(result))); + } + + return M->Nr(opt, M->Nr(result)); +} +} // namespace + +const std::string ListAdditionOptimization::KEY = 
"core-pythonic-list-addition-opt"; + +void ListAdditionOptimization::handle(CallInstr *v) { + // Only a call to __add__ can be the root of a list-addition chain. + auto *f = util::getFunc(v->getCallee()); + if 
(!f || f->getUnmangledName() != Module::ADD_MAGIC_NAME) + return; + + InspectionResult r; + inspect(v, r); + auto *parent = cast(getParentFunc()); + if (auto *opt = optimize(parent, r)) + v->replaceAll(opt); +} + +} // namespace pythonic +} // namespace transform +} // namespace ir +} // namespace codon diff --git a/codon/sir/transform/pythonic/list.h b/codon/sir/transform/pythonic/list.h new file mode 100644 index 00000000..ac7e2476 --- /dev/null +++ b/codon/sir/transform/pythonic/list.h @@ -0,0 +1,24 @@ +// Copyright (C) 2022 Exaloop Inc. + +#pragma once + +#include "codon/sir/transform/pass.h" + +namespace codon { +namespace ir { +namespace transform { +namespace pythonic { + +/// Pass to optimize list1 + list2 + ... +/// Also handles list slices and list literals efficiently. +class ListAdditionOptimization : public OperatorPass { +public: + static const std::string KEY; + std::string getKey() const override { return KEY; } + void handle(CallInstr *v) override; +}; + +} // namespace pythonic +} // namespace transform +} // namespace ir +} // namespace codon diff --git a/codon/sir/transform/pythonic/str.cpp b/codon/sir/transform/pythonic/str.cpp index 9eb4e644..ad5114c1 100644 --- a/codon/sir/transform/pythonic/str.cpp +++ b/codon/sir/transform/pythonic/str.cpp @@ -27,9 +27,8 @@ void inspect(Value *v, InspectionResult &r) { if (isString(v)) { if (auto *c = cast(v)) { auto *func = util::getFunc(c->getCallee()); - if (func && func->getUnmangledName() == "__add__" && - std::distance(c->begin(), c->end()) == 2 && isString(c->front()) && - isString(c->back())) { + if (func && func->getUnmangledName() == Module::ADD_MAGIC_NAME && + c->numArgs() == 2 && isString(c->front()) && isString(c->back())) { inspect(c->front(), r); inspect(c->back(), r); return; @@ -48,7 +47,7 @@ void StrAdditionOptimization::handle(CallInstr *v) { auto *M = v->getModule(); auto *f = util::getFunc(v->getCallee()); - if (!f || f->getUnmangledName() != "__add__") + if (!f || f->getUnmangledName() 
!= Module::ADD_MAGIC_NAME) return; InspectionResult r; diff --git a/stdlib/internal/types/collections/list.codon b/stdlib/internal/types/collections/list.codon index c1968592..d5afafef 100644 --- a/stdlib/internal/types/collections/list.codon +++ b/stdlib/internal/types/collections/list.codon @@ -242,6 +242,9 @@ class List: i += 1 return v + def __rmul__(self, n: int) -> List[T]: + return self.__mul__(n) + def __imul__(self, n: int) -> List[T]: if n == 1: return self @@ -468,4 +471,45 @@ class List: def __ge__(self, other: List[T]): return self._cmp(other) >= 0 + # list addition optimization helpers + + def _list_add_opt_default_len(v: List[T]): + return v.__len__() + + def _list_add_opt_default_append(ans: List[T], v: List[T]): + from internal.gc import sizeof + n = v.__len__() + str.memcpy((ans.arr.ptr + ans.len).as_byte(), v.arr.ptr.as_byte(), n * sizeof(T)) + ans.len += n + + def _list_add_opt_slice_len(v: List[T], s: Slice): + if s.start is None and s.stop is None and s.step is None: + return v.__len__() + start, stop, step, length = s.adjust_indices(v.__len__()) + return length + + def _list_add_opt_slice_append(ans: List[T], v: List[T], s: Slice): + from internal.gc import sizeof + if s.start is None and s.stop is None and s.step is None: + n = v.__len__() + str.memcpy((ans.arr.ptr + ans.len).as_byte(), v.arr.ptr.as_byte(), n * sizeof(T)) + ans.len += n + elif s.step is None: + start, stop, step, length = s.adjust_indices(v.__len__()) + n = length  # same count _list_add_opt_slice_len reserved; stop - start may be negative (e.g. x[7:3]) + str.memcpy((ans.arr.ptr + ans.len).as_byte(), (v.arr.ptr + start).as_byte(), n * sizeof(T)) + ans.len += n + else: + start, stop, step, length = s.adjust_indices(v.__len__()) + for i in range(start, stop, step): + ans.arr.ptr[ans.len] = v._get(i) + ans.len += 1 + + def _list_add_opt_literal_append(ans: List[T], elem: T): + ans.arr.ptr[ans.len] = elem + ans.len += 1 + + def _list_add_opt_opt_new(capacity: int): + return List[T](capacity=capacity) + list = List diff --git a/test/core/containers.codon 
b/test/core/containers.codon index 883808c9..add7a0a5 100644 --- a/test/core/containers.codon +++ b/test/core/containers.codon @@ -170,6 +170,7 @@ def test_list(): assert [a for a in l1] == [99, 100] assert [a for a in l2] == [1, 2, 1, 2] + assert 2 * [1, 2] == l2 l1 = [i*2 for i in range(3)] l1.insert(0, 99) diff --git a/test/main.cpp b/test/main.cpp index 377f77f3..d263b97e 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -497,6 +497,7 @@ INSTANTIATE_TEST_SUITE_P( "transform/for_lowering.codon", "transform/io_opt.codon", "transform/inlining.codon", + "transform/list_opt.codon", "transform/omp.codon", "transform/outlining.codon", "transform/str_opt.codon" diff --git a/test/transform/list_opt.codon b/test/transform/list_opt.codon new file mode 100644 index 00000000..6fc0f5f6 --- /dev/null +++ b/test/transform/list_opt.codon @@ -0,0 +1,42 @@ +add_count = 0 + +@extend +class List: + def __add__(self, other: List[T]) -> List[T]: + global add_count + add_count += 1 + n = self.len + other.len + v = List[T](n) + v.len = n + p = v.arr.ptr + str.memcpy(p.as_byte(), + self.arr.ptr.as_byte(), + self.len * gc.sizeof(T)) + str.memcpy((p + self.len).as_byte(), + other.arr.ptr.as_byte(), + other.len * gc.sizeof(T)) + return v + +@test +def test_list_optimization(): + add_count0 = add_count + A = list(range(3)) + B = list(range(10)) + assert [0] + [1] == [0, 1] + assert A + B == [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + assert (A + B[:] + B[7:] + B[:3] + B[3:7] + B[7:3:-1] + A[::-1] + [11, 22, 33] == + [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 2, 1, 0, 11, 22, 33]) + + def f(a, tag, order): + order.append(tag) + return a + + order = [] + X = (f([1, 2], 'a', order) + + [f(3, 'b', order), f(4, 'c', order)] + + f(list(range(10)), 'd', order)[f(5, 'e', order):f(2, 'f', order):f(-1, 'g', order)]) + assert X == [1, 2, 3, 4, 5, 4, 3] + assert order == ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + assert add_count == add_count0 + +test_list_optimization()