From c6db6543d33a7ca237f5f88021345759b0ca1f3e Mon Sep 17 00:00:00 2001 From: "A. R. Shajii" Date: Wed, 27 Jul 2022 13:42:13 -0400 Subject: [PATCH] Fix OpenMP lock initialization issue in JIT mode (#40) --- codon/sir/transform/parallel/openmp.cpp | 62 +++++++++++++------------ codon/sir/util/irtools.cpp | 2 +- stdlib/openmp.codon | 20 ++++---- 3 files changed, 43 insertions(+), 41 deletions(-) diff --git a/codon/sir/transform/parallel/openmp.cpp b/codon/sir/transform/parallel/openmp.cpp index 7a99b994..629ecc39 100644 --- a/codon/sir/transform/parallel/openmp.cpp +++ b/codon/sir/transform/parallel/openmp.cpp @@ -21,19 +21,11 @@ struct OMPTypes { types::Type *i32 = nullptr; types::Type *i8ptr = nullptr; types::Type *i32ptr = nullptr; - types::Type *routine = nullptr; - types::Type *ident = nullptr; - types::Type *task = nullptr; explicit OMPTypes(Module *M) { i32 = M->getIntNType(32, /*sign=*/true); i8ptr = M->getPointerType(M->getByteType()); i32ptr = M->getPointerType(i32); - routine = M->getFuncType(i32, {i32ptr, i8ptr}); - ident = M->getOrRealizeType("Ident", {}, ompModule); - task = M->getOrRealizeType("Task", {}, ompModule); - seqassertn(ident, "openmp.Ident type not found"); - seqassertn(task, "openmp.Task type not found"); } }; @@ -63,11 +55,19 @@ struct ReductionLocks { Var *critLock = nullptr; // lock used in reduction critical sections Var *createLock(Module *M) { - auto *main = cast(M->getMainFunc()); - auto *lck = util::alloc(M->getByteType(), 32); - auto *val = util::makeVar(lck, cast(main->getBody()), - /*parent=*/nullptr, /*prepend=*/true); - return val->getVar(); + auto *lockType = M->getOrRealizeType("Lock", {}, ompModule); + seqassertn(lockType, "openmp.Lock type not found"); + auto *var = M->Nr(lockType, /*global=*/true); + static int counter = 1; + var->setName(".omp_lock." + std::to_string(counter++)); + + // add it to main function so it doesn't get demoted by IR pass + auto *series = cast(cast(M->getMainFunc())->getBody()); + auto *init = (*lockType)(); + seqassertn(init, "could not initialize openmp.Lock"); + series->insert(series->begin(), M->Nr(var, init)); + + return var; } Var *getMainLock(Module *M) { @@ -283,20 +283,21 @@ struct Reduction { seqassertn(loc && gtid, "loc and/or gtid are null"); auto *lck = locks.getCritLock(M); - auto *critBegin = M->getOrRealizeFunc( - "_critical_begin", {loc->getType(), gtid->getType(), lck->getType()}, {}, - ompModule); + auto *lckPtrType = M->getPointerType(lck->getType()); + auto *critBegin = M->getOrRealizeFunc("_critical_begin", + {loc->getType(), gtid->getType(), lckPtrType}, + {}, ompModule); seqassertn(critBegin, "critical begin function not found"); auto *critEnd = M->getOrRealizeFunc( - "_critical_end", {loc->getType(), gtid->getType(), lck->getType()}, {}, - ompModule); + "_critical_end", {loc->getType(), gtid->getType(), lckPtrType}, {}, ompModule); seqassertn(critEnd, "critical end function not found"); - auto *critEnter = util::call( - critBegin, {M->Nr(loc), M->Nr(gtid), M->Nr(lck)}); + auto *critEnter = + util::call(critBegin, {M->Nr(loc), M->Nr(gtid), + M->Nr(lck)}); auto *operation = generateNonAtomicReduction(ptr, arg); - auto *critExit = util::call( - critEnd, {M->Nr(loc), M->Nr(gtid), M->Nr(lck)}); + auto *critExit = util::call(critEnd, {M->Nr(loc), M->Nr(gtid), + M->Nr(lck)}); // make sure the unlock is in a finally-block return util::series(critEnter, M->Nr(util::series(operation), util::series(critExit))); @@ -570,22 +571,23 @@ struct ParallelLoopTemplateReplacer : public util::Operator { auto *lck = locks.getMainLock(M); auto *rawReducer = ptrFromFunc(reducer); + auto *lckPtrType = M->getPointerType(lck->getType()); auto *reduceNoWait = M->getOrRealizeFunc( "_reduce_nowait", {reductionLocRef->getType(), gtid->getType(), reductionTuple->getType(), - rawReducer->getType(), lck->getType()}, + rawReducer->getType(), lckPtrType}, {}, ompModule); seqassertn(reduceNoWait, "reduce nowait function not found"); auto *reduceNoWaitEnd = M->getOrRealizeFunc( "_end_reduce_nowait", - {reductionLocRef->getType(), gtid->getType(), lck->getType()}, {}, ompModule); + {reductionLocRef->getType(), gtid->getType(), lckPtrType}, {}, ompModule); seqassertn(reduceNoWaitEnd, "end reduce nowait function not found"); auto *series = M->Nr(); auto *tupleVal = util::makeVar(reductionTuple, series, parent); - auto *reduceCode = util::call(reduceNoWait, {M->Nr(reductionLocRef), - M->Nr(gtid), tupleVal, - rawReducer, M->Nr(lck)}); + auto *reduceCode = util::call( + reduceNoWait, {M->Nr(reductionLocRef), M->Nr(gtid), + tupleVal, rawReducer, M->Nr(lck)}); auto *codeVar = util::makeVar(reduceCode, series, parent)->getVar(); seqassertn(codeVar->getType()->is(M->getIntType()), "wrong reduce code type"); @@ -600,9 +602,9 @@ struct ParallelLoopTemplateReplacer : public util::Operator { info.reduction.generateNonAtomicReduction(ptr, arg)); } } - sectionNonAtomic->push_back( - util::call(reduceNoWaitEnd, {M->Nr(reductionLocRef), - M->Nr(gtid), M->Nr(lck)})); + sectionNonAtomic->push_back(util::call( + reduceNoWaitEnd, {M->Nr(reductionLocRef), M->Nr(gtid), + M->Nr(lck)})); for (auto &info : sharedInfo) { if (info.reduction) { diff --git a/codon/sir/util/irtools.cpp b/codon/sir/util/irtools.cpp index 35c3296a..5e579f47 100644 --- a/codon/sir/util/irtools.cpp +++ b/codon/sir/util/irtools.cpp @@ -115,7 +115,7 @@ VarValue *makeVar(Value *x, SeriesFlow *flow, BodiedFunc *parent, bool prepend) auto *v = M->Nr(x->getType(), global); if (global) { static int counter = 1; - v->setName("_anon_global_" + std::to_string(counter++)); + v->setName(".anon_global." + std::to_string(counter++)); } auto *a = M->Nr(v, x); if (prepend) { diff --git a/stdlib/openmp.codon b/stdlib/openmp.codon index 06d5a1c0..d9014b84 100644 --- a/stdlib/openmp.codon +++ b/stdlib/openmp.codon @@ -126,13 +126,13 @@ def _reduction_loc(): _reduction_loc() -def _critical_begin(loc_ref: Ptr[Ident], gtid: int, lck: cobj): - from C import __kmpc_critical(Ptr[Ident], i32, cobj) +def _critical_begin(loc_ref: Ptr[Ident], gtid: int, lck: Ptr[Lock]): + from C import __kmpc_critical(Ptr[Ident], i32, Ptr[Lock]) __kmpc_critical(loc_ref, i32(gtid), lck) -def _critical_end(loc_ref: Ptr[Ident], gtid: int, lck: cobj): - from C import __kmpc_end_critical(Ptr[Ident], i32, cobj) +def _critical_end(loc_ref: Ptr[Ident], gtid: int, lck: Ptr[Lock]): + from C import __kmpc_end_critical(Ptr[Ident], i32, Ptr[Lock]) __kmpc_end_critical(loc_ref, i32(gtid), lck) @@ -325,12 +325,12 @@ def _reduce_nowait( gtid: int, reduce_data: T, reduce_func: cobj, - lck: cobj, + lck: Ptr[Lock], T: type, ): from internal.gc import sizeof - from C import __kmpc_reduce_nowait(Ptr[Ident], i32, i32, int, cobj, cobj, cobj) -> i32 + from C import __kmpc_reduce_nowait(Ptr[Ident], i32, i32, int, cobj, cobj, Ptr[Lock]) -> i32 num_vars = staticlen(reduce_data) reduce_size = sizeof(T) return int( @@ -346,8 +346,8 @@ def _reduce_nowait( ) -def _end_reduce_nowait(loc_ref: Ptr[Ident], gtid: int, lck: cobj): - from C import __kmpc_end_reduce_nowait(Ptr[Ident], i32, cobj) +def _end_reduce_nowait(loc_ref: Ptr[Ident], gtid: int, lck: Ptr[Lock]): + from C import __kmpc_end_reduce_nowait(Ptr[Ident], i32, Ptr[Lock]) __kmpc_end_reduce_nowait(loc_ref, i32(gtid), lck) @@ -875,11 +875,11 @@ def critical(func): def _wrapper(*args, **kwargs): gtid = get_thread_num() loc = _default_loc() - _critical_begin(loc, gtid, __ptr__(_default_lock).as_byte()) + _critical_begin(loc, gtid, __ptr__(_default_lock)) try: func(*args, **kwargs) finally: - _critical_end(loc, gtid, __ptr__(_default_lock).as_byte()) + _critical_end(loc, gtid, __ptr__(_default_lock)) return _wrapper