From 170b3e6e07613f83afa320bec3e0e0e6da53f583 Mon Sep 17 00:00:00 2001 From: Sameera Deshpande Date: Fri, 15 Feb 2019 07:46:16 +0530 Subject: [PATCH 1/8] Add support for FNMADD and FNMSUB. --- src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 5b40f4cc..cc842b53 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -361,6 +361,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) return 0; } +/* Fuse FP neg-multiply-add/sub. */ +static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) +{ + IRRef ref = ir->op1; + IRIns *irn = IR(ref); + if (irn->o != IR_ADD && irn->o != IR_SUB) + return 0; + + if (!mayfuse(as, ref)) + return 0; + + IRRef lref = irn->op1, rref = irn->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31)); + return 1; + } + return 0; +} + /* Fuse BAND + BSHL/BSHR into UBFM. 
*/ static int asm_fuseandshift(ASMState *as, IRIns *ir) { @@ -1461,7 +1490,8 @@ static void asm_mul(ASMState *as, IRIns *ir) static void asm_neg(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, A64I_FNEGd); + if (!asm_fusenmadd(as, ir, A64I_FNMADDd)) + asm_fpunary(as, ir, A64I_FNEGd); return; } asm_intneg(as, ir); -- 2.43.2 From 09b3c908b4e6397655e0c476ca7d3528d2b4773d Mon Sep 17 00:00:00 2001 From: Vivien HENRIET Date: Wed, 30 Jan 2019 23:44:51 +0100 Subject: [PATCH 2/8] Fix os.date() for timezone change awareness On POSIX targets, system timezone changes are not taken into account. To reproduce, 1. call os.date() 2. change your timezone 3. call os.date() within the same luajit instance On POSIX targets, os.date uses localtime_r to retrieve the time. On other targets, the function localtime is used. But there is a behaviour difference between these two functions. localtime acts as if it called tzset, which localtime_r doesn't. To fix the issue, tzset is called before localtime_r. --- src/lib_os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib_os.c b/src/lib_os.c index cf0df281..d9dda853 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -185,6 +185,7 @@ LJLIB_CF(os_date) #endif } else { #if LJ_TARGET_POSIX + tzset(); stm = localtime_r(&t, &rtm); #else stm = localtime(&t); -- 2.43.2 From 78ec3dbc5a3f6c0198e3dbe7901d80f7feb77344 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Thu, 14 Mar 2019 23:08:24 +0530 Subject: [PATCH 3/8] Revert "FFI: Make FP to U64 conversions match JIT backend behavior." This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8. The patch breaks test 279, i.e. assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL") The patch was put in to make the JIT and interpreter behaviour consistent[1] for float to unsigned int conversions but it ended up making things worse. There needs to be a better fix for this. 
[1] https://github.com/LuaJIT/LuaJIT/pull/415 --- src/lj_obj.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/lj_obj.h b/src/lj_obj.h index 2d4386e1..d40f7264 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -996,22 +996,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) #define lj_num2int(n) ((int32_t)(n)) -/* -** This must match the JIT backend behavior. In particular for archs -** that don't have a common hardware instruction for this conversion. -** Note that signed FP to unsigned int conversions have an undefined -** result and should never be relied upon in portable FFI code. -** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). -*/ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) { -#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS - int64_t i = (int64_t)n; - if (i < 0) i = (int64_t)(n - 18446744073709551616.0); - return (uint64_t)i; -#else - return (uint64_t)n; +#ifdef _MSC_VER + if (n >= 9223372036854775808.0) /* They think it's a feature. */ + return (uint64_t)(int64_t)(n - 18446744073709551616.0); + else #endif + return (uint64_t)n; } static LJ_AINLINE int32_t numberVint(cTValue *o) -- 2.43.2 From b71187a97405c03c64caa9a295e94c7708ffab35 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Sun, 17 Mar 2019 11:34:04 +0530 Subject: [PATCH 4/8] Guard against undefined behaviour when casting from float to unsigned Only the range (-1.0, UINT64_MAX) can be safely converted to unsigned directly, and (-INT64_MAX, INT64_MAX) through a cast to int64_t first. The remaining range is undefined. TODO: Do the same for JIT as well as for float to other ranges. --- src/lj_obj.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lj_obj.h b/src/lj_obj.h index d40f7264..f79cd02c 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -998,12 +998,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) { + /* Undefined behaviour. 
This is deliberately not a full check because we + don't want to slow down compliant code. */ + lj_assertX(n >= -9223372036854775809.0, "Overflow"); #ifdef _MSC_VER if (n >= 9223372036854775808.0) /* They think it's a feature. */ return (uint64_t)(int64_t)(n - 18446744073709551616.0); else #endif - return (uint64_t)n; + if (n > -1.0) + return (uint64_t)n; + else + return (uint64_t)(int64_t)n; } static LJ_AINLINE int32_t numberVint(cTValue *o) -- 2.43.2 From 19908a818ccd8d5888bab6f4a4d518701570eaf8 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Mon, 25 Mar 2019 17:56:53 +0530 Subject: [PATCH 5/8] Fix build error with fnmsub fusing --- src/lj_asm_arm64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index cc842b53..d56a376a 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1490,7 +1490,7 @@ static void asm_mul(ASMState *as, IRIns *ir) static void asm_neg(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) { - if (!asm_fusenmadd(as, ir, A64I_FNMADDd)) + if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd)) asm_fpunary(as, ir, A64I_FNEGd); return; } -- 2.43.2 From e8279fb7d556553adb6f645f481ba399f144c80b Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Thu, 28 Mar 2019 09:19:34 +0530 Subject: [PATCH 6/8] aarch64: better float to unsigned int conversion A straight float to unsigned conversion has a limited range of (-1.0, UTYPE_MAX) which should be fine in general but for the sake of consistency across the interpreter and the JIT compiler, it is necessary to work a wee bit harder to expand this range to (TYPE_MIN, UTYPE_MAX), which can be done with a simple range check. This adds a couple of branches but only one of the branches should have a noticeable performance impact on most processors with branch predictors, and that too only if the input number varies wildly in range. This currently works only for 64-bit conversions, 32-bit is still WIP. 
--- src/lj_asm_arm64.h | 30 ++++++++++++++++++++++-------- src/lj_target_arm64.h | 1 + 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index d56a376a..a1b44ec2 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -665,14 +665,28 @@ static void asm_conv(ASMState *as, IRIns *ir) } else { Reg left = ra_alloc1(as, lref, RSET_FPR); Reg dest = ra_dest(as, ir, RSET_GPR); - A64Ins ai = irt_is64(ir->t) ? - (st == IRT_NUM ? - (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : - (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : - (st == IRT_NUM ? - (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : - (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); - emit_dn(as, ai, dest, (left & 31)); + + A64Ins ai_signed = st == IRT_NUM ? + (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) : + (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32); + + if (irt_isi64(ir->t) || irt_isint(ir->t)) + emit_dn(as, ai_signed, dest, (left & 31)); + else { + A64Ins ai_unsigned = st == IRT_NUM ? + (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) : + (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32); + + MCLabel l_done = emit_label(as); + emit_dn(as, ai_unsigned, dest, (left & 31)); + MCLabel l_signed = emit_label(as); + emit_jmp(as, l_done); + emit_dn(as, ai_signed, dest, (left & 31)); + /* The valid range for float to unsigned int conversion is (-1.0, + UINT{,64}_MAX-1), but we just compare with 0 to save a load. */ + emit_cond_branch(as, CC_PL, l_signed); + emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0); + } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. 
*/ Reg dest = ra_dest(as, ir, RSET_GPR); diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index c34f1e59..1e2f19ea 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -288,6 +288,7 @@ typedef enum A64Ins { A64I_STPs = 0x2d000000, A64I_STPd = 0x6d000000, A64I_FCMPd = 0x1e602000, + A64I_FCMPZs = 0x1e202008, A64I_FCMPZd = 0x1e602008, A64I_FCSELd = 0x1e600c00, A64I_FRINTMd = 0x1e654000, -- 2.43.2 From f37d7e3dac8648771171038c13408811ab3e2694 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Thu, 28 Mar 2019 10:50:23 +0530 Subject: [PATCH 7/8] Better behaviour for float to uint32_t conversions This is the uint32_t part of the float to unsigned int conversions for the interpreter. The cast ends up working correctly for x86 but not for aarch64 since fcvtzu sets the result to zero on negative inputs. Work slightly harder to make sure that negative number inputs behave like x86. This fixes the interpreter but not the JIT compiler, which errors out during the narrowing pass. --- src/lj_cconv.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 419a8f45..ebb98521 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -203,7 +203,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, else if (dsize == 2) *(int16_t *)dp = (int16_t)i; else *(int8_t *)dp = (int8_t)i; } else if (dsize == 4) { - *(uint32_t *)dp = (uint32_t)n; + /* Undefined behaviour. This is deliberately not a full check because we + * don't want to slow down compliant code. 
*/ + lj_assertX(n >= -2147483649.0, "Overflow"); + if (n > -1.0) + *(uint32_t *)dp = (uint32_t)n; + else + *(uint32_t *)dp = (uint32_t)(int32_t)n; } else if (dsize == 8) { if (!(dinfo & CTF_UNSIGNED)) *(int64_t *)dp = (int64_t)n; -- 2.43.2 From 9d79974a1e059f34ad9d2ad38419be34acd6c343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 19 Nov 2015 16:29:02 +0200 Subject: [PATCH 8/8] Get rid of LUAJIT_VERSION_SYM that changes ABI on every patch release --- src/lj_dispatch.c | 5 ----- src/luajit.c | 1 - src/luajit_rolling.h | 3 --- 3 files changed, 9 deletions(-) diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index b9748bba..d09238f8 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -318,11 +318,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) return 1; /* OK. */ } -/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */ -LUA_API void LUAJIT_VERSION_SYM(void) -{ -} - /* -- Hooks --------------------------------------------------------------- */ /* This function can be called asynchronously (e.g. during a signal). */ diff --git a/src/luajit.c b/src/luajit.c index 73e29d44..31fdba18 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -515,7 +515,6 @@ static int pmain(lua_State *L) int argn; int flags = 0; globalL = L; - LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ argn = collectargs(argv, &flags); if (argn < 0) { /* Invalid args? */ diff --git a/src/luajit_rolling.h b/src/luajit_rolling.h index 2d04402c..5ab4167d 100644 --- a/src/luajit_rolling.h +++ b/src/luajit_rolling.h @@ -73,8 +73,5 @@ LUA_API void luaJIT_profile_stop(lua_State *L); LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, int depth, size_t *len); -/* Enforce (dynamic) linker error for version mismatches. Call from main. */ -LUA_API void LUAJIT_VERSION_SYM(void); - #error "DO NOT USE luajit_rolling.h -- only include build-generated luajit.h" #endif -- 2.43.2