sim/aarch64/ * simulator.c (do_vec_MOV_immediate, case 0x8): Add missing break. (do_vec_MLS): Reverse order of subtraction operands. sim/testsuite/sim/aarch64/ * mls.s: New. diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c index be3d6c7..65a0e2e 100644 --- a/sim/aarch64/simulator.c +++ b/sim/aarch64/simulator.c @@ -3221,7 +3221,8 @@ do_vec_MOV_immediate (sim_cpu *cpu) case 0x8: /* 16-bit, no shift. */ for (i = 0; i < (full ? 8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, val); - /* Fall through. */ + break; + case 0xd: /* 32-bit, mask shift by 16. */ val <<= 8; val |= 0xFF; @@ -6346,25 +6347,25 @@ do_vec_MLS (sim_cpu *cpu) case 0: for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, - (aarch64_get_vec_u8 (cpu, vn, i) - * aarch64_get_vec_u8 (cpu, vm, i)) - - aarch64_get_vec_u8 (cpu, vd, i)); + aarch64_get_vec_u8 (cpu, vd, i) + - (aarch64_get_vec_u8 (cpu, vn, i) + * aarch64_get_vec_u8 (cpu, vm, i))); return; case 1: for (i = 0; i < (full ? 8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, - (aarch64_get_vec_u16 (cpu, vn, i) - * aarch64_get_vec_u16 (cpu, vm, i)) - - aarch64_get_vec_u16 (cpu, vd, i)); + aarch64_get_vec_u16 (cpu, vd, i) + - (aarch64_get_vec_u16 (cpu, vn, i) + * aarch64_get_vec_u16 (cpu, vm, i))); return; case 2: for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, - (aarch64_get_vec_u32 (cpu, vn, i) - * aarch64_get_vec_u32 (cpu, vm, i)) - - aarch64_get_vec_u32 (cpu, vd, i)); + aarch64_get_vec_u32 (cpu, vd, i) + - (aarch64_get_vec_u32 (cpu, vn, i) + * aarch64_get_vec_u32 (cpu, vm, i))); return; default: diff --git a/sim/testsuite/sim/aarch64/mls.s b/sim/testsuite/sim/aarch64/mls.s new file mode 100644 index 0000000..a34a1aa --- /dev/null +++ b/sim/testsuite/sim/aarch64/mls.s @@ -0,0 +1,103 @@ +# mach: aarch64 + +# Check the vector multiply subtract instruction: mls. + +.include "testutils.inc" + +input: + .word 0x04030201 + .word 0x08070605 + .word 0x0c0b0a09 + .word 0x100f0e0d +m8b: + .word 0xf1f8fd00 + .word 0xc1d0dde8 +m16b: + .word 0xf1f8fd00 + .word 0xc1d0dde8 + .word 0x71889db0 + .word 0x01203d58 +m4h: + .word 0xe7f8fc00 + .word 0x8fd0c3e8 +m8h: + .word 0xe7f8fc00 + .word 0x8fd0c3e8 + .word 0xf7884bb0 + .word 0x1f209358 +m2s: + .word 0xebf5fc00 + .word 0x5b95c3e8 +m4s: + .word 0xebf5fc00 + .word 0x5b95c3e8 + .word 0x4ad54bb0 + .word 0xb9b49358 + + start + adrp x0, input + ldr q0, [x0, #:lo12:input] + + movi v1.8b, #1 + mls v1.8b, v0.8b, v0.8b + mov x1, v1.d[0] + adrp x3, m8b + ldr x4, [x3, #:lo12:m8b] + cmp x1, x4 + bne .Lfailure + + movi v1.16b, #1 + mls v1.16b, v0.16b, v0.16b + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m16b + ldr x4, [x3, #:lo12:m16b] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m16b+8] + cmp x2, x5 + bne .Lfailure + + movi v1.4h, #1 + mls v1.4h, v0.4h, v0.4h + mov x1, v1.d[0] + adrp x3, m4h + ldr x4, [x3, #:lo12:m4h] + cmp x1, x4 + bne .Lfailure + + movi v1.8h, #1 + mls v1.8h, v0.8h, v0.8h + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m8h + ldr x4, [x3, #:lo12:m8h] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m8h+8] + cmp x2, x5 + bne .Lfailure + + movi v1.2s, #1 + mls v1.2s, v0.2s, v0.2s + mov x1, v1.d[0] + adrp x3, m2s + ldr x4, [x3, #:lo12:m2s] + cmp x1, x4 + bne .Lfailure + + movi v1.4s, #1 + mls v1.4s, v0.4s, v0.4s + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m4s + ldr x4, [x3, #:lo12:m4s] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m4s+8] + cmp x2, x5 + bne .Lfailure + + pass +.Lfailure: + fail