Skip to content

Commit

Permalink
[mono] Adding support for Vector128::ExtractMostSignificantBits intri…
Browse files Browse the repository at this point in the history
…nsics on amd64 (#89997)

* Extract MSB amd64

* add SSSE3 check
  • Loading branch information
matouskozak committed Aug 10, 2023
1 parent 23886f1 commit f465d33
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/mono/mono/arch/amd64/amd64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,7 @@ typedef union {

#define amd64_sse_movsldup_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0xf3, 0x0f, 0x12)

/*
 * Emit PSHUFB dreg, reg (register-to-register byte shuffle).
 * Encoded through emit_sse_reg_reg_op4 with the bytes 0x66, 0x0f, 0x38, 0x00.
 * This is an SSSE3 instruction, so callers are expected to check for SSSE3
 * support before selecting it.
 */
#define amd64_sse_pshufb_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x00)

#define amd64_sse_pshufhw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm((inst), (dreg), (reg), 0xf3, 0x0f, 0x70, (imm))

Expand Down Expand Up @@ -947,6 +948,10 @@ typedef union {

#define amd64_sse_pmovmskb_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xd7)

/*
 * Emit MOVMSKPS dreg, reg: gather the sign bit of each packed single-precision
 * lane of `reg` into the low bits of `dreg`.
 * Uses emit_sse_reg_reg_op2 because this encoding has no leading prefix byte,
 * only the two opcode bytes 0x0f, 0x50.
 */
#define amd64_sse_movmskps_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op2((inst), (dreg), (reg), 0x0f, 0x50)

/*
 * Emit MOVMSKPD dreg, reg: gather the sign bit of each packed double-precision
 * lane of `reg` into the low bits of `dreg`.
 * Same 0x0f, 0x50 opcode as the packed-single form, selected by the 0x66 prefix.
 */
#define amd64_sse_movmskpd_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0x50)


#define amd64_sse_pand_reg_reg(inst, dreg, reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xdb)

Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/cpu-amd64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,8 @@ expand_r4: dest:x src1:f len:16
expand_r8: dest:x src1:f len:13
xop_x_x_x: dest:x src1:x src2:x len:16 clob:1
xop_x_x: dest:x src1:x len:16 clob:1
sse_movmsk: dest:i src1:x len:5
ssse3_shuffle: dest:x src1:x src2:x len:6 clob:1
sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1
sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1
vector_andnot: dest:x src1:x src2:x len:7 clob:1
Expand Down
17 changes: 17 additions & 0 deletions src/mono/mono/mini/mini-amd64.c
Original file line number Diff line number Diff line change
Expand Up @@ -7521,6 +7521,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
break;
/*
 * Move-mask: collect the most significant bit of every vector lane of sreg1
 * into dreg. The lane width is chosen by the element type in ins->inst_c1.
 */
case OP_SSE_MOVMSK: {
switch (ins->inst_c1) {
case MONO_TYPE_R4:
/* 4-byte lanes: movmskps */
amd64_sse_movmskps_reg_reg (code, ins->dreg, ins->sreg1);
break;
case MONO_TYPE_R8:
/* 8-byte lanes: movmskpd */
amd64_sse_movmskpd_reg_reg (code, ins->dreg, ins->sreg1);
break;
default:
/* Any other element type is handled per byte: pmovmskb */
amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
break;
}
break;
}
/*
 * Byte shuffle: pshufb dreg, sreg2, where sreg2 holds the shuffle control mask.
 * sreg1 is not passed to the emitter; pshufb shuffles its destination in place.
 * NOTE(review): this relies on dreg already holding sreg1 -- presumably
 * guaranteed by the clob:1 flag on the ssse3_shuffle entry in
 * cpu-amd64.mdesc; confirm.
 */
case OP_SSSE3_SHUFFLE:
amd64_sse_pshufb_reg_reg (code, ins->dreg, ins->sreg2);
break;
case OP_SSE41_ROUNDP: {
if (ins->inst_c1 == MONO_TYPE_R8)
amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
Expand Down
46 changes: 45 additions & 1 deletion src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,51 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
}
return result_ins;
#elif defined(TARGET_AMD64)
return NULL;
/*
 * amd64 lowering: pick the element type that is handed to OP_SSE_MOVMSK.
 * Element types not listed in the switch below keep the MONO_TYPE_I1 default.
 * NOTE(review): `type` is presumably stored as inst_c1 by
 * emit_simd_ins_for_sig, which is what the OP_SSE_MOVMSK emitter in
 * mini-amd64.c switches on -- confirm against the helper's signature.
 */
int type = MONO_TYPE_I1;

switch (arg0_type) {
case MONO_TYPE_U2:
case MONO_TYPE_I2: {
/*
 * 16-bit lanes are handled by first compacting the high byte of every
 * lane with a byte shuffle (OP_SSSE3_SHUFFLE) and then taking a
 * byte-wise mask of the result. Without SSSE3 the intrinsic is not
 * handled here (NULL is returned).
 */
if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3))
return NULL;

type = type_enum_is_unsigned (arg0_type) ? MONO_TYPE_U1 : MONO_TYPE_I1;
MonoClass* arg_class = mono_class_from_mono_type_internal (fsig->params [0]);

/*
 * amd64 is little-endian, so the odd-indexed byte of each 16-bit lane
 * is its high byte, and the top bit of that byte is the lane's MSB.
 */
guint64 shuffle_mask[2];
shuffle_mask[0] = 0x0F0D0B0907050301; // Place odd bytes in the lower half of vector
shuffle_mask[1] = 0x8080808080808080; // Zero the upper half

MonoInst* shuffle_vec = emit_xconst_v128 (cfg, arg_class, (guint8*)shuffle_mask);
shuffle_vec->klass = arg_class;

/* Replace the input with the shuffled vector; the mask is then taken from it. */
args [0] = emit_simd_ins (cfg, klass, OP_SSSE3_SHUFFLE, args [0]->dreg, shuffle_vec->dreg);
args [0]->inst_c1 = type;
break;
}
/* Native-sized integers are grouped with the 4-byte lanes on 32-bit targets... */
#if TARGET_SIZEOF_VOID_P == 4
case MONO_TYPE_I:
case MONO_TYPE_U:
#endif
case MONO_TYPE_U4:
case MONO_TYPE_I4:
case MONO_TYPE_R4: {
/* All 4-byte element types share the R4 tag. */
type = MONO_TYPE_R4;
break;
}
/* ...and with the 8-byte lanes on 64-bit targets. */
#if TARGET_SIZEOF_VOID_P == 8
case MONO_TYPE_I:
case MONO_TYPE_U:
#endif
case MONO_TYPE_U8:
case MONO_TYPE_I8:
case MONO_TYPE_R8: {
/* All 8-byte element types share the R8 tag. */
type = MONO_TYPE_R8;
break;
}
}

return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args);
#endif
}
case SN_GetElement: {
Expand Down

0 comments on commit f465d33

Please sign in to comment.