Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono] Intrinsify multiple LoadVector API's #98077

Merged
merged 12 commits into from
Feb 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -2258,6 +2258,7 @@ internal Arm64() { }
/// A64: LD4 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
/// </summary>
public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4AndUnzip(double* address) { throw new PlatformNotSupportedException(); }
#endif

/// <summary>
/// A64: LD1 { Vn.16B, Vn+1.16B }, [Xn]
Expand Down Expand Up @@ -2408,7 +2409,6 @@ internal Arm64() { }
/// A64: LD1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
/// </summary>
public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4(double* address) { throw new PlatformNotSupportedException(); }
#endif
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// float64x2_t vmaxq_f64 (float64x2_t a, float64x2_t b)
Expand Down Expand Up @@ -9632,9 +9632,6 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector128<ulong> LoadVector128(ulong* address) { throw new PlatformNotSupportedException(); }

#if false
// Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081

/// <summary>
/// A64: LD2 { Vn.8B, Vn+1.8B }, [Xn]
/// </summary>
Expand Down Expand Up @@ -9844,7 +9841,6 @@ internal Arm64() { }
/// A64: LD1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
/// </summary>
public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadVector64x4(float* address) { throw new PlatformNotSupportedException(); }
#endif

/// <summary>
/// uint8x8_t vmax_u8 (uint8x8_t a, uint8x8_t b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2103,9 +2103,6 @@ internal Arm64() { }
/// </summary>
public static unsafe (Vector128<ulong> Value1, Vector128<ulong> Value2) LoadPairVector128NonTemporal(ulong* address) => LoadPairVector128NonTemporal(address);

#if false
// Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081

/// <summary>
/// A64: LD2 { Vn.16B, Vn+1.16B }, [Xn]
/// </summary>
Expand Down Expand Up @@ -2405,7 +2402,6 @@ internal Arm64() { }
/// A64: LD1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
/// </summary>
public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4(double* address) => LoadVector128x4(address);
#endif

/// <summary>
/// float64x2_t vmaxq_f64 (float64x2_t a, float64x2_t b)
Expand Down Expand Up @@ -9628,9 +9624,6 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector128<ulong> LoadVector128(ulong* address) => LoadVector128(address);

#if false
// Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081

/// <summary>
/// A64: LD2 { Vn.8B, Vn+1.8B }, [Xn]
/// </summary>
Expand Down Expand Up @@ -9840,7 +9833,6 @@ internal Arm64() { }
/// A64: LD1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
/// </summary>
public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadVector64x4(float* address) => LoadVector64x4(address);
#endif

/// <summary>
/// uint8x8_t vmax_u8 (uint8x8_t a, uint8x8_t b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1990,8 +1990,6 @@ internal AdvSimd() { }
public static unsafe System.Runtime.Intrinsics.Vector64<ushort> LoadVector64(ushort* address) { throw null; }
public static unsafe System.Runtime.Intrinsics.Vector64<uint> LoadVector64(uint* address) { throw null; }
public static unsafe System.Runtime.Intrinsics.Vector64<ulong> LoadVector64(ulong* address) { throw null; }
#if false
// Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
public static unsafe (System.Runtime.Intrinsics.Vector64<byte> Value1, System.Runtime.Intrinsics.Vector64<byte> Value2) LoadVector64x2AndUnzip(byte* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector64<sbyte> Value1, System.Runtime.Intrinsics.Vector64<sbyte> Value2) LoadVector64x2AndUnzip(sbyte* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector64<short> Value1, System.Runtime.Intrinsics.Vector64<short> Value2) LoadVector64x2AndUnzip(short* address) { throw null; }
Expand Down Expand Up @@ -2034,7 +2032,6 @@ internal AdvSimd() { }
public static unsafe (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2, System.Runtime.Intrinsics.Vector64<int> Value3, System.Runtime.Intrinsics.Vector64<int> Value4) LoadVector64x4(int* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2, System.Runtime.Intrinsics.Vector64<uint> Value3, System.Runtime.Intrinsics.Vector64<uint> Value4) LoadVector64x4(uint* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2, System.Runtime.Intrinsics.Vector64<float> Value3, System.Runtime.Intrinsics.Vector64<float> Value4) LoadVector64x4(float* address) { throw null; }
#endif
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
public static System.Runtime.Intrinsics.Vector128<byte> Max(System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
public static System.Runtime.Intrinsics.Vector128<short> Max(System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
public static System.Runtime.Intrinsics.Vector128<int> Max(System.Runtime.Intrinsics.Vector128<int> left, System.Runtime.Intrinsics.Vector128<int> right) { throw null; }
Expand Down Expand Up @@ -3486,6 +3483,7 @@ internal Arm64() { }
public static unsafe (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) LoadVector128x4AndUnzip(ulong* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) LoadVector128x4AndUnzip(float* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) LoadVector128x4AndUnzip(double* address) { throw null; }
#endif
public static unsafe (System.Runtime.Intrinsics.Vector128<byte> Value1, System.Runtime.Intrinsics.Vector128<byte> Value2) LoadVector128x2(byte* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector128<sbyte> Value1, System.Runtime.Intrinsics.Vector128<sbyte> Value2) LoadVector128x2(sbyte* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector128<short> Value1, System.Runtime.Intrinsics.Vector128<short> Value2) LoadVector128x2(short* address) { throw null; }
Expand Down Expand Up @@ -3516,7 +3514,6 @@ internal Arm64() { }
public static unsafe (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) LoadVector128x4(ulong* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) LoadVector128x4(float* address) { throw null; }
public static unsafe (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) LoadVector128x4(double* address) { throw null; }
#endif
public static System.Runtime.Intrinsics.Vector128<double> Max(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right) { throw null; }
public static System.Runtime.Intrinsics.Vector64<byte> MaxAcross(System.Runtime.Intrinsics.Vector128<byte> value) { throw null; }
public static System.Runtime.Intrinsics.Vector64<short> MaxAcross(System.Runtime.Intrinsics.Vector128<short> value) { throw null; }
Expand Down
13 changes: 13 additions & 0 deletions src/mono/mono/mini/llvm-intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#define WidenAcross INTRIN_kind_widen_across
#define Across INTRIN_kind_across
#define Arm64DotProd INTRIN_kind_arm64_dot_prod
#define AddPointer INTRIN_kind_add_pointer
#if !defined(Generic)
#define Generic
#endif
Expand Down Expand Up @@ -361,6 +362,18 @@ INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SADDV, aarch64_neon_saddv, Arm64, Across,
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UADDV, aarch64_neon_uaddv, Arm64, Across, V64 | V128 | I1 | I2 | I4 | I8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_FADDV, aarch64_neon_faddv, Arm64, Across, V64 | V128 | R4 | R8)

/*
 * Multi-register vector loads used by OP_ARM64_LDM.
 * - LD1X2/LD1X3/LD1X4 back the AdvSimd LoadVector{64,128}x{2,3,4} APIs.
 * - LD2/LD3/LD4 back the corresponding LoadVector{64,128}x{2,3,4}AndUnzip APIs.
 * The AddPointer kind makes add_intrinsic () register each overload with two types:
 * the vector type and a pointer to that vector's element type.
 * The V64 entries list only I1/I2/I4/R4; the V128 entries additionally list I8/R8.
 */
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X2_V64, aarch64_neon_ld1x2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X3_V64, aarch64_neon_ld1x3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X4_V64, aarch64_neon_ld1x4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X2_V128, aarch64_neon_ld1x2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X3_V128, aarch64_neon_ld1x3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X4_V128, aarch64_neon_ld1x4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2_V64, aarch64_neon_ld2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3_V64, aarch64_neon_ld3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4_V64, aarch64_neon_ld4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2_V128, aarch64_neon_ld2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3_V128, aarch64_neon_ld3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4_V128, aarch64_neon_ld4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SMAXV, aarch64_neon_smaxv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UMAXV, aarch64_neon_umaxv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SMINV, aarch64_neon_sminv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/mini-llvm-cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ mono_llvm_register_intrinsic (LLVMModuleRef module, IntrinsicId id, LLVMTypeRef
}

/*
* mono_llvm_register_intrinsic:
* mono_llvm_register_overloaded_intrinsic:
*
* Register an overloaded LLVM intrinsic identified by ID using the supplied types.
*/
Expand Down
29 changes: 25 additions & 4 deletions src/mono/mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ static const llvm_ovr_tag_t intrin_arm64_ovr [] = {
#define INTRINS_OVR_2_ARG(sym, ...) 0,
#define INTRINS_OVR_3_ARG(sym, ...) 0,
#define INTRINS_OVR_TAG(sym, _, arch, spec) spec,
#define INTRINS_OVR_TAG_KIND(sym, _, kind, arch, spec) spec,
#define INTRINS_OVR_TAG_KIND(sym, _, arch, kind, spec) spec,
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
#include "llvm-intrinsics.h"
};

Expand All @@ -404,6 +404,7 @@ enum {
INTRIN_kind_widen_across,
INTRIN_kind_across,
INTRIN_kind_arm64_dot_prod,
INTRIN_kind_add_pointer,
};

static const uint8_t intrin_kind [] = {
Expand Down Expand Up @@ -661,13 +662,14 @@ static LLVMTypeRef
simd_valuetuple_to_llvm_type (EmitContext *ctx, MonoClass *klass)
{
const char *klass_name = m_class_get_name (klass);
if (!strcmp (klass_name, "ValueTuple`2")) {
MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
if (strstr (klass_name, "ValueTuple") != NULL) {
MonoGenericInst *classInst = mono_class_get_generic_class (klass)->context.class_inst;
MonoType *etype = classInst->type_argv [0];
vargaz marked this conversation as resolved.
Show resolved Hide resolved
if (etype->type != MONO_TYPE_GENERICINST)
g_assert_not_reached ();
MonoClass *eklass = etype->data.generic_class->cached_class;
LLVMTypeRef ltype = simd_class_to_llvm_type (ctx, eklass);
return LLVMArrayType (ltype, 2);
return LLVMArrayType (ltype, classInst->type_argc);
}
g_assert_not_reached ();
}
Expand Down Expand Up @@ -11616,6 +11618,21 @@ MONO_RESTORE_WARNING
values [ins->dreg] = vec_sz == 64 ? val : NULL;
break;
}
case OP_ARM64_LDM: {
/*
 * Multi-vector load. The AdvSimd LoadVector{64,128}x{2,3,4} and *AndUnzip methods
 * all map to this opcode; the concrete LLVM intrinsic is carried in ins->inst_c0.
 * The intrinsic result is stored to the stack slot backing ins->dreg and also
 * recorded in values [ins->dreg].
 */
const char *oname = "arm64_ldm";
/* LLVM type for the ValueTuple of vectors that the managed method returns (ins->klass). */
LLVMTypeRef ret_t = simd_valuetuple_to_llvm_type (ctx, ins->klass);
/* Create the destination stack slot only if one has not already been set up for dreg. */
if (!addresses [ins->dreg])
addresses [ins->dreg] = create_address (ctx->module, build_named_alloca (ctx, m_class_get_byval_arg (ins->klass), oname), ret_t);
/* The element type of ret_t is the per-register vector type; it selects the intrinsic overload. */
LLVMTypeRef vec_t = LLVMGetElementType (ret_t);
IntrinsicId iid = (IntrinsicId) ins->inst_c0;
llvm_ovr_tag_t ovr_tag = ovr_tag_from_llvm_type (vec_t);
/* lhs is the only argument passed to the intrinsic -- presumably the source address (the opcode's IREG src1); confirm against the opcode definition. */
LLVMValueRef result = call_overloaded_intrins (ctx, iid, ovr_tag, &lhs, oname);
/* Store the whole aggregate through the slot address converted to a pointer to ret_t. */
LLVMTypeRef retptr_t = pointer_type (ret_t);
LLVMValueRef dst = convert (ctx, addresses [ins->dreg]->value, retptr_t);
LLVMBuildStore (builder, result, dst);
values [ins->dreg] = result;
break;
}
case OP_ARM64_ST1: {
LLVMTypeRef t = LLVMTypeOf (rhs);
LLVMValueRef address = convert (ctx, lhs, pointer_type (t));
Expand Down Expand Up @@ -13546,6 +13563,10 @@ add_intrinsic (EmitContext *ctx, int id)
*/
LLVMTypeRef associated_type = intrin_types [vw][0];
intrins = add_intrins2 (module, id, distinguishing_type, associated_type, &intrins_type);
} else if (kind == INTRIN_kind_add_pointer) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this change about, and why do we have to track this separately? Mono already supports APIs that take pointers, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is related to calling LLVM intrinsics. Yes, Mono supports pointer types.

LLVMTypeRef elem_type = LLVMGetElementType (distinguishing_type);
LLVMTypeRef src_t = pointer_type (elem_type);
intrins = add_intrins2 (module, id, distinguishing_type, src_t, &intrins_type);
} else
intrins = add_intrins1 (module, id, distinguishing_type, &intrins_type);
int key = key_from_id_and_tag (id, test);
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,8 @@ MINI_OP(OP_ARM64_LDNP_SCALAR, "arm64_ldnp_scalar", VREG, IREG, NONE)
MINI_OP(OP_ARM64_LDP, "arm64_ldp", VREG, IREG, NONE)
MINI_OP(OP_ARM64_LDP_SCALAR, "arm64_ldp_scalar", VREG, IREG, NONE)

/*
 * Multi-vector load (Mono IR opcode): VREG destination, IREG source, no second source.
 * Shared by the LoadVector{64,128}x{2,3,4} and LoadVector{64,128}x{2,3,4}AndUnzip
 * intrinsics; the LLVM intrinsic id attached to the instruction selects the variant.
 */
MINI_OP(OP_ARM64_LDM, "arm64_ldm", VREG, IREG, NONE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these custom names specific to Mono? Can we write a comment for them, since there will be more coming for store? Also, should arm64_ldm be reused for both LoadVector* and LoadAndUnzip?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the custom names used in this file are specific to Mono IR. For LLVM codegen, they follow similar logic, so I use the same string for both LoadVector* and LoadAndUnzip.


MINI_OP(OP_ARM64_ST1, "arm64_st1", NONE, IREG, XREG)
MINI_OP(OP_ARM64_SXTL, "arm64_sxtl", XREG, XREG, NONE)
MINI_OP(OP_ARM64_SXTL2, "arm64_sxtl2", XREG, XREG, NONE)
Expand Down
12 changes: 12 additions & 0 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -3658,7 +3658,19 @@ static SimdIntrinsic advsimd_methods [] = {
{SN_LoadPairVector64, OP_ARM64_LDP},
{SN_LoadPairVector64NonTemporal, OP_ARM64_LDNP},
{SN_LoadVector128, OP_ARM64_LD1},
{SN_LoadVector128x2, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X2_V128},
{SN_LoadVector128x2AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD2_V128},
{SN_LoadVector128x3, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X3_V128},
{SN_LoadVector128x3AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD3_V128},
{SN_LoadVector128x4, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X4_V128},
{SN_LoadVector128x4AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD4_V128},
{SN_LoadVector64, OP_ARM64_LD1},
{SN_LoadVector64x2, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X2_V64},
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
{SN_LoadVector64x2AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD2_V64},
{SN_LoadVector64x3, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X3_V64},
{SN_LoadVector64x3AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD3_V64},
{SN_LoadVector64x4, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X4_V64},
{SN_LoadVector64x4AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD4_V64},
{SN_Max, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SMAX, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_UMAX, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_FMAX},
{SN_MaxAcross, OP_ARM64_XHORIZ, INTRINS_AARCH64_ADV_SIMD_SMAXV, OP_ARM64_XHORIZ, INTRINS_AARCH64_ADV_SIMD_UMAXV, OP_ARM64_XHORIZ, INTRINS_AARCH64_ADV_SIMD_FMAXV},
{SN_MaxNumber, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_FMAXNM},
Expand Down
12 changes: 12 additions & 0 deletions src/mono/mono/mini/simd-methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ METHOD(Extract)
METHOD(LoadHigh)
METHOD(LoadLow)
METHOD(LoadVector128)
METHOD(LoadVector128x2)
METHOD(LoadVector128x3)
METHOD(LoadVector128x4)
METHOD(LoadVector128x2AndUnzip)
METHOD(LoadVector128x3AndUnzip)
METHOD(LoadVector128x4AndUnzip)
METHOD(LoadScalarVector128)
METHOD(MoveHighToLow)
METHOD(MoveLowToHigh)
Expand Down Expand Up @@ -436,6 +442,12 @@ METHOD(LoadPairVector128NonTemporal)
METHOD(LoadPairVector64)
METHOD(LoadPairVector64NonTemporal)
METHOD(LoadVector64)
METHOD(LoadVector64x2)
METHOD(LoadVector64x3)
METHOD(LoadVector64x4)
METHOD(LoadVector64x2AndUnzip)
METHOD(LoadVector64x3AndUnzip)
METHOD(LoadVector64x4AndUnzip)
METHOD(MaxAcross)
METHOD(MaxNumber)
METHOD(MaxNumberAcross)
Expand Down
Loading
Loading