Skip to content

8359419: AArch64: Relax min vector length to 32-bit for short vectors #26057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2362,17 +2362,34 @@ int Matcher::max_vector_size(const BasicType bt) {
}

int Matcher::min_vector_size(const BasicType bt) {
int max_size = max_vector_size(bt);
// Limit the min vector size to 8 bytes.
int size = 8 / type2aelembytes(bt);
if (bt == T_BYTE) {
// To support vector api shuffle/rearrange.
size = 4;
} else if (bt == T_BOOLEAN) {
// To support vector api load/store mask.
size = 2;
// Theoretically, the minimal vector length supported by AArch64
// ISA and Vector API species is 64-bit. However, 32-bit or 16-bit
// vector length is also allowed for special Vector API usages.
Comment on lines +2365 to +2367
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Theoretically, the minimal vector length supported by AArch64
// ISA and Vector API species is 64-bit. However, 32-bit or 16-bit
// vector length is also allowed for special Vector API usages.
// Usually, the shortest vector length supported by AArch64
// ISA and Vector API species is 64 bits. However, we allow
// 32-bit or 16-bit vectors in a few special cases.

Reason for change: it wasn't clear what "supported" meant: supported by the hardware, or by HotSpot? And why do we only support it in a few special cases? The original comment raises more questions than it answers.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks so much for your suggestion! Looks better to me. I will update soon.

int size;
switch(bt) {
case T_BOOLEAN:
// Load/store a vector mask with only 2 elements for vector types
// such as "2I/2F/2L/2D".
size = 2;
break;
case T_BYTE:
// Generate a "4B" vector, to support vector cast between "8B/16B"
// and "4S/4I/4L/4F/4D".
size = 4;
break;
case T_SHORT:
// Generate a "2S" vector, to support vector cast between "4S/8S"
// and "2I/2L/2F/2D".
size = 2;
break;
default:
// Limit the min vector length to 64-bit.
size = 8 / type2aelembytes(bt);
// The number of elements in a vector should be at least 2.
size = MAX2(size, 2);
}
if (size < 2) size = 2;

int max_size = max_vector_size(bt);
return MIN2(size, max_size);
}

Expand Down
96 changes: 59 additions & 37 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,18 @@ source %{
return false;
}
break;
case Op_AddReductionVI:
case Op_AndReductionV:
case Op_OrReductionV:
case Op_XorReductionV:
case Op_MinReductionV:
case Op_MaxReductionV:
// Reductions on vectors shorter than 8 bytes are not
// supported.
if (length_in_bytes < 8) {
return false;
}
break;
case Op_MulReductionVD:
case Op_MulReductionVF:
case Op_MulReductionVI:
Expand Down Expand Up @@ -4244,8 +4256,8 @@ instruct vzeroExtStoX(vReg dst, vReg src) %{
assert(bt == T_INT || bt == T_LONG, "must be");
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// 4S to 4I
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
// 2S to 2I/2L, 4S to 4I
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
$src$$FloatRegister, T_SHORT, /* is_unsigned */ true);
} else {
assert(UseSVE > 0, "must be sve");
Expand All @@ -4265,11 +4277,11 @@ instruct vzeroExtItoX(vReg dst, vReg src) %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// 2I to 2L
__ neon_vector_extend($dst$$FloatRegister, T_LONG, length_in_bytes,
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
$src$$FloatRegister, T_INT, /* is_unsigned */ true);
} else {
assert(UseSVE > 0, "must be sve");
__ sve_vector_extend($dst$$FloatRegister, __ D,
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
$src$$FloatRegister, __ S, /* is_unsigned */ true);
}
%}
Expand Down Expand Up @@ -4343,11 +4355,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
// 4S to 4I/4F
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
$src$$FloatRegister, T_SHORT);
if (bt == T_FLOAT) {
__ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister);
if (is_floating_point_type(bt)) {
// 2S to 2F/2D, 4S to 4F
__ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG,
length_in_bytes, $src$$FloatRegister, T_SHORT);
__ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister);
} else {
// 2S to 2I/2L, 4S to 4I
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
$src$$FloatRegister, T_SHORT);
}
} else {
assert(UseSVE > 0, "must be sve");
Expand All @@ -4371,7 +4387,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{
effect(TEMP_DEF dst);
format %{ "vcvtItoX_narrow_neon $dst, $src" %}
ins_encode %{
// 4I to 4B/4S
// 2I to 2S, 4I to 4B/4S
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
__ neon_vector_narrow($dst$$FloatRegister, bt,
Expand Down Expand Up @@ -4434,28 +4450,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{

// VectorCastL2X

instruct vcvtLtoI_neon(vReg dst, vReg src) %{
predicate(Matcher::vector_element_basic_type(n) == T_INT &&
instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT ||
Matcher::vector_element_basic_type(n) == T_SHORT) &&
VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
match(Set dst (VectorCastL2X src));
format %{ "vcvtLtoI_neon $dst, $src" %}
format %{ "vcvtLtoX_narrow_neon $dst, $src" %}
ins_encode %{
// 2L to 2I
// 2L to 2S/2I
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
__ neon_vector_narrow($dst$$FloatRegister, T_INT,
__ neon_vector_narrow($dst$$FloatRegister, bt,
$src$$FloatRegister, T_LONG, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT &&
!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) ||
Matcher::vector_element_basic_type(n) == T_BYTE ||
Matcher::vector_element_basic_type(n) == T_SHORT);
instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) &&
!is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
match(Set dst (VectorCastL2X src));
effect(TEMP_DEF dst, TEMP tmp);
format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %}
format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
BasicType bt = Matcher::vector_element_basic_type(this);
Expand Down Expand Up @@ -4521,10 +4538,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{
effect(TEMP_DEF dst);
format %{ "vcvtFtoX_narrow_neon $dst, $src" %}
ins_encode %{
// 4F to 4B/4S
// 2F to 2S, 4F to 4B/4S
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
__ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
__ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S,
$src$$FloatRegister);
__ neon_vector_narrow($dst$$FloatRegister, bt,
$dst$$FloatRegister, T_INT, length_in_bytes);
%}
Expand Down Expand Up @@ -4590,19 +4608,25 @@ instruct vcvtFtoX(vReg dst, vReg src) %{
// VectorCastD2X

instruct vcvtDtoI_neon(vReg dst, vReg src) %{
predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT);
predicate(UseSVE == 0 &&
(Matcher::vector_element_basic_type(n) == T_INT ||
Matcher::vector_element_basic_type(n) == T_SHORT));
match(Set dst (VectorCastD2X src));
effect(TEMP_DEF dst);
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %}
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/2I" %}
ins_encode %{
// 2D to 2I
// 2D to 2S/2I
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1);
// We can't use fcvtzs(vector, integer) instruction here because we need
// saturation arithmetic. See JDK-8276151.
__ fcvtzdw(rscratch1, $src$$FloatRegister);
__ fcvtzdw(rscratch2, $dst$$FloatRegister);
__ fmovs($dst$$FloatRegister, rscratch1);
__ mov($dst$$FloatRegister, __ S, 1, rscratch2);
if (Matcher::vector_element_basic_type(this) == T_SHORT) {
__ neon_vector_narrow($dst$$FloatRegister, T_SHORT,
$dst$$FloatRegister, T_INT, 8);
}
%}
ins_pipe(pipe_slow);
%}
Expand Down Expand Up @@ -6396,14 +6420,12 @@ instruct vpopcountI(vReg dst, vReg src) %{
} else {
assert(bt == T_SHORT || bt == T_INT, "unsupported");
if (UseSVE == 0) {
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
__ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister);
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$dst$$FloatRegister);
assert(length_in_bytes <= 16, "unsupported");
bool isQ = length_in_bytes == 16;
__ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister);
__ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister);
if (bt == T_INT) {
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H,
$dst$$FloatRegister);
__ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister);
}
} else {
__ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
Expand Down Expand Up @@ -6465,7 +6487,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{
format %{ "vblend_neon $dst, $src1, $src2" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
assert(length_in_bytes <= 16, "must be");
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src2$$FloatRegister, $src1$$FloatRegister);
%}
Expand Down Expand Up @@ -6852,7 +6874,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
} else {
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
if (UseSVE == 0) {
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
assert(length_in_bytes <= 16, "unsupported");
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
bt, /* isQ */ length_in_bytes == 16);
if (bt != T_LONG) {
Expand Down Expand Up @@ -6911,7 +6933,7 @@ instruct vreverse(vReg dst, vReg src) %{
} else {
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
if (UseSVE == 0) {
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
assert(length_in_bytes <= 16, "unsupported");
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
bt, /* isQ */ length_in_bytes == 16);
} else {
Expand Down Expand Up @@ -6947,7 +6969,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
assert(length_in_bytes <= 16, "unsupported");
if (bt == T_BYTE) {
if ($dst$$FloatRegister != $src$$FloatRegister) {
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
Expand Down
Loading