Skip to content

Commit

Permalink
Fix NestedLoopJoin with empty build or probe with no join condition (f…
Browse files Browse the repository at this point in the history
…acebookincubator#9892)

Summary:

Join with no condition is a cross product. The existing code avoid 
adding mismatch to the result after cross product because cross 
product should have matched all input rows (facebookincubator#6010). But there 
is an exception. When the build or probe side is empty, this cross 
product is empty too. Hence for left, right, and full join, mismatch 
should still be produced. This diff fixes this bug by still adding the 
mismatch to the result if either build or probe side is empty.

Reviewed By: Yuhta

Differential Revision: D57681090
  • Loading branch information
kagamiori authored and facebook-github-bot committed May 24, 2024
1 parent 6f57b9c commit 1e5ae32
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 1 deletion.
11 changes: 10 additions & 1 deletion velox/exec/NestedLoopJoinProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ void NestedLoopJoinProbe::addInput(RowVectorPtr input) {
child->loadedVector();
}
input_ = std::move(input);
if (input_->size() > 0) {
probeSideEmpty_ = false;
}
VELOX_CHECK_EQ(buildIndex_, 0);
if (needsProbeMismatch(joinType_)) {
probeMatched_.resizeFill(input_->size(), false);
Expand Down Expand Up @@ -238,7 +241,12 @@ RowVectorPtr NestedLoopJoinProbe::getMismatchedOutput(
BufferPtr& unmatchedMapping,
const std::vector<IdentityProjection>& projections,
const std::vector<IdentityProjection>& nullProjections) {
if (matched.isAllSelected() || joinCondition_ == nullptr) {
// If data is all matched or the join is a cross product, there is no
// mismatched rows. But there is an exception that if the join is a cross
// product but the build or probe side is empty, there could still be
// mismatched rows from the other side.
if (matched.isAllSelected() ||
(joinCondition_ == nullptr && !probeSideEmpty_ && !buildSideEmpty_)) {
return nullptr;
}

Expand Down Expand Up @@ -311,6 +319,7 @@ void NestedLoopJoinProbe::beginBuildMismatch() {
VELOX_CHECK_NOT_NULL(probe);
for (auto i = 0; i < buildMatched_.size(); ++i) {
buildMatched_[i].select(probe->buildMatched_[i]);
probeSideEmpty_ &= probe->probeSideEmpty_;
}
}
peers.clear();
Expand Down
1 change: 1 addition & 0 deletions velox/exec/NestedLoopJoinProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ class NestedLoopJoinProbe : public Operator {
std::vector<IdentityProjection> filterProbeProjections_;
BufferPtr probeOutMapping_;
BufferPtr probeIndices_;
bool probeSideEmpty_{true};

// Build side state
std::optional<std::vector<RowVectorPtr>> buildVectors_;
Expand Down
67 changes: 67 additions & 0 deletions velox/exec/tests/NestedLoopJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/HiveConnectorTestBase.h"
#include "velox/exec/tests/utils/PlanBuilder.h"
#include "velox/exec/tests/utils/VectorTestUtil.h"
Expand Down Expand Up @@ -131,6 +132,72 @@ class NestedLoopJoinTest : public HiveConnectorTestBase {
buildKeyName_)};
};

TEST_F(NestedLoopJoinTest, aaa) {
auto empty = makeRowVector({"u0"}, {makeFlatVector<StringView>({})});
auto nonEmpty = makeRowVector({"t0"}, {makeFlatVector<StringView>({"foo"})});
auto expected = makeRowVector({makeFlatVector<StringView>({"foo", "foo"})});

auto testJoin = [&](const std::vector<RowVectorPtr>& leftVectors,
const std::vector<RowVectorPtr>& rightVectors,
core::JoinType joinType,
const std::vector<std::string>& outputLayout,
const VectorPtr& expected) {
auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
auto plan = PlanBuilder(planNodeIdGenerator)
.values(leftVectors)
.localPartitionRoundRobinRow()
.nestedLoopJoin(
PlanBuilder(planNodeIdGenerator)
.values(rightVectors)
.localPartition({})
.planNode(),
"",
outputLayout,
joinType)
.planNode();
AssertQueryBuilder builder{plan};
auto result = builder.copyResults(pool());
facebook::velox::test::assertEqualVectors(expected, result);
};

testJoin({nonEmpty}, {empty}, core::JoinType::kLeft, {"t0"}, nonEmpty);
testJoin(
{nonEmpty, nonEmpty},
{empty, empty},
core::JoinType::kLeft,
{"t0"},
expected);
testJoin({empty}, {nonEmpty}, core::JoinType::kLeft, {"u0"}, empty);

testJoin({empty}, {nonEmpty}, core::JoinType::kRight, {"t0"}, nonEmpty);
testJoin(
{empty, empty},
{nonEmpty, nonEmpty},
core::JoinType::kRight,
{"t0"},
expected);
testJoin({nonEmpty}, {empty}, core::JoinType::kRight, {"u0"}, empty);

testJoin(
{nonEmpty, nonEmpty},
{empty, empty},
core::JoinType::kFull,
{"t0", "u0"},
makeRowVector({
makeFlatVector<StringView>({"foo", "foo"}),
makeNullableFlatVector<StringView>({std::nullopt, std::nullopt}),
}));
testJoin(
{empty, empty},
{nonEmpty, nonEmpty},
core::JoinType::kFull,
{"u0", "t0"},
makeRowVector({
makeNullableFlatVector<StringView>({std::nullopt, std::nullopt}),
makeFlatVector<StringView>({"foo", "foo"}),
}));
}

TEST_F(NestedLoopJoinTest, basic) {
auto probeVectors = makeBatches(20, 5, probeType_, pool_.get());
auto buildVectors = makeBatches(18, 5, buildType_, pool_.get());
Expand Down

0 comments on commit 1e5ae32

Please sign in to comment.