Skip to content

Commit

Permalink
Merge Pull Request #10706 from brian-kelley/Trilinos/SpeedupAdditiveS…
Browse files Browse the repository at this point in the history
…chwarz

Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Ifpack2: Speedup additive schwarz, fix #9606
PR Author: brian-kelley
  • Loading branch information
trilinos-autotester committed Jul 14, 2022
2 parents e599bf1 + f8d10ff commit 6c842cb
Show file tree
Hide file tree
Showing 16 changed files with 1,692 additions and 265 deletions.
1 change: 1 addition & 0 deletions packages/ifpack2/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ IF(Ifpack2_ENABLE_EXPLICIT_INSTANTIATION)
SparsityFilter
ContainerFactory
TriDiContainer
Details::AdditiveSchwarzFilter
Details::Chebyshev
Details::ChebyshevKernel
Details::DenseSolver
Expand Down
9 changes: 8 additions & 1 deletion packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include "Ifpack2_Preconditioner.hpp"
#include "Ifpack2_Details_CanChangeMatrix.hpp"
#include "Ifpack2_Details_NestedPreconditioner.hpp"
#include "Ifpack2_OverlappingRowMatrix.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_RowMatrix.hpp"
Expand Down Expand Up @@ -329,6 +330,12 @@ class AdditiveSchwarz :
using row_matrix_type =
Tpetra::RowMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;

//! The Tpetra::CrsMatrix specialization that is a subclass of MatrixType.
using crs_matrix_type =
Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;

//@}
// \name Constructors and destructor
//@{
Expand Down Expand Up @@ -774,7 +781,7 @@ class AdditiveSchwarz :
/// \brief The overlapping matrix.
///
/// If nonnull, this is an instance of OverlappingRowMatrix<row_matrix_type>.
Teuchos::RCP<row_matrix_type> OverlappingMatrix_;
Teuchos::RCP<OverlappingRowMatrix<row_matrix_type>> OverlappingMatrix_;

//! The reordered matrix.
Teuchos::RCP<row_matrix_type> ReorderedLocalizedMatrix_;
Expand Down
440 changes: 284 additions & 156 deletions packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp

Large diffs are not rendered by default.

436 changes: 436 additions & 0 deletions packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp

Large diffs are not rendered by default.

769 changes: 769 additions & 0 deletions packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ setMatrix (const Teuchos::RCP<const operator_type>& A)
using Teuchos::rcp_dynamic_cast;
A_crs_ = rcp_dynamic_cast<const crs_matrix_type> (A);

TEUCHOS_TEST_FOR_EXCEPTION
(A_crs_.is_null(), std::logic_error,
"Ifpack2::Details::InverseDiagonalKernel: operator A must be a Tpetra::CrsMatrix.");

const size_t lclNumRows = A_crs_->getRowMap ()->getLocalNumElements ();

if (offsets_.extent (0) < lclNumRows) {
Expand Down
72 changes: 72 additions & 0 deletions packages/ifpack2/src/Ifpack2_Details_getCrsMatrix.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*@HEADER
// ***********************************************************************
//
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ***********************************************************************
//@HEADER
*/

#ifndef IFPACK2_DETAILS_GETCRSMATRIX_HPP
#define IFPACK2_DETAILS_GETCRSMATRIX_HPP

#include "Ifpack2_ConfigDefs.hpp"
#include "Tpetra_CrsMatrix.hpp"
#include "Ifpack2_Details_AdditiveSchwarzFilter.hpp"

namespace Ifpack2 {
namespace Details {

//Helper to get A as a Tpetra::CrsMatrix, if that is possible cheaply and without copying.
//In the simplest case (when A is already a CrsMatrix), this is just a dynamic cast.
template<typename SC, typename LO, typename GO, typename NO>
Teuchos::RCP<const Tpetra::CrsMatrix<SC, LO, GO, NO>> getCrsMatrix(const Teuchos::RCP<const Tpetra::RowMatrix<SC, LO, GO, NO>>& A)
{
using row_matrix_type = Tpetra::RowMatrix<SC, LO, GO, NO>;
using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NO>;
auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
if(!Acrs.is_null())
return Acrs;
auto Aasf = Teuchos::rcp_dynamic_cast<const AdditiveSchwarzFilter<row_matrix_type>>(A);
if(!Aasf.is_null())
return Aasf->getFilteredMatrix();
return Teuchos::null;
}

} // namespace Details
} // namespace Ifpack2

#endif
24 changes: 18 additions & 6 deletions packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ class OverlappingRowMatrix :

using row_matrix_type = Tpetra::RowMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;
using crs_matrix_type = Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;

// device typedefs
typedef typename MatrixType::node_type::device_type device_type;
typedef typename device_type::execution_space execution_space;
typedef typename MatrixType::local_inds_device_view_type local_inds_device_view_type;
typedef typename MatrixType::global_inds_device_view_type global_inds_device_view_type;
typedef typename MatrixType::values_device_view_type values_device_view_type;

static_assert(std::is_same<MatrixType, row_matrix_type>::value, "Ifpack2::OverlappingRowMatrix: The template parameter MatrixType must be a Tpetra::RowMatrix specialization. Please don't use Tpetra::CrsMatrix (a subclass of Tpetra::RowMatrix) here anymore. The constructor can take either a RowMatrix or a CrsMatrix just fine.");

Expand Down Expand Up @@ -341,18 +350,20 @@ class OverlappingRowMatrix :

void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const;

virtual Teuchos::RCP<const row_matrix_type> getUnderlyingMatrix() const;
Teuchos::RCP<const crs_matrix_type> getUnderlyingMatrix() const;

Teuchos::RCP<const crs_matrix_type> getExtMatrix() const;

Teuchos::RCP<const row_matrix_type> getExtMatrix() const;
Kokkos::View<size_t*, typename OverlappingRowMatrix<MatrixType>::device_type> getExtHaloStarts() const;
typename Kokkos::View<size_t*, typename OverlappingRowMatrix<MatrixType>::device_type>::HostMirror getExtHaloStartsHost() const;

Teuchos::ArrayView<const size_t> getExtHaloStarts() const;
void doExtImport();

private:
typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type;
typedef Tpetra::Export<local_ordinal_type, global_ordinal_type, node_type> export_type;
typedef Tpetra::RowGraph<local_ordinal_type, global_ordinal_type, node_type> row_graph_type;
typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
typedef Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> vector_type;

//! The input matrix to the constructor.
Expand All @@ -369,10 +380,11 @@ class OverlappingRowMatrix :
Teuchos::RCP<const import_type> Importer_;

//! The matrix containing only the overlap rows.
Teuchos::RCP<const crs_matrix_type> ExtMatrix_;
Teuchos::RCP<crs_matrix_type> ExtMatrix_;
Teuchos::RCP<const map_type> ExtMap_;
Teuchos::RCP<const import_type> ExtImporter_;
Teuchos::Array<size_t> ExtHaloStarts_;
Kokkos::View<size_t*, device_type> ExtHaloStarts_;
typename Kokkos::View<size_t*, device_type>::HostMirror ExtHaloStarts_h;

//! Graph of the matrix (as returned by getGraph()).
Teuchos::RCP<const row_graph_type> graph_;
Expand Down
73 changes: 39 additions & 34 deletions packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
"Ifpack2::OverlappingRowMatrix: Matrix must be "
"distributed over more than one MPI process.");


RCP<const crs_graph_type> A_crsGraph = A_->getCrsGraph ();
const size_t numMyRowsA = A_->getLocalNumRows ();
const global_ordinal_type global_invalid =
Expand All @@ -93,11 +94,12 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
RCP<crs_graph_type> TmpGraph;
RCP<import_type> TmpImporter;
RCP<const map_type> RowMap, ColMap;
ExtHaloStarts_.resize(OverlapLevel_+1);
Kokkos::resize(ExtHaloStarts_, OverlapLevel_+1);
ExtHaloStarts_h = Kokkos::create_mirror_view(ExtHaloStarts_);

// The big import loop
for (int overlap = 0 ; overlap < OverlapLevel_ ; ++overlap) {
ExtHaloStarts_[overlap] = (size_t) ExtElements.size();
ExtHaloStarts_h(overlap) = (size_t) ExtElements.size();

// Get the current maps
if (overlap == 0) {
Expand Down Expand Up @@ -128,28 +130,24 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
}
}

// mfh 24 Nov 2013: We don't need TmpMap, TmpGraph, or
// TmpImporter after this loop, so we don't have to construct them
// on the last round.
// On last import round, TmpMap, TmpGraph, and TmpImporter are unneeded,
// so don't build them.
if (overlap + 1 < OverlapLevel_) {
// Allocate & import new matrices, maps, etc.
//
// FIXME (mfh 24 Nov 2013) Do we always want to use index base
// zero? It doesn't really matter, since the actual index base
// (in the current implementation of Map) will always be the
// globally least GID.
//map consisting of GIDs that are in the current halo level-set
TmpMap = rcp (new map_type (global_invalid, mylist (0, count),
Teuchos::OrdinalTraits<global_ordinal_type>::zero (),
A_->getComm ()));
//graph whose rows are the current halo level-set to import
TmpGraph = rcp (new crs_graph_type (TmpMap, 0));
TmpImporter = rcp (new import_type (A_->getRowMap (), TmpMap));

//import from original matrix graph to current halo level-set graph
TmpGraph->doImport (*A_crsGraph, *TmpImporter, Tpetra::INSERT);
TmpGraph->fillComplete (A_->getDomainMap (), TmpMap);
}
} // end overlap loop
ExtHaloStarts_[OverlapLevel_] = (size_t) ExtElements.size();

ExtHaloStarts_h[OverlapLevel_] = (size_t) ExtElements.size();
Kokkos::deep_copy(ExtHaloStarts_,ExtHaloStarts_h);

// build the map containing all the nodes (original
// matrix + extended matrix)
Expand All @@ -161,6 +159,7 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
mylist[i + numMyRowsA] = ExtElements[i];
}


RowMap_ = rcp (new map_type (global_invalid, mylist (),
Teuchos::OrdinalTraits<global_ordinal_type>::zero (),
A_->getComm ()));
Expand All @@ -175,11 +174,9 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
ExtImporter_ = rcp (new import_type (A_->getRowMap (), ExtMap_));

{
RCP<crs_matrix_type> ExtMatrix_nc =
rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
ExtMatrix_nc->doImport (*A_, *ExtImporter_, Tpetra::INSERT);
ExtMatrix_nc->fillComplete (A_->getDomainMap (), RowMap_);
ExtMatrix_ = ExtMatrix_nc; // we only need the const version after here
ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT);
ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_);
}

// fix indices for overlapping matrix
Expand Down Expand Up @@ -370,7 +367,7 @@ getNumEntriesInLocalRow (local_ordinal_type localRow) const
template<class MatrixType>
size_t OverlappingRowMatrix<MatrixType>::getGlobalMaxNumRowEntries() const
{
throw std::runtime_error("Ifpack2::OverlappingRowMatrix::getGlobalMaxNumRowEntries() not supported.");
return std::max<size_t>(A_->getGlobalMaxNumRowEntries(), ExtMatrix_->getGlobalMaxNumRowEntries());
}


Expand Down Expand Up @@ -417,16 +414,7 @@ OverlappingRowMatrix<MatrixType>::
nonconst_values_host_view_type &Values,
size_t& NumEntries) const
{
const local_ordinal_type LocalRow = RowMap_->getLocalElement (GlobalRow);
if (LocalRow == Teuchos::OrdinalTraits<local_ordinal_type>::invalid ()) {
NumEntries = Teuchos::OrdinalTraits<size_t>::invalid ();
} else {
if (Teuchos::as<size_t> (LocalRow) < A_->getLocalNumRows ()) {
A_->getGlobalRowCopy (GlobalRow, Indices, Values, NumEntries);
} else {
ExtMatrix_->getGlobalRowCopy (GlobalRow, Indices, Values, NumEntries);
}
}
throw std::runtime_error("Ifpack2::OverlappingRowMatrix::getGlobalRowCopy() not supported.");
}

template<class MatrixType>
Expand Down Expand Up @@ -483,7 +471,6 @@ OverlappingRowMatrix<MatrixType>::

}


template<class MatrixType>
void
OverlappingRowMatrix<MatrixType>::
Expand Down Expand Up @@ -819,25 +806,43 @@ void OverlappingRowMatrix<MatrixType>::describe(Teuchos::FancyOStream &out,
}

template<class MatrixType>
Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
Teuchos::RCP<const Tpetra::CrsMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
OverlappingRowMatrix<MatrixType>::getUnderlyingMatrix() const
{
return A_;
}

template<class MatrixType>
Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
Teuchos::RCP<const Tpetra::CrsMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
OverlappingRowMatrix<MatrixType>::getExtMatrix() const
{
return ExtMatrix_;
}

template<class MatrixType>
Teuchos::ArrayView<const size_t> OverlappingRowMatrix<MatrixType>::getExtHaloStarts() const
Kokkos::View<size_t*, typename OverlappingRowMatrix<MatrixType>::device_type>
OverlappingRowMatrix<MatrixType>::getExtHaloStarts() const
{
return ExtHaloStarts_;
}

template<class MatrixType>
typename Kokkos::View<size_t*, typename OverlappingRowMatrix<MatrixType>::device_type>::HostMirror
OverlappingRowMatrix<MatrixType>::getExtHaloStartsHost() const
{
return ExtHaloStarts_();
return ExtHaloStarts_h;
}

template<class MatrixType>
void OverlappingRowMatrix<MatrixType>::doExtImport()
{
//TODO: CrsMatrix can't doImport after resumeFill (see #9720). Ideally, this import could
//happen using combine mode REPLACE without reconstructing the matrix.
//Maybe even without another fillComplete since this doesn't change structure - see #9655.
ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT);
ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_);
}

} // namespace Ifpack2

Expand Down
11 changes: 5 additions & 6 deletions packages/ifpack2/src/Ifpack2_RILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "Teuchos_StandardParameterEntryValidators.hpp"
#include "Ifpack2_LocalSparseTriangularSolver.hpp"
#include "Ifpack2_Details_getParamTryingTypes.hpp"
#include "Ifpack2_Details_getCrsMatrix.hpp"
#include "Kokkos_Sort.hpp"
#include "KokkosKernels_SparseUtils.hpp"
#include "KokkosKernels_Sorting.hpp"
Expand Down Expand Up @@ -513,9 +514,8 @@ void RILUK<MatrixType>::initialize ()
}

{
RCP<const crs_matrix_type> A_local_crs =
rcp_dynamic_cast<const crs_matrix_type> (A_local_);
if (A_local_crs.is_null ()) {
RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
if(A_local_crs.is_null()) {
local_ordinal_type numRows = A_local_->getLocalNumRows();
Array<size_t> entriesPerRow(numRows);
for(local_ordinal_type i = 0; i < numRows; i++) {
Expand Down Expand Up @@ -907,9 +907,8 @@ void RILUK<MatrixType>::compute ()
}
else {
{//Make sure values in A is picked up even in case of pattern reuse
RCP<const crs_matrix_type> A_local_crs =
rcp_dynamic_cast<const crs_matrix_type> (A_local_);
if (A_local_crs.is_null ()) {
RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
if(A_local_crs.is_null()) {
local_ordinal_type numRows = A_local_->getLocalNumRows();
Array<size_t> entriesPerRow(numRows);
for(local_ordinal_type i = 0; i < numRows; i++) {
Expand Down
Loading

0 comments on commit 6c842cb

Please sign in to comment.