Skip to content

Commit

Permalink
Merge 'trilinos/Trilinos:develop' (85a5ff2) into 'tcad-charon/Trilino…
Browse files Browse the repository at this point in the history
…s:develop' (8e7a547).

* trilinos-develop:
  Tpetra: Fixing WrappedDualView issues
  Xpetra: Undoing change
  MueLu: Removing BlockCrs transpose placeholder behavior
  Tpetra: Minor fix for transpose behavior
  Tpetra: Adding BlockCrsMatrix Transpose capability
  • Loading branch information
Charonops Jenkins Pipeline committed Oct 4, 2022
2 parents 8e7a547 + 85a5ff2 commit c748ef0
Show file tree
Hide file tree
Showing 9 changed files with 450 additions and 50 deletions.
16 changes: 5 additions & 11 deletions packages/muelu/src/Utils/MueLu_Utilities_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,25 +919,19 @@ namespace MueLu {
using BCRS = Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
using CRS = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
const BCRS & tpetraOp = Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraBlockCrs(Op);

if(!Op.getRowMap()->getComm()->getRank())
std::cout<<"WARNING: Utilities::Transpose(): Using inefficient placeholder algorithm for Transpose"<<std::endl;

RCP<BCRS> At;
RCP<const CRS> Acrs = Tpetra::convertToCrsMatrix(tpetraOp);
{
Tpetra::RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(Acrs,label);

Tpetra::BlockCrsMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(rcpFromRef(tpetraOp),label);
using Teuchos::ParameterList;
using Teuchos::rcp;
RCP<ParameterList> transposeParams = params.is_null () ?
rcp (new ParameterList) :
rcp (new ParameterList (*params));
rcp (new ParameterList (*params));
transposeParams->set ("sort", false);
RCP<CRS> Atcrs = transposer.createTranspose(transposeParams);

At = Tpetra::convertToBlockCrsMatrix(*Atcrs,Op.GetStorageBlockSize());
At = transposer.createTranspose(transposeParams);
}

RCP<Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > AA = rcp(new Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>(At));
RCP<CrsMatrix> AAA = rcp_implicit_cast<CrsMatrix>(AA);
RCP<Matrix> AAAA = rcp( new CrsMatrixWrap(AAA));
Expand Down
11 changes: 3 additions & 8 deletions packages/muelu/src/Utils/MueLu_Utilities_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,24 +594,19 @@ namespace MueLu {
using CRS = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
const BCRS & tpetraOp = Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraBlockCrs(Op);

if(!Op.getRowMap()->getComm()->getRank())
std::cout<<"WARNING: Utilities::Transpose(): Using inefficient placeholder algorithm for Transpose"<<std::endl;

RCP<BCRS> At;
RCP<const CRS> Acrs = Tpetra::convertToCrsMatrix(tpetraOp);
{
Tpetra::RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(Acrs,label);
Tpetra::BlockCrsMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(rcpFromRef(tpetraOp),label);

using Teuchos::ParameterList;
using Teuchos::rcp;
RCP<ParameterList> transposeParams = params.is_null () ?
rcp (new ParameterList) :
rcp (new ParameterList (*params));
transposeParams->set ("sort", false);
RCP<CRS> Atcrs = transposer.createTranspose(transposeParams);

At = Tpetra::convertToBlockCrsMatrix(*Atcrs,Op.GetStorageBlockSize());
At = transposer.createTranspose(transposeParams);
}

RCP<Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > AA = rcp(new Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>(At));
RCP<XCrsMatrix> AAA = rcp_implicit_cast<XCrsMatrix>(AA);
RCP<XMatrix> AAAA = rcp( new XCrsMatrixWrap(AAA));
Expand Down
34 changes: 34 additions & 0 deletions packages/tpetra/core/src/Tpetra_BlockCrsMatrix_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ importAndFillCompleteBlockCrsMatrix (const Teuchos::RCP<const BlockCrsMatrixType
const Import<typename BlockCrsMatrixType::local_ordinal_type,
typename BlockCrsMatrixType::global_ordinal_type,
typename BlockCrsMatrixType::node_type>& importer);
template<class BlockCrsMatrixType>
Teuchos::RCP<BlockCrsMatrixType>
exportAndFillCompleteBlockCrsMatrix (const Teuchos::RCP<const BlockCrsMatrixType>& sourceMatrix,
const Export<typename BlockCrsMatrixType::local_ordinal_type,
typename BlockCrsMatrixType::global_ordinal_type,
typename BlockCrsMatrixType::node_type>& exporter);

/// \class BlockCrsMatrix
/// \brief Sparse matrix whose entries are small dense square blocks,
Expand Down Expand Up @@ -408,6 +414,13 @@ class BlockCrsMatrix :
importAndFillComplete (Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >& destMatrix,
const Import<LO, GO, Node>& importer) const;

/// \brief Import from <tt>this</tt> to the given destination
/// matrix, and make the result fill complete.
void
exportAndFillComplete (Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >& destMatrix,
const Export<LO, GO, Node>& exporter) const;


/// \brief Replace values at the given (mesh, i.e., block) column
/// indices, in the given (mesh, i.e., block) row.
///
Expand Down Expand Up @@ -1239,6 +1252,14 @@ class BlockCrsMatrix :
const Import<typename BlockCrsMatrixType::local_ordinal_type,
typename BlockCrsMatrixType::global_ordinal_type,
typename BlockCrsMatrixType::node_type>& importer);
// Friend declaration for nonmember function.
template<class BlockCrsMatrixType>
friend Teuchos::RCP<BlockCrsMatrixType>
Tpetra::exportAndFillCompleteBlockCrsMatrix (const Teuchos::RCP<const BlockCrsMatrixType>& sourceMatrix,
const Export<typename BlockCrsMatrixType::local_ordinal_type,
typename BlockCrsMatrixType::global_ordinal_type,
typename BlockCrsMatrixType::node_type>& exporter);

};

template<class BlockCrsMatrixType>
Expand All @@ -1253,6 +1274,19 @@ importAndFillCompleteBlockCrsMatrix (const Teuchos::RCP<const BlockCrsMatrixType
return destMatrix;
}


template<class BlockCrsMatrixType>
Teuchos::RCP<BlockCrsMatrixType>
exportAndFillCompleteBlockCrsMatrix (const Teuchos::RCP<const BlockCrsMatrixType>& sourceMatrix,
const Export<typename BlockCrsMatrixType::local_ordinal_type,
typename BlockCrsMatrixType::global_ordinal_type,
typename BlockCrsMatrixType::node_type>& exporter)
{
Teuchos::RCP<BlockCrsMatrixType> destMatrix;
sourceMatrix->exportAndFillComplete (destMatrix, exporter);
return destMatrix;
}

} // namespace Tpetra

#endif // TPETRA_BLOCKCRSMATRIX_DECL_HPP
29 changes: 29 additions & 0 deletions packages/tpetra/core/src/Tpetra_BlockCrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,35 @@ class GetLocalDiagCopy {
destMatrix->doImport(*this, importer, Tpetra::INSERT);
}


template<class Scalar, class LO, class GO, class Node>
void
BlockCrsMatrix<Scalar, LO, GO, Node>::
exportAndFillComplete (Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >& destMatrix,
const Export<LO, GO, Node>& exporter) const
{
using Teuchos::RCP;
using Teuchos::rcp;
using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;

// Right now, we make many assumptions...
TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
"destMatrix is required to be null.");

// BlockCrsMatrix requires a complete graph at construction.
// So first step is to import and fill complete the destGraph.
RCP<crs_graph_type> srcGraph = rcp (new crs_graph_type(this->getCrsGraph()));
RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
srcGraph->getDomainMap(),
srcGraph->getRangeMap());


// Final step, create and import the destMatrix.
destMatrix = rcp (new this_type (*destGraph, getBlockSize()));
destMatrix->doExport(*this, exporter, Tpetra::INSERT);
}


template<class Scalar, class LO, class GO, class Node>
void
BlockCrsMatrix<Scalar, LO, GO, Node>::
Expand Down
56 changes: 56 additions & 0 deletions packages/tpetra/core/src/Tpetra_RowMatrixTransposer_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

#include "Tpetra_RowMatrixTransposer_fwd.hpp"
#include "Tpetra_CrsMatrix_fwd.hpp"
#include "Tpetra_BlockCrsMatrix_fwd.hpp"
#include "Tpetra_Map_fwd.hpp"
#include "Teuchos_RCP.hpp"
#include <string>
Expand Down Expand Up @@ -120,6 +121,61 @@ class RowMatrixTransposer {
std::string label_;
};


/// \class BlockCrsMatrixTransposer
/// \brief Construct and (optionally) redistribute the explicitly
/// stored transpose of a BlockCrsMatrix.
///
/// This class takes the same template parameters as BlockCrsMatrix.
template<class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
class BlockCrsMatrixTransposer {
public:
//! @name Typedefs
//@{
typedef Scalar scalar_type;
typedef LocalOrdinal local_ordinal_type;
typedef GlobalOrdinal global_ordinal_type;
typedef Node node_type;

typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> bcrs_matrix_type;

//@}
//! @name Constructors
//@{

//! Constructor that takes the matrix to transpose.
BlockCrsMatrixTransposer (const Teuchos::RCP<const bcrs_matrix_type>& origMatrix,const std::string & label = std::string());

//@}
//! @name Methods for computing the explicit transpose.
//@{

//! Compute and return the transpose of the matrix given to the constructor.
Teuchos::RCP<bcrs_matrix_type> createTranspose(const Teuchos::RCP<Teuchos::ParameterList> &params=Teuchos::null);

/// \brief Compute and return the transpose of the matrix given to the constructor.
///
/// In this call, we (potentially) leave the matrix with an
/// overlapping row Map. This is a perfectly valid matrix, but
/// won't work correctly with some routines in Ifpack or Muelu.
///
/// \warning This routine leaves overlapping rows. Unless you're
/// sure that's OK, call createTranspose() instead.
Teuchos::RCP<bcrs_matrix_type> createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList> &params=Teuchos::null);

private:
//! The original matrix to be transposed.
Teuchos::RCP<const bcrs_matrix_type> origMatrix_;

//! Label for timers
std::string label_;
};


} // namespace Tpetra

#endif /* TPETRA_ROWMATRIXTRANSPOSER_DECL_HPP */
140 changes: 139 additions & 1 deletion packages/tpetra/core/src/Tpetra_RowMatrixTransposer_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#define TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_BlockCrsMatrix.hpp"
#include "Tpetra_Export.hpp"
#include "Tpetra_Details_computeOffsets.hpp"
#include "Tpetra_Details_shortSort.hpp"
Expand Down Expand Up @@ -202,14 +203,151 @@ createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList>& params)
origMatrix_->getDomainMap (),
myImport, myExport, graphParams));
}

/*************************************************************************/

template<class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
BlockCrsMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
BlockCrsMatrixTransposer (const Teuchos::RCP<const bcrs_matrix_type>& origMatrix,
const std::string& label)
: origMatrix_ (origMatrix), label_ (label)
{}

template<class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
BlockCrsMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
createTranspose (const Teuchos::RCP<Teuchos::ParameterList> &params)
{
using Teuchos::RCP;
// Do the local transpose
RCP<bcrs_matrix_type> transMatrixWithSharedRows = createTransposeLocal (params);

#ifdef HAVE_TPETRA_MMM_TIMINGS
const std::string prefix = std::string ("Tpetra ") + label_ + ": ";
using Teuchos::TimeMonitor;
TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose TAFC"));
#endif

// If transMatrixWithSharedRows has an exporter, that's what we
// want. If it doesn't, the rows aren't actually shared, and we're
// done!
using export_type = Export<LocalOrdinal, GlobalOrdinal, Node>;
RCP<const export_type> exporter =
transMatrixWithSharedRows->getGraph ()->getExporter ();
if (exporter.is_null ()) {
return transMatrixWithSharedRows;
}
else {
Teuchos::ParameterList labelList;
#ifdef HAVE_TPETRA_MMM_TIMINGS
labelList.set("Timer Label", label_);
#endif
if(! params.is_null ()) {
const char paramName[] = "compute global constants";
labelList.set (paramName, params->get (paramName, true));
}
// Use the Export object to do a fused Export and fillComplete.
// This always sorts the local matrix after communication, so
// no need to set "sorted = false" in parameters.
return exportAndFillCompleteBlockCrsMatrix<bcrs_matrix_type>
(transMatrixWithSharedRows, *exporter);
}
}


// mfh 03 Feb 2013: In a definition outside the class like this, the
// return value is considered outside the class scope (for things like
// resolving typedefs), but the arguments are considered inside the
// class scope.
template<class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
BlockCrsMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList>& params)
{
using Teuchos::Array;
using Teuchos::ArrayRCP;
using Teuchos::ArrayView;
using Teuchos::RCP;
using Teuchos::rcp;
using Teuchos::rcp_dynamic_cast;
using LO = LocalOrdinal;
using GO = GlobalOrdinal;
using import_type = Tpetra::Import<LO, GO, Node>;
using export_type = Tpetra::Export<LO, GO, Node>;
using crs_graph_type = typename bcrs_matrix_type::crs_graph_type;

#ifdef HAVE_TPETRA_MMM_TIMINGS
std::string prefix = std::string("Tpetra ") + label_ + ": ";
using Teuchos::TimeMonitor;
TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose Local"));
#endif

RCP<const bcrs_matrix_type> crsMatrix =
rcp_dynamic_cast<const bcrs_matrix_type> (origMatrix_);

if(crsMatrix.is_null())
TEUCHOS_ASSERT( false ); // not implemented

using local_matrix_device_type = typename bcrs_matrix_type::local_matrix_device_type;

typename local_matrix_device_type::values_type values ;
RCP<const crs_graph_type> graph;
{
local_matrix_device_type lclMatrix = crsMatrix->getLocalMatrixDevice ();

local_matrix_device_type lclTransposeMatrix = KokkosSparse::Impl::transpose_bsr_matrix(lclMatrix);

// BlockCrs requires that we sort stuff
KokkosSparse::sort_crs_matrix(lclTransposeMatrix);
values = lclTransposeMatrix.values;

// Prebuild the importers and exporters the no-communication way,
// flipping the importers and exporters around.
const auto origExport = origMatrix_->getGraph ()->getExporter ();
RCP<const import_type> myImport = origExport.is_null () ?
Teuchos::null : rcp (new import_type (*origExport));
const auto origImport = origMatrix_->getGraph ()->getImporter ();
RCP<const export_type> myExport = origImport.is_null () ?
Teuchos::null : rcp (new export_type (*origImport));

RCP<Teuchos::ParameterList> graphParams = Teuchos::null;

// Make the Transpose Graph
graph = rcp(new crs_graph_type(lclTransposeMatrix.graph,
origMatrix_->getColMap (),
origMatrix_->getRowMap (),
origMatrix_->getGraph()->getRangeMap (),
origMatrix_->getGraph()->getDomainMap (),
myImport,
myExport,
graphParams));
}
// Now make the matrix
return rcp (new bcrs_matrix_type (*graph,
values,
origMatrix_->getBlockSize()));
}
//


//
// Explicit instantiation macro
//
// Must be expanded from within the Tpetra namespace!
//

#define TPETRA_ROWMATRIXTRANSPOSER_INSTANT(SCALAR,LO,GO,NODE) \
template class RowMatrixTransposer< SCALAR, LO , GO , NODE >;
template class RowMatrixTransposer< SCALAR, LO , GO , NODE >;\
template class BlockCrsMatrixTransposer< SCALAR, LO , GO , NODE >;

} // namespace Tpetra

Expand Down
Loading

0 comments on commit c748ef0

Please sign in to comment.