From c38291bf58eebf6f95ef0fe9e540a98ef8d060c2 Mon Sep 17 00:00:00 2001 From: Maarten Arnst Date: Tue, 3 Oct 2023 08:51:02 +0200 Subject: [PATCH 01/38] Use namespace to avoid Wdtor-name in clang --- packages/sacado/src/Sacado_Fad_VectorImp.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/sacado/src/Sacado_Fad_VectorImp.hpp b/packages/sacado/src/Sacado_Fad_VectorImp.hpp index c95fa93213b1..701331886df0 100644 --- a/packages/sacado/src/Sacado_Fad_VectorImp.hpp +++ b/packages/sacado/src/Sacado_Fad_VectorImp.hpp @@ -84,8 +84,11 @@ Vector(const Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >& } } +namespace Sacado::Fad +{ + template -Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: +Vector< OrdinalType, DVFad >:: ~Vector() { // Here we must destroy the value and derivative arrays @@ -99,6 +102,8 @@ Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: } } +} // namespace Sacado::Fad + template Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >& Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: From 790f1907152780846961fa37b0d96ad375fc957e Mon Sep 17 00:00:00 2001 From: Eric Phipps Date: Thu, 5 Oct 2023 13:33:26 -0600 Subject: [PATCH 02/38] Sacado: Try to a different fix to make Intel compiler happy --- packages/sacado/src/Sacado_Fad_VectorImp.hpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/packages/sacado/src/Sacado_Fad_VectorImp.hpp b/packages/sacado/src/Sacado_Fad_VectorImp.hpp index 701331886df0..f4c7be4bfb30 100644 --- a/packages/sacado/src/Sacado_Fad_VectorImp.hpp +++ b/packages/sacado/src/Sacado_Fad_VectorImp.hpp @@ -84,12 +84,9 @@ Vector(const Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >& } } -namespace Sacado::Fad -{ - template -Vector< OrdinalType, DVFad >:: -~Vector() +Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: +~Vector< OrdinalType, Sacado::Fad::DVFad >() { // Here we must destroy the value and derivative arrays if (vec_.size() > 0) { @@ -102,8 +99,6 @@ Vector< OrdinalType, DVFad >:: } } -} // namespace Sacado::Fad - template Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >& Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: From 3b71a4d130174cc75939f8b70ddaf16839136ed6 Mon Sep 17 00:00:00 2001 From: Eric Phipps Date: Fri, 6 Oct 2023 11:46:15 -0600 Subject: [PATCH 03/38] Sacado: Try a different fix --- packages/sacado/src/Sacado_Fad_VectorImp.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/sacado/src/Sacado_Fad_VectorImp.hpp b/packages/sacado/src/Sacado_Fad_VectorImp.hpp index f4c7be4bfb30..89bab43f7c2b 100644 --- a/packages/sacado/src/Sacado_Fad_VectorImp.hpp +++ b/packages/sacado/src/Sacado_Fad_VectorImp.hpp @@ -84,9 +84,11 @@ Vector(const Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >& } } +namespace Sacado { +namespace Fad { template -Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: -~Vector< OrdinalType, Sacado::Fad::DVFad >() +Vector< OrdinalType, DVFad >:: +~Vector() { // Here we must destroy the value and derivative arrays if (vec_.size() > 0) { @@ -98,6 +100,8 @@ Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >:: } } } +} +} template Sacado::Fad::Vector< OrdinalType, Sacado::Fad::DVFad >& From 17f10197b7428d64200dfb3c5e3e819af00f5a3c Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 11 Sep 2024 16:05:57 -0600 Subject: [PATCH 04/38] FROSch : fix issues with multiple blocks --- .../FROSch_GDSWCoarseOperator_def.hpp | 378 +++++++++--------- .../FROSch_HarmonicCoarseOperator_def.hpp | 89 
+++-- .../FROSch_IPOUHarmonicCoarseOperator_def.hpp | 39 +- .../FROSch_RGDSWCoarseOperator_def.hpp | 29 +- 4 files changed, 266 insertions(+), 269 deletions(-) diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_GDSWCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_GDSWCoarseOperator_def.hpp index 8b9bd5feaa0e..0ad1c90b789d 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_GDSWCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_GDSWCoarseOperator_def.hpp @@ -247,14 +247,7 @@ namespace FROSch { // Das könnte man noch ändern // TODO: DAS SOLLTE ALLES IN EINE FUNKTION IN HARMONICCOARSEOPERATOR - this->GammaDofs_.resize(this->GammaDofs_.size()+1); - this->IDofs_.resize(this->IDofs_.size()+1); - this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); - this->DofsMaps_.resize(this->DofsMaps_.size()+1); - this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); - this->NumberOfBlocks_++; - - resetCoarseSpaceBlock(this->NumberOfBlocks_-1,dimension,dofsPerNode,nodesMap,dofsMaps,dirichletBoundaryDofs,nodeList); + resetCoarseSpaceBlock(this->NumberOfBlocks_,dimension,dofsPerNode,nodesMap,dofsMaps,dirichletBoundaryDofs,nodeList); return 0; } @@ -273,13 +266,7 @@ namespace FROSch { // Das könnte man noch ändern // TODO: DAS SOLLTE ALLES IN EINE FUNKTION IN HARMONICCOARSEOPERATOR for (UN i=0; iGammaDofs_.resize(this->GammaDofs_.size()+1); - this->IDofs_.resize(this->IDofs_.size()+1); - this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); - this->DofsMaps_.resize(this->DofsMaps_.size()+1); - this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); - this->NumberOfBlocks_++; - resetCoarseSpaceBlock(this->NumberOfBlocks_-1,dimension,dofsPerNodeVec[i],repeatedNodesMapVec[i],repeatedDofMapsVec[i],dirichletBoundaryDofsVec[i],nodeListVec[i]); + resetCoarseSpaceBlock(this->NumberOfBlocks_,dimension,dofsPerNodeVec[i],repeatedNodesMapVec[i],repeatedDofMapsVec[i],dirichletBoundaryDofsVec[i],nodeListVec[i]); } return 0; } @@ -296,7 +283,7 @@ namespace FROSch { { FROSCH_DETAILTIMER_START_LEVELID(resetCoarseSpaceBlockTime,"GDSWCoarseOperator::resetCoarseSpaceBlock"); FROSCH_ASSERT(dofsMaps.size()==dofsPerNode,"dofsMaps.size()!=dofsPerNode"); - FROSCH_ASSERT(blockIdNumberOfBlocks_,"Block does not exist yet and can therefore not be reset."); + FROSCH_ASSERT(blockId<=this->NumberOfBlocks_,"Block does not exist yet and can therefore not be reset("+to_string(blockId)+" <= "+to_string(this->NumberOfBlocks_)+". "); if (!this->DistributionList_->get("Type","linear").compare("ZoltanDual")) { FROSCH_ASSERT(false,"RGDSWCoarseOperator:: Distribution Type ZoltanDual only works for IPOUHarmonicCoarseOperator"); @@ -356,155 +343,28 @@ namespace FROSch { useFaceRotations = false; } - this->DofsMaps_[blockId] = dofsMaps; - this->DofsPerNode_[blockId] = dofsPerNode; - - Array tmpDirichletBoundaryDofs(dirichletBoundaryDofs()); // Here, we do a copy. 
Maybe, this is not necessary - sortunique(tmpDirichletBoundaryDofs); - - DDInterface_.reset(new DDInterface(dimension,this->DofsPerNode_[blockId],nodesMap.getConst(),verbosity,this->LevelID_,communicationStrategy)); - DDInterface_->resetGlobalDofs(dofsMaps); - DDInterface_->removeDirichletNodes(tmpDirichletBoundaryDofs()); - - EntitySetPtr interface = this->DDInterface_->getInterface(); - EntitySetPtr interior = this->DDInterface_->getInterior(); - - if (useForCoarseSpace && (useVertexTranslations||useShortEdgeTranslations||useShortEdgeRotations||useStraightEdgeTranslations||useStraightEdgeRotations||useEdgeTranslations||useEdgeRotations||useFaceTranslations||useFaceRotations)) { - - if (this->Verbose_) { - cout - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << setw(89) << "-----------------------------------------------------------------------------------------" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " - << left << setw(74) << "GDSWCoarseOperator " << right << setw(8) << "(Level " << setw(2) << this->LevelID_ << ")" - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << setw(89) << "=========================================================================================" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(41) << "Block" << right - << " | " << setw(41) << blockId - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(41) << "Spatial dimensions" << right - << " | " << setw(41) << dimension - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(41) << "Number of degrees of freedom per node" << right - << " | " << setw(41) << dofsPerNode - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << setw(89) << "-----------------------------------------------------------------------------------------" - << endl; - } + if (useForCoarseSpace) { + this->NumberOfBlocks_++; - // Check for interface - if (interface->getEntity(0)->getNumNodes()==0) { - FROSCH_NOTIFICATION("FROSch::GDSWCoarseOperator",this->Verbose_,"No interface found => Volume functions will be used instead."); - this->computeVolumeFunctions(blockId,dimension,nodesMap,nodeList,interior); - } else { - this->GammaDofs_[blockId] = LOVecPtr(this->DofsPerNode_[blockId]*interface->getEntity(0)->getNumNodes()); - this->IDofs_[blockId] = LOVecPtr(this->DofsPerNode_[blockId]*interior->getEntity(0)->getNumNodes()); - for (UN k=0; kDofsPerNode_[blockId]; k++) { - for (UN i=0; igetEntity(0)->getNumNodes(); i++) { - this->GammaDofs_[blockId][this->DofsPerNode_[blockId]*i+k] = interface->getEntity(0)->getLocalDofID(i,k); - } - for (UN i=0; igetEntity(0)->getNumNodes(); i++) { - this->IDofs_[blockId][this->DofsPerNode_[blockId]*i+k] = interior->getEntity(0)->getLocalDofID(i,k); - } - } + this->GammaDofs_.resize(this->GammaDofs_.size()+1); + this->IDofs_.resize(this->IDofs_.size()+1); + this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); + this->DofsMaps_.resize(this->DofsMaps_.size()+1); + this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); - this->InterfaceCoarseSpaces_[blockId].reset(new CoarseSpace(this->MpiComm_,this->SerialComm_)); + this->DofsMaps_[blockId] = dofsMaps; + this->DofsPerNode_[blockId] = dofsPerNode; - if (this->ParameterList_->get("Test Unconnected Interface",true)) { - DDInterface_->divideUnconnectedEntities(this->K_); - } + Array tmpDirichletBoundaryDofs(dirichletBoundaryDofs()); // Here, we do a copy. 
Maybe, this is not necessary + sortunique(tmpDirichletBoundaryDofs); - DDInterface_->sortVerticesEdgesFaces(nodeList); - - EntitySetPtr interface = DDInterface_->getInterface(); - EntitySetPtr interior = DDInterface_->getInterior(); - - //////////////////////////////// - // Build Processor Map Coarse // - //////////////////////////////// - DDInterface_->buildEntityMaps(useVertexTranslations, - useShortEdgeTranslations||useShortEdgeRotations, - useStraightEdgeTranslations || useStraightEdgeRotations, - useEdgeTranslations || useEdgeRotations, - useFaceTranslations || useFaceRotations, - false); - - // Vertices - if (useVertexTranslations) { - XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getVertices()); - ConstXMapPtr verticesEntityMap = DDInterface_->getVertices()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(verticesEntityMap,null,translations[i]); - } - } - // ShortEdges - if (useShortEdgeTranslations) { - XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getShortEdges()); - ConstXMapPtr shortEdgesEntityMap = DDInterface_->getShortEdges()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(shortEdgesEntityMap,null,translations[i]); - } - } - if (useShortEdgeRotations) { - XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getShortEdges(),(dimension==3)); - ConstXMapPtr shortEdgesEntityMap = DDInterface_->getShortEdges()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(shortEdgesEntityMap,null,rotations[i]); - } - } - // StraightEdges - if (useStraightEdgeTranslations) { - XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getStraightEdges()); - ConstXMapPtr straightEdgesEntityMap = DDInterface_->getStraightEdges()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(straightEdgesEntityMap,null,translations[i]); - } - } - if (useStraightEdgeRotations) { - XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getStraightEdges(),(dimension==3)); - ConstXMapPtr straightEdgesEntityMap = DDInterface_->getStraightEdges()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(straightEdgesEntityMap,null,rotations[i]); - } - } - // Edges - if (useEdgeTranslations) { - XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getEdges()); - ConstXMapPtr edgesEntityMap = DDInterface_->getEdges()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(edgesEntityMap,null,translations[i]); - } - } - if (useEdgeRotations) { - XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getEdges()); - ConstXMapPtr edgesEntityMap = DDInterface_->getEdges()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(edgesEntityMap,null,rotations[i]); - } - } - // Faces - if (useFaceTranslations) { - XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getFaces()); - ConstXMapPtr facesEntityMap = DDInterface_->getFaces()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(facesEntityMap,null,translations[i]); - } - } - if (useFaceRotations) { - XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getFaces()); - ConstXMapPtr facesEntityMap = 
DDInterface_->getFaces()->getEntityMap(); - for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(facesEntityMap,null,rotations[i]); - } - } + DDInterface_.reset(new DDInterface(dimension,this->DofsPerNode_[blockId],nodesMap.getConst(),verbosity,this->LevelID_,communicationStrategy)); + DDInterface_->resetGlobalDofs(dofsMaps); + DDInterface_->removeDirichletNodes(tmpDirichletBoundaryDofs()); - this->InterfaceCoarseSpaces_[blockId]->assembleCoarseSpace(); + if (useVertexTranslations||useShortEdgeTranslations||useShortEdgeRotations||useStraightEdgeTranslations||useStraightEdgeRotations||useEdgeTranslations||useEdgeRotations||useFaceTranslations||useFaceRotations) { + EntitySetPtr interface = this->DDInterface_->getInterface(); + EntitySetPtr interior = this->DDInterface_->getInterior(); if (this->Verbose_) { cout @@ -512,50 +372,186 @@ namespace FROSch { << setw(89) << "-----------------------------------------------------------------------------------------" << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " << "| " - << left << setw(74) << "> GDSW coarse space " << right << setw(8) << "(Level " << setw(2) << this->LevelID_ << ")" + << left << setw(74) << "GDSWCoarseOperator " << right << setw(8) << "(Level " << setw(2) << this->LevelID_ << ")" << " |" << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " << setw(89) << "=========================================================================================" << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "Vertices " << " | " << setw(19) << "Translations " << right - << " | " << setw(41) << boolalpha << useVertexTranslations << noboolalpha - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "ShortEdges " << " | " << setw(19) << "Translations " << right - << " | " << setw(41) << boolalpha << useShortEdgeTranslations << noboolalpha - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "ShortEdges " << " | " << setw(19) << "Rotations " << right - << " | " << setw(41) << boolalpha << useShortEdgeRotations << noboolalpha - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "StraightEdges " << " | " << setw(19) << "Translations " << right - << " | " << setw(41) << boolalpha << useStraightEdgeTranslations << noboolalpha + << "| " << left << setw(41) << "Block" << right + << " | " << setw(41) << blockId << " |" << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "StraightEdges " << " | " << setw(19) << "Rotations " << right - << " | " << setw(41) << boolalpha << useStraightEdgeRotations << noboolalpha + << "| " << left << setw(41) << "Spatial dimensions" << right + << " | " << setw(41) << dimension << " |" << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "Edges " << " | " << setw(19) << "Translations " << right - << " | " << setw(41) << boolalpha << useEdgeTranslations << noboolalpha - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "Edges " << " | " << setw(19) << "Rotations " << right - << " | " << setw(41) << boolalpha << useEdgeRotations << noboolalpha - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "Faces " << " | " << setw(19) << "Translations " << right - << " | " << setw(41) << boolalpha << useFaceTranslations << noboolalpha - << " |" - << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " - << "| " << left << setw(19) << "Faces " << " | " << setw(19) << "Rotations " << right - << " | 
" << setw(41) << boolalpha << useFaceRotations << noboolalpha + << "| " << left << setw(41) << "Number of degrees of freedom per node" << right + << " | " << setw(41) << dofsPerNode << " |" << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " << setw(89) << "-----------------------------------------------------------------------------------------" << endl; } + + // Check for interface + if (interface->getEntity(0)->getNumNodes()==0) { + FROSCH_NOTIFICATION("FROSch::GDSWCoarseOperator",this->Verbose_,"No interface found => Volume functions will be used instead."); + this->computeVolumeFunctions(blockId,dimension,nodesMap,nodeList,interior); + } else { + this->GammaDofs_[blockId] = LOVecPtr(this->DofsPerNode_[blockId]*interface->getEntity(0)->getNumNodes()); + this->IDofs_[blockId] = LOVecPtr(this->DofsPerNode_[blockId]*interior->getEntity(0)->getNumNodes()); + for (UN k=0; kDofsPerNode_[blockId]; k++) { + for (UN i=0; igetEntity(0)->getNumNodes(); i++) { + this->GammaDofs_[blockId][this->DofsPerNode_[blockId]*i+k] = interface->getEntity(0)->getLocalDofID(i,k); + } + for (UN i=0; igetEntity(0)->getNumNodes(); i++) { + this->IDofs_[blockId][this->DofsPerNode_[blockId]*i+k] = interior->getEntity(0)->getLocalDofID(i,k); + } + } + + this->InterfaceCoarseSpaces_[blockId].reset(new CoarseSpace(this->MpiComm_,this->SerialComm_)); + + if (this->ParameterList_->get("Test Unconnected Interface",true)) { + DDInterface_->divideUnconnectedEntities(this->K_); + } + + DDInterface_->sortVerticesEdgesFaces(nodeList); + + EntitySetPtr interface = DDInterface_->getInterface(); + EntitySetPtr interior = DDInterface_->getInterior(); + + //////////////////////////////// + // Build Processor Map Coarse // + //////////////////////////////// + DDInterface_->buildEntityMaps(useVertexTranslations, + useShortEdgeTranslations||useShortEdgeRotations, + useStraightEdgeTranslations || useStraightEdgeRotations, + useEdgeTranslations || useEdgeRotations, + useFaceTranslations || useFaceRotations, + false); + + // Vertices + if (useVertexTranslations) { + XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getVertices()); + ConstXMapPtr verticesEntityMap = DDInterface_->getVertices()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(verticesEntityMap,null,translations[i]); + } + } + // ShortEdges + if (useShortEdgeTranslations) { + XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getShortEdges()); + ConstXMapPtr shortEdgesEntityMap = DDInterface_->getShortEdges()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(shortEdgesEntityMap,null,translations[i]); + } + } + if (useShortEdgeRotations) { + XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getShortEdges(),(dimension==3)); + ConstXMapPtr shortEdgesEntityMap = DDInterface_->getShortEdges()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(shortEdgesEntityMap,null,rotations[i]); + } + } + // StraightEdges + if (useStraightEdgeTranslations) { + XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getStraightEdges()); + ConstXMapPtr straightEdgesEntityMap = DDInterface_->getStraightEdges()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(straightEdgesEntityMap,null,translations[i]); + } + } + if (useStraightEdgeRotations) { + XMultiVectorPtrVecPtr rotations = 
this->computeRotations(blockId,dimension,nodeList,DDInterface_->getStraightEdges(),(dimension==3)); + ConstXMapPtr straightEdgesEntityMap = DDInterface_->getStraightEdges()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(straightEdgesEntityMap,null,rotations[i]); + } + } + // Edges + if (useEdgeTranslations) { + XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getEdges()); + ConstXMapPtr edgesEntityMap = DDInterface_->getEdges()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(edgesEntityMap,null,translations[i]); + } + } + if (useEdgeRotations) { + XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getEdges()); + ConstXMapPtr edgesEntityMap = DDInterface_->getEdges()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(edgesEntityMap,null,rotations[i]); + } + } + // Faces + if (useFaceTranslations) { + XMultiVectorPtrVecPtr translations = this->computeTranslations(blockId,DDInterface_->getFaces()); + ConstXMapPtr facesEntityMap = DDInterface_->getFaces()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(facesEntityMap,null,translations[i]); + } + } + if (useFaceRotations) { + XMultiVectorPtrVecPtr rotations = this->computeRotations(blockId,dimension,nodeList,DDInterface_->getFaces()); + ConstXMapPtr facesEntityMap = DDInterface_->getFaces()->getEntityMap(); + for (UN i=0; iInterfaceCoarseSpaces_[blockId]->addSubspace(facesEntityMap,null,rotations[i]); + } + } + + this->InterfaceCoarseSpaces_[blockId]->assembleCoarseSpace(); + + if (this->Verbose_) { + cout + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << setw(89) << "-----------------------------------------------------------------------------------------" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " + << left << setw(74) << "> GDSW coarse space " << right << setw(8) << "(Level " << setw(2) << this->LevelID_ << ")" + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << setw(89) << "=========================================================================================" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "Vertices " << " | " << setw(19) << "Translations " << right + << " | " << setw(41) << boolalpha << useVertexTranslations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "ShortEdges " << " | " << setw(19) << "Translations " << right + << " | " << setw(41) << boolalpha << useShortEdgeTranslations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "ShortEdges " << " | " << setw(19) << "Rotations " << right + << " | " << setw(41) << boolalpha << useShortEdgeRotations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "StraightEdges " << " | " << setw(19) << "Translations " << right + << " | " << setw(41) << boolalpha << useStraightEdgeTranslations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "StraightEdges " << " | " << setw(19) << "Rotations " << right + << " | " << setw(41) << boolalpha << useStraightEdgeRotations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "Edges " << " | " << setw(19) << "Translations " << right + << " | " << setw(41) << boolalpha << useEdgeTranslations << noboolalpha + << " |" + << 
"\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "Edges " << " | " << setw(19) << "Rotations " << right + << " | " << setw(41) << boolalpha << useEdgeRotations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "Faces " << " | " << setw(19) << "Translations " << right + << " | " << setw(41) << boolalpha << useFaceTranslations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << "| " << left << setw(19) << "Faces " << " | " << setw(19) << "Rotations " << right + << " | " << setw(41) << boolalpha << useFaceRotations << noboolalpha + << " |" + << "\n" << setw(FROSCH_OUTPUT_INDENT) << " " + << setw(89) << "-----------------------------------------------------------------------------------------" + << endl; + } + } } } return 0; diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp index 03dfaf46f1ef..b9b6b12303df 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp @@ -109,10 +109,10 @@ namespace FROSch { if (InterfaceCoarseSpaces_[i]->hasBasisMap()) { FROSCH_ASSERT(InterfaceCoarseSpaces_[i]->hasBasisMapUnique(),"FROSch::HarmonicCoarseOperator: !InterfaceCoarseSpaces_[i]->hasAssembledBasis()"); this->AssembledInterfaceCoarseSpace_->addSubspace(InterfaceCoarseSpaces_[i]->getBasisMap(),InterfaceCoarseSpaces_[i]->getBasisMapUnique(),InterfaceCoarseSpaces_[i]->getAssembledBasis(),ii); + ii += InterfaceCoarseSpaces_[i]->getAssembledBasis()->getLocalLength(); } + InterfaceCoarseSpaces_[i].reset(); } - ii += InterfaceCoarseSpaces_[i]->getAssembledBasis()->getLocalLength(); - InterfaceCoarseSpaces_[i].reset(); } return this->AssembledInterfaceCoarseSpace_->assembleCoarseSpace(); } @@ -121,60 +121,61 @@ namespace FROSch { int HarmonicCoarseOperator::addZeroCoarseSpaceBlock(ConstXMapPtr dofsMap) { FROSCH_DETAILTIMER_START_LEVELID(addZeroCoarseSpaceBlockTime,"HarmonicCoarseOperator::addZeroCoarseSpaceBlock"); - // Das könnte man noch ändern - GammaDofs_->resize(GammaDofs_.size()+1); - IDofs_->resize(IDofs_.size()+1); - InterfaceCoarseSpaces_->resize(InterfaceCoarseSpaces_.size()+1); - DofsMaps_->resize(DofsMaps_.size()+1); - DofsPerNode_->resize(DofsPerNode_.size()+1); + bool useForCoarseSpace = coarseSpaceList->get("Use For Coarse Space",true); - NumberOfBlocks_++; + if (useForCoarseSpace) { + // Das könnte man noch ändern + GammaDofs_->resize(GammaDofs_.size()+1); + IDofs_->resize(IDofs_.size()+1); + InterfaceCoarseSpaces_->resize(InterfaceCoarseSpaces_.size()+1); + DofsMaps_->resize(DofsMaps_.size()+1); + DofsPerNode_->resize(DofsPerNode_.size()+1); - ///// - int blockId = NumberOfBlocks_-1; + NumberOfBlocks_++; - // Process the parameter list - stringstream blockIdStringstream; - blockIdStringstream << blockId+1; - string blockIdString = blockIdStringstream.str(); - RCP coarseSpaceList = sublist(sublist(this->ParameterList_,"Blocks"),blockIdString.c_str()); + ///// + int blockId = NumberOfBlocks_-1; - bool useForCoarseSpace = coarseSpaceList->get("Use For Coarse Space",true); + // Process the parameter list + stringstream blockIdStringstream; + blockIdStringstream << blockId+1; + string blockIdString = blockIdStringstream.str(); + RCP coarseSpaceList = 
sublist(sublist(this->ParameterList_,"Blocks"),blockIdString.c_str()); - GammaDofs_[blockId] = LOVecPtr(0); + GammaDofs_[blockId] = LOVecPtr(0); - XMultiVectorPtr mVPhiGamma; - XMapPtr blockCoarseMap; - if (useForCoarseSpace) { - InterfaceCoarseSpaces_[blockId].reset(new CoarseSpace(this->MpiComm_,this->SerialComm_)); + XMultiVectorPtr mVPhiGamma; + XMapPtr blockCoarseMap; + if (useForCoarseSpace) { + InterfaceCoarseSpaces_[blockId].reset(new CoarseSpace(this->MpiComm_,this->SerialComm_)); - //Epetra_SerialComm serialComm; - XMapPtr serialGammaMap = MapFactory::Build(dofsMap->lib(),dofsMap->getLocalNumElements(),0,this->SerialComm_); - mVPhiGamma = MultiVectorFactory::Build(serialGammaMap,dofsMap->getLocalNumElements()); - } + //Epetra_SerialComm serialComm; + XMapPtr serialGammaMap = MapFactory::Build(dofsMap->lib(),dofsMap->getLocalNumElements(),0,this->SerialComm_); + mVPhiGamma = MultiVectorFactory::Build(serialGammaMap,dofsMap->getLocalNumElements()); + } - for (int i=0; igetLocalNumElements(); i++) { - GammaDofs_[blockId]->push_back(i); + for (int i=0; igetLocalNumElements(); i++) { + GammaDofs_[blockId]->push_back(i); - if (useForCoarseSpace) { - mVPhiGamma->replaceLocalValue(i,i,ScalarTraits::one()); + if (useForCoarseSpace) { + mVPhiGamma->replaceLocalValue(i,i,ScalarTraits::one()); + } } - } - - IDofs_[blockId] = LOVecPtr(0); - if (useForCoarseSpace) { - blockCoarseMap = MapFactory::Build(dofsMap->lib(),-1,GammaDofs_[blockId](),0,this->MpiComm_); + IDofs_[blockId] = LOVecPtr(0); - InterfaceCoarseSpaces_[blockId]->addSubspace(blockCoarseMap,mVPhiGamma); - InterfaceCoarseSpaces_[blockId]->assembleCoarseSpace(); - } + if (useForCoarseSpace) { + blockCoarseMap = MapFactory::Build(dofsMap->lib(),-1,GammaDofs_[blockId](),0,this->MpiComm_); - DofsMaps_[blockId] = XMapPtrVecPtr(0); - DofsMaps_[blockId].push_back(dofsMap); + InterfaceCoarseSpaces_[blockId]->addSubspace(blockCoarseMap,mVPhiGamma); + InterfaceCoarseSpaces_[blockId]->assembleCoarseSpace(); + } - DofsPerNode_[blockId] = 1; + DofsMaps_[blockId] = XMapPtrVecPtr(0); + DofsMaps_[blockId].push_back(dofsMap); + DofsPerNode_[blockId] = 1; + } return 0; } @@ -832,7 +833,7 @@ namespace FROSch { LO itmp = 0; ConstUNVecView numLocalBlockColumns = AssembledInterfaceCoarseSpace_->getLocalSubspaceSizes(); - FROSCH_ASSERT(numLocalBlockColumns.size()==NumberOfBlocks_,"FROSch::HarmonicCoarseOperator: numLocalBlockColumns.size()!=NumberOfBlocks_"); + FROSCH_ASSERT(numLocalBlockColumns.size()==NumberOfBlocks_,"FROSch::HarmonicCoarseOperator: numLocalBlockColumns.size()!=NumberOfBlocks_("+to_string(numLocalBlockColumns.size())+", "+to_string(NumberOfBlocks_)+") "); for (UN i=0; iisConstantStride() ? j : mvPhiICols[j]; - int col_out = mVPhiTpetraMVector->isConstantStride() ? j : mvPhiCols[j]; + int col_in = mVPhiITpetraMVector->isConstantStride() ? itmp : mvPhiICols[itmp]; + int col_out = mVPhiTpetraMVector->isConstantStride() ? 
itmp : mvPhiCols[itmp]; CopyPhiViewFunctor functor(col_in, indicesIDofsAllData, mvPhiIView, col_out, mvPhiView); for (UN ii=0; ii policy (bound[extensionBlocks[ii]], bound[extensionBlocks[ii]+1]); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_IPOUHarmonicCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_IPOUHarmonicCoarseOperator_def.hpp index 1d0dd5e6aecf..a9e717c4fd78 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_IPOUHarmonicCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_IPOUHarmonicCoarseOperator_def.hpp @@ -168,14 +168,7 @@ namespace FROSch { // Das könnte man noch ändern // Todo: Check the lengths of the vectors against NumberOfBlocks_ - this->GammaDofs_.resize(this->GammaDofs_.size()+1); - this->IDofs_.resize(this->IDofs_.size()+1); - this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); - this->DofsMaps_.resize(this->DofsMaps_.size()+1); - this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); - this->NumberOfBlocks_++; - - return resetCoarseSpaceBlock(this->NumberOfBlocks_-1,dimension,dofsPerNode,nodesMap,dofsMaps,nullSpaceBasis,dirichletBoundaryDofs,nodeList); + return resetCoarseSpaceBlock(this->NumberOfBlocks_,dimension,dofsPerNode,nodesMap,dofsMaps,nullSpaceBasis,dirichletBoundaryDofs,nodeList); } template @@ -189,27 +182,21 @@ namespace FROSch { { FROSCH_DETAILTIMER_START_LEVELID(buildCoarseSpaceTime,"IPOUHarmonicCoarseOperator::buildCoarseSpace"); - this->NumberOfBlocks_ = repeatedNodesMapVec.size(); + UN TotalNumberOfBlocks = repeatedNodesMapVec.size(); - FROSCH_ASSERT(dofsPerNodeVec.size()==this->NumberOfBlocks_,"dofsPerNodeVec.size()!=this->NumberOfBlocks_"); - FROSCH_ASSERT(repeatedDofMapsVec.size()==this->NumberOfBlocks_,"repeatedDofMapsVec.size()!=this->NumberOfBlocks_"); - FROSCH_ASSERT(nullSpaceBasisVec.size()==this->NumberOfBlocks_,"nullSpaceBasisVec.size()!=this->NumberOfBlocks_"); - FROSCH_ASSERT(dirichletBoundaryDofsVec.size()==this->NumberOfBlocks_,"dirichletBoundaryDofsVec.size()!=this->NumberOfBlocks_"); - FROSCH_ASSERT(nodeListVec.size()==this->NumberOfBlocks_,"nodeListVec.size()!=this->NumberOfBlocks_"); + FROSCH_ASSERT(dofsPerNodeVec.size()==TotalNumberOfBlocks,"dofsPerNodeVec.size()!=TotalNumberOfBlocks"); + FROSCH_ASSERT(repeatedDofMapsVec.size()==TotalNumberOfBlocks,"repeatedDofMapsVec.size()!=TotalNumberOfBlocks"); + FROSCH_ASSERT(nullSpaceBasisVec.size()==TotalNumberOfBlocks,"nullSpaceBasisVec.size()!=TotalNumberOfBlocks"); + FROSCH_ASSERT(dirichletBoundaryDofsVec.size()==TotalNumberOfBlocks,"dirichletBoundaryDofsVec.size()!=TotalNumberOfBlocks"); + FROSCH_ASSERT(nodeListVec.size()==TotalNumberOfBlocks,"nodeListVec.size()!=TotalNumberOfBlocks"); // Todo: Move this to a function in HarmonicCoarseOperator at some point - for (UN i=0; iNumberOfBlocks_; i++) { + for (UN i=0; iGammaDofs_.resize(this->GammaDofs_.size()+1); - this->IDofs_.resize(this->IDofs_.size()+1); - this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); - this->DofsMaps_.resize(this->DofsMaps_.size()+1); - this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); - - resetCoarseSpaceBlock(i, + resetCoarseSpaceBlock(this->NumberOfBlocks_, dimension, dofsPerNodeVec[i], repeatedNodesMapVec[i], @@ -234,7 +221,7 @@ namespace FROSch { { FROSCH_DETAILTIMER_START_LEVELID(resetCoarseSpaceBlockTime,"IPOUHarmonicCoarseOperator::resetCoarseSpaceBlock"); FROSCH_ASSERT(dofsMaps.size()==dofsPerNode,"dofsMaps.size()!=dofsPerNode"); - 
FROSCH_ASSERT(blockIdNumberOfBlocks_,"Block does not exist yet and can therefore not be reset."); + FROSCH_ASSERT(blockId<=this->NumberOfBlocks_,"Block does not exist yet and can therefore not be reset ("+to_string(blockId)+", "+to_string(this->NumberOfBlocks_)+")."); // Process the parameter list stringstream blockIdStringstream; @@ -254,6 +241,12 @@ namespace FROSch { bool useForCoarseSpace = coarseSpaceList->get("Use For Coarse Space",true); if (useForCoarseSpace) { + this->GammaDofs_.resize(this->GammaDofs_.size()+1); + this->IDofs_.resize(this->IDofs_.size()+1); + this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); + this->DofsMaps_.resize(this->DofsMaps_.size()+1); + this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); + this->NumberOfBlocks_++; if (this->Verbose_) { cout diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_RGDSWCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_RGDSWCoarseOperator_def.hpp index f14d2aca439f..233c5e133363 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_RGDSWCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_RGDSWCoarseOperator_def.hpp @@ -38,7 +38,7 @@ namespace FROSch { { FROSCH_DETAILTIMER_START_LEVELID(resetCoarseSpaceBlockTime,"RGDSWCoarseOperator::resetCoarseSpaceBlock"); FROSCH_ASSERT(dofsMaps.size()==dofsPerNode,"dofsMaps.size()!=dofsPerNode"); - FROSCH_ASSERT(blockIdNumberOfBlocks_,"Block does not exist yet and can therefore not be reset."); + FROSCH_ASSERT(blockId<=this->NumberOfBlocks_,"Block does not exist yet and can therefore not be reset."); if (!this->DistributionList_->get("Type","linear").compare("ZoltanDual")) { FROSCH_ASSERT(false,"RGDSWCoarseOperator:: Distribution Type ZoltanDual only works for IPOUHarmonicCoarseOperator"); @@ -88,20 +88,27 @@ namespace FROSch { FROSCH_WARNING("FROSch::RGDSWCoarseOperator",this->Verbose_,"Rotations cannot be used since nodeList.is_null()."); } - this->DofsMaps_[blockId] = dofsMaps; - this->DofsPerNode_[blockId] = dofsPerNode; + if (useForCoarseSpace) { + this->NumberOfBlocks_++; - Array tmpDirichletBoundaryDofs(dirichletBoundaryDofs()); // Here, we do a copy. Maybe, this is not necessary - sortunique(tmpDirichletBoundaryDofs); + this->GammaDofs_.resize(this->GammaDofs_.size()+1); + this->IDofs_.resize(this->IDofs_.size()+1); + this->InterfaceCoarseSpaces_.resize(this->InterfaceCoarseSpaces_.size()+1); + this->DofsMaps_.resize(this->DofsMaps_.size()+1); + this->DofsPerNode_.resize(this->DofsPerNode_.size()+1); - this->DDInterface_.reset(new DDInterface(dimension,this->DofsPerNode_[blockId],nodesMap.getConst(),verbosity,this->LevelID_,communicationStrategy)); - this->DDInterface_->resetGlobalDofs(dofsMaps); - this->DDInterface_->removeDirichletNodes(tmpDirichletBoundaryDofs); + this->DofsMaps_[blockId] = dofsMaps; + this->DofsPerNode_[blockId] = dofsPerNode; - EntitySetPtr interface = this->DDInterface_->getInterface(); - EntitySetPtr interior = this->DDInterface_->getInterior(); + Array tmpDirichletBoundaryDofs(dirichletBoundaryDofs()); // Here, we do a copy. 
Maybe, this is not necessary + sortunique(tmpDirichletBoundaryDofs); - if (useForCoarseSpace) { + this->DDInterface_.reset(new DDInterface(dimension,this->DofsPerNode_[blockId],nodesMap.getConst(),verbosity,this->LevelID_,communicationStrategy)); + this->DDInterface_->resetGlobalDofs(dofsMaps); + this->DDInterface_->removeDirichletNodes(tmpDirichletBoundaryDofs); + + EntitySetPtr interface = this->DDInterface_->getInterface(); + EntitySetPtr interior = this->DDInterface_->getInterior(); if (this->Verbose_) { cout From 1ba71f5491075d75755aaeb81808fd7e91e1a0c7 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Wed, 11 Sep 2024 20:34:29 -0600 Subject: [PATCH 05/38] FROSch : compile error.. --- .../FROSch_HarmonicCoarseOperator_def.hpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp index b9b6b12303df..ed3db33e573c 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp @@ -121,27 +121,20 @@ namespace FROSch { int HarmonicCoarseOperator::addZeroCoarseSpaceBlock(ConstXMapPtr dofsMap) { FROSCH_DETAILTIMER_START_LEVELID(addZeroCoarseSpaceBlockTime,"HarmonicCoarseOperator::addZeroCoarseSpaceBlock"); + ///// + int blockId = NumberOfBlocks_-1; + + // Process the parameter list + stringstream blockIdStringstream; + blockIdStringstream << blockId+1; + string blockIdString = blockIdStringstream.str(); + RCP coarseSpaceList = sublist(sublist(this->ParameterList_,"Blocks"),blockIdString.c_str()); bool useForCoarseSpace = coarseSpaceList->get("Use For Coarse Space",true); if (useForCoarseSpace) { // Das könnte man noch ändern - GammaDofs_->resize(GammaDofs_.size()+1); - IDofs_->resize(IDofs_.size()+1); - InterfaceCoarseSpaces_->resize(InterfaceCoarseSpaces_.size()+1); - DofsMaps_->resize(DofsMaps_.size()+1); - DofsPerNode_->resize(DofsPerNode_.size()+1); - NumberOfBlocks_++; - ///// - int blockId = NumberOfBlocks_-1; - - // Process the parameter list - stringstream blockIdStringstream; - blockIdStringstream << blockId+1; - string blockIdString = blockIdStringstream.str(); - RCP coarseSpaceList = sublist(sublist(this->ParameterList_,"Blocks"),blockIdString.c_str()); - GammaDofs_[blockId] = LOVecPtr(0); XMultiVectorPtr mVPhiGamma; @@ -162,6 +155,12 @@ namespace FROSch { } } + GammaDofs_->resize(GammaDofs_.size()+1); + IDofs_->resize(IDofs_.size()+1); + InterfaceCoarseSpaces_->resize(InterfaceCoarseSpaces_.size()+1); + DofsMaps_->resize(DofsMaps_.size()+1); + DofsPerNode_->resize(DofsPerNode_.size()+1); + IDofs_[blockId] = LOVecPtr(0); if (useForCoarseSpace) { From 0b6172d4a13235c24d9547ccf1f92d9953fe7311 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Wed, 18 Sep 2024 16:33:50 -0500 Subject: [PATCH 06/38] Added Framework tests AT2 job Signed-off-by: Anderson Chauphan --- .github/workflows/AT2.yml | 103 +++++++++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 6 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index f02eb8b30a48..578d64ea1219 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -82,8 +82,8 @@ jobs: popd export TRILINOS_DIR=${GITHUB_WORKSPACE:?} - export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/GenConfig - export 
PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/pr_tools + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/pr_tools printf "\n\n\n" @@ -173,8 +173,8 @@ jobs: popd export TRILINOS_DIR=${GITHUB_WORKSPACE:?} - export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/GenConfig - export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/pr_tools + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/pr_tools printf "\n\n\n" @@ -263,8 +263,8 @@ jobs: popd export TRILINOS_DIR=${GITHUB_WORKSPACE:?} - export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/GenConfig - export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/pr_tools + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/pr_tools printf "\n\n\n" @@ -303,3 +303,94 @@ jobs: echo "## Helpful Links" >> $GITHUB_STEP_SUMMARY echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY + + framework-tests-EXPERIMENTAL: + needs: pre-checks + runs-on: [self-hosted, python-3.8] + if: ${{ needs.pre-checks.outputs.should_skip != 'true' && (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED') }} + steps: + - name: env + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: | + env + - name: module list + shell: bash + run: | + bash -l -c "module list" + printenv PATH + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + with: + access_token: ${{ github.token }} + - name: make dirs + working-directory: / + run: | + mkdir -p /home/Trilinos/src/Trilinos + mkdir -p /home/Trilinos/build + - name: Clone trilinos + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + fetch-depth: 0 + - name: Repo status + run: | + git fetch --all + pwd + ls -lhat + git status + git branch -vv + git branch -a + - name: get dependencies + working-directory: ./packages/framework + run: | + bash -l -c "./get_dependencies.sh --container" + - name: PullRequestLinuxDriverTest.py + shell: bash -l {0} + working-directory: /home/Trilinos/build + run: | + mkdir bin + pushd bin + ln -s $(type -p python3) python + export PATH=$(pwd):${PATH} + popd + + export TRILINOS_DIR=${GITHUB_WORKSPACE:?} + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${TRILINOS_DIR}/packages/framework/pr_tools + + printf "\n\n\n" + + echo "image: ${AT2_IMAGE:-unknown}" + type python + python3 ${GITHUB_WORKSPACE}/packages/framework/pr_tools/PullRequestLinuxDriverTest.py \ + --target-branch-name ${{ github.event.pull_request.base.ref }} \ + --genconfig-build-name rhel8_python_debug_shared_no-kokkos-arch_no-asan_no-complex_no-fpic_no-mpi_no-pt_no-rdc_no-uvm_deprecated-on_pr-framework \ + --pullrequest-number ${{ github.event.pull_request.number }} \ + --pullrequest-env-config-file ${GITHUB_WORKSPACE}/packages/framework/pr_tools/trilinos_pr.ini \ + --pullrequest-gen-config-file ${GITHUB_WORKSPACE}/packages/framework/GenConfig/src/gen-config.ini \ + --workspace-dir /home/runner/_work/Trilinos \ + 
--source-dir ${GITHUB_WORKSPACE} \ + --build-dir /home/Trilinos/build \ + --dashboard-build-name `cat /etc/hostname` \ + --ctest-driver /home/runner/_work/Trilinos/Trilinos/cmake/SimpleTesting/cmake/ctest-driver.cmake \ + --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ + --filename-subprojects ./package_subproject_list.cmake \ + --filename-packageenables ./packageEnables.cmake \ + - name: Summary + if: ${{ !cancelled() }} + shell: bash -l {0} + working-directory: /home/Trilinos/build + run: | + echo "## Image" >> $GITHUB_STEP_SUMMARY + echo "image: ${AT2_IMAGE:-unknown}" >> $GITHUB_STEP_SUMMARY + echo "## CDash Links" >> $GITHUB_STEP_SUMMARY + echo "### Current Build" >> $GITHUB_STEP_SUMMARY + AT2_URL=$(> $GITHUB_STEP_SUMMARY + echo "### All Builds" >> $GITHUB_STEP_SUMMARY + AT2_ALL_BUILDS=$(> $GITHUB_STEP_SUMMARY + echo "## Helpful Links" >> $GITHUB_STEP_SUMMARY + echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY + echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY + \ No newline at end of file From 3cdd603ec2aac19740880c056e0695b67697f6dc Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 20 Sep 2024 20:53:14 -0600 Subject: [PATCH 07/38] Ifpack : propogate error code to all MPIs for additive Schwarz --- packages/ifpack/src/Ifpack_AdditiveSchwarz.h | 6 +- packages/ifpack/src/Ifpack_ConfigDefs.h | 16 +++ packages/ifpack/test/TestAll/cxx_main.cpp | 110 ++++++++++++++++++- 3 files changed, 125 insertions(+), 7 deletions(-) diff --git a/packages/ifpack/src/Ifpack_AdditiveSchwarz.h b/packages/ifpack/src/Ifpack_AdditiveSchwarz.h index f9522df44b97..2c57231cdf61 100644 --- a/packages/ifpack/src/Ifpack_AdditiveSchwarz.h +++ b/packages/ifpack/src/Ifpack_AdditiveSchwarz.h @@ -858,7 +858,7 @@ int Ifpack_AdditiveSchwarz::Initialize() IFPACK_CHK_ERR(Inverse_->SetUseTranspose(UseTranspose())); IFPACK_CHK_ERR(Inverse_->SetParameters(List_)); - IFPACK_CHK_ERR(Inverse_->Initialize()); + IFPACK_CHK_GLOBAL_ERR(Inverse_->Initialize()); // Label is for Aztec-like solvers Label_ = "Ifpack_AdditiveSchwarz, "; @@ -889,13 +889,13 @@ template int Ifpack_AdditiveSchwarz::Compute() { if (IsInitialized() == false) - IFPACK_CHK_ERR(Initialize()); + IFPACK_CHK_GLOBAL_ERR(Initialize()); Time_->ResetStartTime(); IsComputed_ = false; Condest_ = -1.0; - IFPACK_CHK_ERR(Inverse_->Compute()); + IFPACK_CHK_GLOBAL_ERR(Inverse_->Compute()); IsComputed_ = true; // need this here for Condest(Ifpack_Cheap) ++NumCompute_; diff --git a/packages/ifpack/src/Ifpack_ConfigDefs.h b/packages/ifpack/src/Ifpack_ConfigDefs.h index a5705b377719..5d62680bf38e 100644 --- a/packages/ifpack/src/Ifpack_ConfigDefs.h +++ b/packages/ifpack/src/Ifpack_ConfigDefs.h @@ -135,12 +135,28 @@ std::cerr << "IFPACK ERROR " << ifpack_err << ", " \ << __FILE__ << ", line " << __LINE__ << std::endl; \ return; } } +// prints out an error message if variable is not zero, +// and returns false +#define IFPACK_CHK_ERRB(ifpack_err) \ +{ if (ifpack_err < 0) { \ + std::cerr << "IFPACK ERROR " << ifpack_err << ", " \ + << __FILE__ << ", line " << __LINE__ << std::endl; \ + return false; } } // prints out an error message and returns #define IFPACK_RETURN(ifpack_err) \ { if (ifpack_err < 0) { \ std::cerr << "IFPACK ERROR " << ifpack_err << ", " \ << __FILE__ << ", line " << __LINE__ << std::endl; \ } return(ifpack_err); } +// prints out an error message if its *global* variable is not zero, +// and returns this *global* value. 
+#define IFPACK_CHK_GLOBAL_ERR(ifpack_err) \ +{ int local_err = ifpack_err; \ + int global_min_err = 0; \ + Comm().MinAll(&local_err, &global_min_err, 1); \ + if (global_min_err < 0) { \ + return global_min_err; \ + } } #define IFPACK_SGN(x) (((x) < 0.0) ? -1.0 : 1.0) /* sign function */ #define IFPACK_ABS(x) (((x) > 0.0) ? (x) : (-x)) /* abs function */ diff --git a/packages/ifpack/test/TestAll/cxx_main.cpp b/packages/ifpack/test/TestAll/cxx_main.cpp index d34b6d21fd78..6a4f0c00666e 100644 --- a/packages/ifpack/test/TestAll/cxx_main.cpp +++ b/packages/ifpack/test/TestAll/cxx_main.cpp @@ -66,6 +66,82 @@ #include "Ifpack_Polynomial.h" #include "Ifpack_Krylov.h" +template +Teuchos::RefCountPtr +createTriDiagMat(int NumGlobalElements, CommType Comm, bool str_singular, bool num_singular) { + // Construct a Map that puts approximatively the same number of + // equations on each processor. `0' is the index base (that is, + // numbering starts from 0. + Epetra_Map Map(NumGlobalElements, 0, Comm); + + // Create an empty EpetraCrsMatrix + Teuchos::RefCountPtr A = Teuchos::rcp( new Epetra_CrsMatrix(Copy, Map, 0)); + + // Create the structure of the matrix (tridiagonal) + int NumMyElements = Map.NumMyElements(); + + // Add rows one-at-a-time + // Need some vectors to help + + double Values[3]; + // Right now, we put zeros only in the matrix. + int Indices[3]; + int NumEntries; + /// global ID's of local ID's + int* MyGlobalElements = Map.MyGlobalElements(); + + // At this point we simply set the nonzero structure of A. + // Actual values will be inserted later (now all zeros) + for (int i = 0; i < NumMyElements; i++) + { + if (MyGlobalElements[i] == 0) + { + if (str_singular) { + NumEntries = 0; + } else { + Indices[0] = 0; + Indices[1] = 1; + if (num_singular) { + Values[0] = 0.0; + Values[1] = 0.0; + } else { + Values[0] = 2.0; + Values[1] = 1.0; + } + NumEntries = 2; + } + } + else if (MyGlobalElements[i] == NumGlobalElements-1) + { + Indices[0] = NumGlobalElements-1; + Indices[1] = NumGlobalElements-2; + Values[0] = 2.0; + Values[1] = 1.0; + NumEntries = 2; + } + else + { + Indices[0] = MyGlobalElements[i]-1; + Indices[1] = MyGlobalElements[i]; + Indices[2] = MyGlobalElements[i]+1; + Values[0] = 1.0; + Values[1] = 2.0; + Values[2] = 1.0; + NumEntries = 3; + } + + if (NumEntries > 0) + A->InsertGlobalValues(MyGlobalElements[i], + NumEntries, Values, Indices); + } + + // Finish up. + A->FillComplete(); + //A->Print(std::cout); + + return A; +} + template bool Test(const Teuchos::RefCountPtr& Matrix, Teuchos::ParameterList& List) { @@ -88,9 +164,9 @@ bool Test(const Teuchos::RefCountPtr& Matrix, Teuchos::Paramet Prec = Teuchos::rcp( new T(&*Matrix) ); assert(Prec != Teuchos::null); - IFPACK_CHK_ERR(Prec->SetParameters(List)); - IFPACK_CHK_ERR(Prec->Initialize()); - IFPACK_CHK_ERR(Prec->Compute()); + IFPACK_CHK_ERRB(Prec->SetParameters(List)); + IFPACK_CHK_ERRB(Prec->Initialize()); + IFPACK_CHK_ERRB(Prec->Compute()); // create the AztecOO solver AztecOO AztecOOSolver(Problem); @@ -170,7 +246,6 @@ int main(int argc, char *argv[]) TestPassed = false; } - if (!Test(Matrix,List)) { TestPassed = false; @@ -217,6 +292,33 @@ int main(int argc, char *argv[]) if (!Test > > >(Matrix,List)) { TestPassed = false; } + +#ifdef HAVE_IFPACK_AMESOS + // Additive Schwarz with local Amesos + // Amesos should fail on MPI-0. 
+ // So, these tests should fail, + // but are designed to check that error is propagated to all MPI processes + // instead of just failing on MPI-0, causing deadlock + bool check_for_global_error = false; + if (check_for_global_error) { + // structurally singular case + List = DefaultList; + bool num_singular = false; + bool str_singular = true; + Matrix = createTriDiagMat(10, Comm, str_singular, num_singular); + if (Test>(Matrix,List)) { + TestPassed = false; + } + // numerically singular case + num_singular = true; + str_singular = false; + Matrix = createTriDiagMat(10, Comm, str_singular, num_singular); + if (Test>(Matrix,List)) { + TestPassed = false; + } + } +#endif + if (!TestPassed) { cerr << "Test `TestAll.exe' FAILED!" << endl; exit(EXIT_FAILURE); From 916e7589a89e439f9d0723f9d28ee70592f989e6 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 20 Sep 2024 20:55:03 -0600 Subject: [PATCH 08/38] amesos : CSS Cmake --- packages/amesos/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/amesos/CMakeLists.txt b/packages/amesos/CMakeLists.txt index 4edb508fad03..ffede382d20a 100644 --- a/packages/amesos/CMakeLists.txt +++ b/packages/amesos/CMakeLists.txt @@ -39,7 +39,7 @@ ENDIF() IF (${PACKAGE_NAME}_ENABLE_CSS_MKL) IF (NOT TPL_ENABLE_MPI) MESSAGE(WARNING "*****Amesos_ENABLE_CSS_MKL requires MPI. Turning off CSS_MKL. *****") - SET(AMESOS_ENABLE_CSS_MKL OFF) + SET(${PACKAGE_NAME}_ENABLE_CSS_MKL OFF) SET(HAVE_AMESOS_CSS_MKL OFF) ENDIF() ENDIF() From beae7a3f03b43dcead8da241fbb569c355fda8b9 Mon Sep 17 00:00:00 2001 From: Victor Brunini Date: Tue, 24 Sep 2024 11:47:45 -0600 Subject: [PATCH 09/38] belos: Use a pooling strategy for Tpetra MultiVectors. To reduce the number of allocations which are expensive in Cuda builds. --- .../tpetra/example/Orthog/TpetraOrthogEx.cpp | 2 +- .../tpetra/src/BelosMultiVecTraits_Tpetra.hpp | 75 +++++++++++++++++-- .../src/solvers/Belos_Tpetra_GmresSstep.hpp | 2 +- 3 files changed, 69 insertions(+), 10 deletions(-) diff --git a/packages/belos/tpetra/example/Orthog/TpetraOrthogEx.cpp b/packages/belos/tpetra/example/Orthog/TpetraOrthogEx.cpp index 0d9da6e5ab7e..8ef7f604fee9 100644 --- a/packages/belos/tpetra/example/Orthog/TpetraOrthogEx.cpp +++ b/packages/belos/tpetra/example/Orthog/TpetraOrthogEx.cpp @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) { } //Verify coefficients: - MV coeffs_mv = makeStaticLocalMultiVector (*X1, numVecs, numVecs); + MV coeffs_mv = impl::makeStaticLocalMultiVector (*X1, numVecs, numVecs); Tpetra::deep_copy(coeffs_mv, *coeffMat); XCopy->multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, *X1, coeffs_mv, -1.0); std::vector norms(numVecs); diff --git a/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp b/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp index 376006519e11..d990262f9c81 100644 --- a/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp +++ b/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp @@ -22,12 +22,15 @@ #include "Teuchos_Array.hpp" #include "Teuchos_ScalarTraits.hpp" #include "Kokkos_ArithTraits.hpp" +#include +#include +#include #ifdef HAVE_BELOS_TSQR # include "Tpetra_TsqrAdaptor.hpp" #endif // HAVE_BELOS_TSQR -namespace { // (anonymous) +namespace impl { // MapType: a Tpetra::Map specialization. 
// @@ -88,7 +91,60 @@ makeStaticLocalMultiVector (const MultiVectorType& gblMv, return MultiVectorType (lclMap, dv); } -} // namespace (anonymous) +template +class MultiVecPool +{ +public: + MultiVecPool() { + Kokkos::push_finalize_hook([this]() { this->availableDVs.clear(); }); + } + + using MV = ::Tpetra::MultiVector; + + Teuchos::RCP getMV(const Teuchos::RCP> & map, const int numVecs) { + auto key = std::make_pair(map->getLocalNumElements(), numVecs); + auto & available = availableDVs[key]; + if(!available.empty()) { + auto dv = available.back(); + available.pop_back(); + return Teuchos::rcpWithDealloc(new MV(map, dv), RCPDeleter{available, dv}); + } + + dv_t dv("Belos::MultiVecPool DV", key.first, key.second); + return Teuchos::rcpWithDealloc(new MV(map, dv), RCPDeleter{available, dv}); + } + +private: + using dv_t = typename MV::dual_view_type; + struct RCPDeleter + { + void free(MV * mv_ptr) { + if(mv_ptr) { + dv_pool.push_back(dv); + delete mv_ptr; + } + } + std::vector & dv_pool; + dv_t dv; + }; + std::map, std::vector> availableDVs; +}; + + +template +inline MultiVecPool & getPool() +{ + static MultiVecPool static_pool; + return static_pool; +} + +template +inline Teuchos::RCP<::Tpetra::MultiVector> getMultiVectorFromPool(const Teuchos::RCP> & map, const int numVecs) +{ + return getPool().getMV(map, numVecs); +} + +} // namespace impl namespace Belos { @@ -118,7 +174,7 @@ namespace Belos { /// (distribution over one or more parallel processes) as \c X. /// Its entries are not initialized and have undefined values. static Teuchos::RCP Clone (const MV& X, const int numVecs) { - Teuchos::RCP Y (new MV (X.getMap (), numVecs, false)); + auto Y = impl::getMultiVectorFromPool(X.getMap(), numVecs); Y->setCopyOrView (Teuchos::View); return Y; } @@ -129,7 +185,8 @@ namespace Belos { // Make a deep copy of X. The one-argument copy constructor // does a shallow copy by default; the second argument tells it // to do a deep copy. - Teuchos::RCP X_copy (new MV (X, Teuchos::Copy)); + auto X_copy = impl::getMultiVectorFromPool(X.getMap(), X.getNumVectors()); + Tpetra::deep_copy(*X_copy, X); // Make Tpetra::MultiVector use the new view semantics. This is // a no-op for the Kokkos refactor version of Tpetra; it only // does something for the "classic" version of Tpetra. This @@ -176,8 +233,9 @@ namespace Belos { // mfh 14 Aug 2014: Tpetra already detects and optimizes for a // continuous column index range in MultiVector::subCopy, so we // don't have to check here. 
- Teuchos::RCP X_copy = mv.subCopy (columns ()); + auto X_copy = impl::getMultiVectorFromPool(mv.getMap(), index.size()); X_copy->setCopyOrView (Teuchos::View); + X_copy->assign(*mv.subView(columns())); return X_copy; } @@ -211,8 +269,9 @@ namespace Belos { TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, os.str() << "Should never get here!"); } - Teuchos::RCP X_copy = mv.subCopy (index); + auto X_copy = impl::getMultiVectorFromPool(mv.getMap(), index.size()); X_copy->setCopyOrView (Teuchos::View); + X_copy->assign(*mv.subView(index)); return X_copy; } @@ -374,7 +433,7 @@ namespace Belos { mv.update (alpha*B(0,0), A, beta); } else { - MV B_mv = makeStaticLocalMultiVector (A, B_numRows, B_numCols); + MV B_mv = impl::makeStaticLocalMultiVector (A, B_numRows, B_numCols); Tpetra::deep_copy (B_mv, B); mv.multiply (Teuchos::NO_TRANS, Teuchos::NO_TRANS, alpha, A, B_mv, beta); @@ -442,7 +501,7 @@ namespace Belos { return; } - MV C_mv = makeStaticLocalMultiVector (A, numRowsC, numColsC); + MV C_mv = impl::makeStaticLocalMultiVector (A, numRowsC, numColsC); // Filling with zero should be unnecessary, in theory, but not // in practice, alas (Issue_3235 test fails). C_mv.putScalar (ZERO); diff --git a/packages/belos/tpetra/src/solvers/Belos_Tpetra_GmresSstep.hpp b/packages/belos/tpetra/src/solvers/Belos_Tpetra_GmresSstep.hpp index c187a2b3d21a..8a0529d0903b 100644 --- a/packages/belos/tpetra/src/solvers/Belos_Tpetra_GmresSstep.hpp +++ b/packages/belos/tpetra/src/solvers/Belos_Tpetra_GmresSstep.hpp @@ -75,7 +75,7 @@ class CholQR { // Compute R := A^T * A, using a single BLAS call. // MV with "static" memory (e.g., Tpetra manages the static GPU memory pool) - MV R_mv = makeStaticLocalMultiVector (A, ncols, ncols); + MV R_mv = impl::makeStaticLocalMultiVector (A, ncols, ncols); //R_mv.putScalar (STS::zero ()); // compute R := A^T * A From d7989620ee723a8eed8e84b06f37a85f925c7e16 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Thu, 26 Sep 2024 13:28:10 -0500 Subject: [PATCH 10/38] Updated Framework AT2 job to use new runner label Signed-off-by: Anderson Chauphan --- .github/workflows/AT2.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index 578d64ea1219..22711ab5fdd5 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -306,7 +306,7 @@ jobs: framework-tests-EXPERIMENTAL: needs: pre-checks - runs-on: [self-hosted, python-3.8] + runs-on: [self-hosted, python-3.9] if: ${{ needs.pre-checks.outputs.should_skip != 'true' && (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED') }} steps: - name: env @@ -393,4 +393,4 @@ jobs: echo "## Helpful Links" >> $GITHUB_STEP_SUMMARY echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY - \ No newline at end of file + From c8e2697dce74496d12c7fe468e62aced88fb167c Mon Sep 17 00:00:00 2001 From: Victor Brunini Date: Thu, 26 Sep 2024 13:41:08 -0600 Subject: [PATCH 11/38] belos: Improve tpetra MV pool strategy for case of multiple different size linear solves in a single run. 
--- .../tpetra/src/BelosMultiVecTraits_Tpetra.hpp | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp b/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp index d990262f9c81..1dea7d1ec5a1 100644 --- a/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp +++ b/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp @@ -102,15 +102,38 @@ class MultiVecPool using MV = ::Tpetra::MultiVector; Teuchos::RCP getMV(const Teuchos::RCP> & map, const int numVecs) { - auto key = std::make_pair(map->getLocalNumElements(), numVecs); - auto & available = availableDVs[key]; - if(!available.empty()) { - auto dv = available.back(); + const auto num_local_elems = map->getLocalNumElements(); + size_t total_size = num_local_elems * numVecs; + + // Use lower_bound so that we can re-use a slightly larger allocation if it is available + auto available_it = availableDVs.lower_bound(total_size); + while(available_it != availableDVs.end() && available_it->second.empty()) { + ++available_it; + } + if(available_it != availableDVs.end()) { + auto & available = available_it->second; + auto full_size_dv = available.back(); available.pop_back(); - return Teuchos::rcpWithDealloc(new MV(map, dv), RCPDeleter{available, dv}); + + using dv_t = typename MV::dual_view_type; + typename dv_t::t_dev mv_dev(full_size_dv.view_device().data(), num_local_elems, numVecs); + typename dv_t::t_host mv_host(full_size_dv.view_host().data(), num_local_elems, numVecs); + + return Teuchos::rcpWithDealloc(new MV(map, dv_t(mv_dev, mv_host)), RCPDeleter{available, full_size_dv}); } - dv_t dv("Belos::MultiVecPool DV", key.first, key.second); + // No sufficiently large allocations were found so we need to create a new one. + // Also remove the largest currently available allocation if there is one because it would be able + // to use the allocation we are adding instead. 
+ auto available_rit = availableDVs.rbegin(); + while(available_rit != availableDVs.rend() && available_rit->second.empty()) { + ++available_rit; + } + if(available_rit != availableDVs.rend()) { + available_rit->second.pop_back(); + } + dv_t dv("Belos::MultiVecPool DV", num_local_elems, numVecs); + auto & available = availableDVs[total_size]; return Teuchos::rcpWithDealloc(new MV(map, dv), RCPDeleter{available, dv}); } @@ -127,7 +150,7 @@ class MultiVecPool std::vector & dv_pool; dv_t dv; }; - std::map, std::vector> availableDVs; + std::map> availableDVs; }; From 90ea917998c5541abd6d12c5b3363bbc9e7ef98a Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Fri, 27 Sep 2024 11:17:37 -0600 Subject: [PATCH 12/38] Tacho : fix compile warning on Mi300 --- .../src/impl/Tacho_NumericTools_LevelSet.hpp | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 08d77d163d25..694a1c796d8f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -1939,7 +1939,7 @@ class NumericToolsLevelSet : public NumericToolsBase { s0.rowptrU, s0.colindU, s0.nzvalsU, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); // workspace - #if ROCM_VERSION >= 50400 + #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) rocsparse_spmv_ex #else rocsparse_spmv @@ -1958,7 +1958,11 @@ class NumericToolsLevelSet : public NumericToolsBase { #if ROCM_VERSION >= 50400 // preprocess buffer_size_U = buffer_U.extent(0); + #if (ROCM_VERSION >= 60000) + rocsparse_spmv + #else rocsparse_spmv_ex + #endif (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrU, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -1971,7 +1975,7 @@ class NumericToolsLevelSet : public NumericToolsBase { s0.rowptrL, s0.colindL, s0.nzvalsL, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); // workspace - #if ROCM_VERSION >= 50400 + #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) rocsparse_spmv_ex #else rocsparse_spmv @@ -1990,7 +1994,11 @@ class NumericToolsLevelSet : public NumericToolsBase { #if ROCM_VERSION >= 50400 // preprocess buffer_size_L = buffer_L.extent(0); + #if (ROCM_VERSION >= 60000) + rocsparse_spmv + #else rocsparse_spmv_ex + #endif (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2003,7 +2011,7 @@ class NumericToolsLevelSet : public NumericToolsBase { s0.rowptrU, s0.colindU, s0.nzvalsU, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); // workspace (transpose) - #if ROCM_VERSION >= 50400 + #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) rocsparse_spmv_ex #else rocsparse_spmv @@ -2022,7 +2030,11 @@ class NumericToolsLevelSet : public NumericToolsBase { #if ROCM_VERSION >= 50400 // preprocess buffer_size_L = buffer_L.extent(0); + #if (ROCM_VERSION >= 60000) + rocsparse_spmv + #else rocsparse_spmv_ex + #endif (rocsparseHandle, rocsparse_operation_transpose, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2491,7 +2503,7 @@ class NumericToolsLevelSet : public NumericToolsBase { auto vecY = ((nlvls-1-lvl)%2 == 0 ? 
vecW : vecL); if (s0.spmv_explicit_transpose) { status = - #if ROCM_VERSION >= 50400 + #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) rocsparse_spmv_ex #else rocsparse_spmv @@ -2505,7 +2517,7 @@ class NumericToolsLevelSet : public NumericToolsBase { &buffer_size_L, (void*)buffer_L.data()); } else { status = - #if ROCM_VERSION >= 50400 + #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) rocsparse_spmv_ex #else rocsparse_spmv @@ -2827,7 +2839,7 @@ class NumericToolsLevelSet : public NumericToolsBase { auto vecX = (lvl%2 == 0 ? vecU : vecW); auto vecY = (lvl%2 == 0 ? vecW : vecU); status = - #if ROCM_VERSION >= 50400 + #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) rocsparse_spmv_ex #else rocsparse_spmv From c03262c4cdb91111e4f68f115f27a36555729231 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Thu, 26 Sep 2024 08:33:37 -0600 Subject: [PATCH 13/38] Add action guaranteeing DCO signoff Does not allow for manual override like DCO check, but does not have the niceness of the DCO instructions. So keep both. Signed-off-by: Samuel E. Browne --- .github/workflows/per-commit.yml | 23 +++++++++++++++++++ .../test/utilities/check-commit-signoffs.sh | 18 +++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 .github/workflows/per-commit.yml create mode 100755 commonTools/test/utilities/check-commit-signoffs.sh diff --git a/.github/workflows/per-commit.yml b/.github/workflows/per-commit.yml new file mode 100644 index 000000000000..f316371f2743 --- /dev/null +++ b/.github/workflows/per-commit.yml @@ -0,0 +1,23 @@ +name: Per-Commit Checks + +on: + pull_request: + +permissions: + contents: read + +jobs: + DCO-signoff: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + fetch-depth: 0 + + - name: Search for signoff statements in commit messages + run: | + $GITHUB_WORKSPACE/commonTools/test/utilities/check-commit-signoffs.sh \ + origin/${{ github.event.pull_request.base.ref }} \ + ${{ github.event.pull_request.head.sha }} diff --git a/commonTools/test/utilities/check-commit-signoffs.sh b/commonTools/test/utilities/check-commit-signoffs.sh new file mode 100755 index 000000000000..2170eb59e29d --- /dev/null +++ b/commonTools/test/utilities/check-commit-signoffs.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -o nounset +set -o errexit +set -o pipefail + +target_branch=${1} +source_branch=${2} + +estat=0 +for commit in $(git log --format=%H ${source_branch} --not ${target_branch}) +do + echo "Processing commit ${commit}" + git show -s --format=%B ${commit} | grep --extended-regexp --quiet "Signed-off-by:\s+\S+.*<\S+@\S+\.\S+>" \ + || { echo -e "Commit ${commit} does not contain the required DCO (https://developercertificate.org) sign-off!\nThe \"DCO\" check for this PR should have failed, and manual override is not permitted.\n" ; estat=1 ; } +done + +exit ${estat} From 223c841a84372140d3655a45a8efa3ec156e788b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 22:49:36 +0000 Subject: [PATCH 14/38] Bump github/codeql-action from 3.26.8 to 3.26.10 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.8 to 3.26.10. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/294a9d92911152fe08befb9ec03e240add280cb3...e2b3eafc8d227b0241d48be5f425d47c2d750a13) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- .github/workflows/scorecards.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7b51bbec8c75..bbf8e76c9799 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -62,7 +62,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 + uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -85,6 +85,6 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 + uses: github/codeql-action/analyze@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 3497f84e0df4..c8e39fa28f45 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 + uses: github/codeql-action/upload-sarif@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 with: sarif_file: results.sarif From 47474b98ba888183172aff4f102f6db7e4a1f0c2 Mon Sep 17 00:00:00 2001 From: Alan Williams Date: Mon, 30 Sep 2024 18:31:12 -0600 Subject: [PATCH 15/38] STK: Snapshot 09-30-24 18:31 from Sierra 5.21.5-353-g60799ad8 --- packages/stk/CHANGELOG.md | 3 + .../stk/stk_mesh/stk_mesh/base/BulkData.cpp | 4 +- .../stk/stk_mesh/stk_mesh/base/BulkData.hpp | 5 +- .../stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp | 26 +++ .../stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp | 27 +-- .../stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp | 44 ++++- .../stk_mesh/baseImpl/NgpFieldBLASImpl.hpp | 184 +++++++++++++++--- .../morton_lbvh/CoarseSearchMortonLBVH.hpp | 1 - .../MeshUtilsForBoundingVolumes.cpp | 5 +- .../stk_mesh/ngp/ngpFieldBLASTest.cpp | 78 +++++++- .../stk_search/UnitTestCoarseSearch.cpp | 4 +- packages/stk/stk_util/stk_util/Version.hpp | 2 +- .../stk_util/stk_util/parallel/CMakeLists.txt | 5 + .../stk_util/registry/ProductRegistry.cpp | 2 +- 14 files changed, 317 insertions(+), 73 deletions(-) diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index cf98fdc51eaf..0715081353f9 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,8 @@ # CHANGELOG +5.21.5-1 (STK_VERSION 5210501) 9/27/2024 + stk_mesh: deprecate BulkData::relation_exist + 5.21.5 (STK_VERSION 5210500) 9/25/2024 general: Fixed MI300A unified memory build errors. stk_search: Turned off sorted results by default. 
diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp index 10a3401d3b53..ad975fddea5f 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp @@ -5799,7 +5799,8 @@ BulkData::copy_entity_fields(Entity src, Entity dst) src_mesh_idx.bucket_ordinal); } -bool +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Oct 2024 +STK_DEPRECATED bool BulkData::relation_exist( const Entity entity, EntityRank subcell_rank, RelationIdentifier subcell_id ) { bool found = false; @@ -5816,6 +5817,7 @@ BulkData::relation_exist( const Entity entity, EntityRank subcell_rank, Relation return found; } +#endif void BulkData::create_side_entities(const SideSet &sideSet, const stk::mesh::PartVector& parts) { diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp index 3d2323ecf578..ce1615918346 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp @@ -571,9 +571,10 @@ class BulkData { Entity e_to, const RelationIdentifier local_id ); - +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Oct 2024 // Check if entity has a specific relation to an entity of subcell_rank - inline bool relation_exist( const Entity entity, EntityRank subcell_rank, RelationIdentifier subcell_id ); + STK_DEPRECATED bool relation_exist( const Entity entity, EntityRank subcell_rank, RelationIdentifier subcell_id ); +#endif #ifndef STK_HIDE_DEPRECATED_CODE // Delete after Sept 2024 STK_DEPRECATED inline VolatileFastSharedCommMapOneRank const& volatile_fast_shared_comm_map(EntityRank rank) const; diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp index 1722d5955489..ceb6e6bdb27a 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp @@ -530,6 +530,32 @@ int get_entity_subcell_id(const BulkData& mesh, return INVALID_SIDE; } +void get_parts_with_topology(stk::topology topology, + stk::mesh::BulkData& mesh, + stk::mesh::PartVector& parts, + bool skip_topology_root_parts) +{ + parts.clear(); + + const stk::mesh::MetaData & fem_meta = mesh.mesh_meta_data(); + + const stk::mesh::PartVector& all_parts = fem_meta.get_parts(); + + stk::mesh::PartVector::const_iterator + iter = all_parts.begin(), + iter_end = all_parts.end(); + + for(; iter!=iter_end; ++iter) { + stk::mesh::Part* part = *iter; + if (fem_meta.get_topology(*part) == topology) { + if (skip_topology_root_parts && stk::mesh::is_topology_root_part(*part)) { + continue; + } + parts.push_back(part); + } + } +} + stk::mesh::Entity get_side_entity_for_elem_side_pair_of_rank(const stk::mesh::BulkData &bulk, Entity elem, int sideOrdinal, stk::mesh::EntityRank sideRank) { if(bulk.is_valid(elem)) diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp index 433218726254..b886026b8ecf 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp @@ -127,8 +127,7 @@ stk::topology get_subcell_nodes(const BulkData& mesh, const Entity entity , EntityRank subcell_rank , unsigned subcell_ordinal , - EntityVector & subcell_nodes - ); + EntityVector & subcell_nodes); /** \brief Given an entity and collection of nodes, return the * local id of the subcell that contains those nodes in the @@ -139,32 +138,10 @@ int 
get_entity_subcell_id( const BulkData& mesh, const Entity entity , stk::topology side_topology, const EntityVector & side_nodes ); -inline void get_parts_with_topology(stk::topology topology, stk::mesh::BulkData& mesh, stk::mesh::PartVector& parts, - bool skip_topology_root_parts=false) -{ - parts.clear(); - - const stk::mesh::MetaData & fem_meta = mesh.mesh_meta_data(); - - const stk::mesh::PartVector& all_parts = fem_meta.get_parts(); - - stk::mesh::PartVector::const_iterator - iter = all_parts.begin(), - iter_end = all_parts.end(); - - for(; iter!=iter_end; ++iter) { - stk::mesh::Part* part = *iter; - if (fem_meta.get_topology(*part) == topology) { - if (skip_topology_root_parts && stk::mesh::is_topology_root_part(*part)) { - continue; - } - parts.push_back(part); - } - } -} + bool skip_topology_root_parts=false); stk::mesh::Entity get_side_entity_for_elem_side_pair_of_rank(const stk::mesh::BulkData &bulk, Entity elem, int sideOrdinal, stk::mesh::EntityRank sideRank); stk::mesh::Entity get_side_entity_for_elem_side_pair(const stk::mesh::BulkData &bulk, Entity elem, int sideOrdinal); diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp index 6ff9aaa6fdf9..3d0d0036581d 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldBLAS.hpp @@ -65,7 +65,8 @@ void field_fill(const Scalar alpha, const EXEC_SPACE& execSpace, bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - ngp_field_blas::impl::field_fill_impl(alpha, field, component, &selector, execSpace, isDeviceExecSpaceUserOverride); + std::array field_array = {&field}; + ngp_field_blas::impl::field_fill_impl(alpha, field_array.data(), field_array.size(), component, &selector, execSpace, isDeviceExecSpaceUserOverride); } template @@ -75,7 +76,8 @@ void field_fill(const Scalar alpha, const EXEC_SPACE& execSpace, bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - ngp_field_blas::impl::field_fill_impl(alpha, field, -1, nullptr, execSpace, isDeviceExecSpaceUserOverride); + std::array field_array = {&field}; + ngp_field_blas::impl::field_fill_impl(alpha, field_array.data(), field_array.size(), -1, nullptr, execSpace, isDeviceExecSpaceUserOverride); } template @@ -86,9 +88,45 @@ void field_fill(const Scalar alpha, const EXEC_SPACE& execSpace, bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) { - ngp_field_blas::impl::field_fill_impl(alpha, field, -1, &selector, execSpace, isDeviceExecSpaceUserOverride); + std::array field_array = {&field}; + ngp_field_blas::impl::field_fill_impl(alpha, field_array.data(), field_array.size(), -1, &selector, execSpace, isDeviceExecSpaceUserOverride); } + +template +inline +void field_fill(const Scalar alpha, + const std::vector& fields, + int component, + const Selector& selector, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) +{ + ngp_field_blas::impl::field_fill_impl(alpha, fields.data(), fields.size(), component, &selector, execSpace, isDeviceExecSpaceUserOverride); +} + +template +inline +void field_fill(const Scalar alpha, + const std::vector& fields, + const EXEC_SPACE& execSpace, + bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) +{ + ngp_field_blas::impl::field_fill_impl(alpha, fields.data(), fields.size(), -1, nullptr, execSpace, isDeviceExecSpaceUserOverride); +} + +template +inline +void field_fill(const Scalar alpha, + const std::vector& fields, + const Selector& selector, + const EXEC_SPACE& execSpace, + 
bool isDeviceExecSpaceUserOverride = (!std::is_same_v)) +{ + ngp_field_blas::impl::field_fill_impl(alpha, fields.data(), fields.size(), -1, &selector, execSpace, isDeviceExecSpaceUserOverride); +} + + template inline void field_copy(const FieldBase& xField, diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp index 60b2f0b264f1..1e33e8840f47 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp @@ -99,15 +99,50 @@ void mark_field_modified(const mesh::FieldBase& field, EXEC_SPACE execSpace, boo } } -template +template class FieldFill { public: - FieldFill(const NGP_FIELD_TYPE& field, Scalar inputAlpha) - : ngpField(field), alpha(inputAlpha) - {} + FieldFill(const NGP_FIELD_TYPE* fields, int fieldCount, Scalar inputAlpha) + : ngpFieldsDynamic("ngp_fields", 0), alpha(inputAlpha), nfields(fieldCount) + { + if (nfields <= STATIC_FIELD_LIMIT) + { + for (int i=0; i < nfields; ++i) + { + ngpFieldsStatic[i] = fields[i]; + } + } else + { + Kokkos::resize(ngpFieldsDynamic, nfields); + auto ngpFieldsDynamicHost = Kokkos::create_mirror_view(ngpFieldsDynamic); + for (int i=0; i < nfields; ++i) + { + ngpFieldsDynamicHost[i] = fields[i]; + } + Kokkos::deep_copy(ngpFieldsDynamic, ngpFieldsDynamicHost); + } + } KOKKOS_FUNCTION void operator()(const stk::mesh::FastMeshIndex& entityIndex) const + { + if (nfields <= STATIC_FIELD_LIMIT) + { + for (int i=0; i < nfields; ++i) + { + setComponents(ngpFieldsStatic[i], entityIndex); + } + } else + { + for (int i=0; i < nfields; ++i) + { + setComponents(ngpFieldsDynamic[i], entityIndex); + } + } + } + + KOKKOS_INLINE_FUNCTION + void setComponents(const NGP_FIELD_TYPE& ngpField, const stk::mesh::FastMeshIndex& entityIndex) const { const int numComponents = ngpField.get_num_components_per_entity(entityIndex); for(int component=0; component; + using FieldHostView = typename FieldView::HostMirror; + static constexpr int STATIC_FIELD_LIMIT = 4; + NGP_FIELD_TYPE ngpFieldsStatic[STATIC_FIELD_LIMIT]; + FieldView ngpFieldsDynamic; Scalar alpha; + int nfields; }; -template +template class FieldFillComponent { public: - FieldFillComponent(const NGP_FIELD_TYPE& field, Scalar inputAlpha, int inputComponent) - : ngpField(field), alpha(inputAlpha), component(inputComponent) - {} + static_assert(std::is_same_v> || + std::is_same_v>); + FieldFillComponent(const NGP_FIELD_TYPE* fields, int fieldCount, Scalar inputAlpha, int inputComponent) + : ngpFieldsDynamic("ngp_fields", 0), alpha(inputAlpha), component(inputComponent), nfields(fieldCount) + { + if (nfields <= STATIC_FIELD_LIMIT) + { + for (int i=0; i < nfields; ++i) + { + ngpFieldsStatic[i] = fields[i]; + } + } else + { + Kokkos::resize(ngpFieldsDynamic, nfields); + auto ngpFieldsDynamicHost = Kokkos::create_mirror_view(ngpFieldsDynamic); + for (int i=0; i < nfields; ++i) + { + ngpFieldsDynamicHost(i) = fields[i]; + } + + Kokkos::deep_copy(ngpFieldsDynamic, ngpFieldsDynamicHost); + } + } KOKKOS_FUNCTION void operator()(const stk::mesh::FastMeshIndex& entityIndex) const { - const int numComponents = ngpField.get_num_components_per_entity(entityIndex); - STK_NGP_ThrowRequire(component < numComponents); - ngpField(entityIndex, component) = alpha; + if (nfields <= STATIC_FIELD_LIMIT) + { + for (int i=0; i < nfields; ++i) + { + setComponent(ngpFieldsStatic[i], entityIndex); + } + } else + { + for (int i=0; i < nfields; ++i) + { + setComponent(ngpFieldsDynamic(i), 
entityIndex); + } + } } - NGP_FIELD_TYPE ngpField; + KOKKOS_INLINE_FUNCTION + void setComponent(const NGP_FIELD_TYPE& ngpField, const stk::mesh::FastMeshIndex& entityIndex) const + { + for (int i=0; i < nfields; ++i) + { + const int numComponents = ngpField.get_num_components_per_entity(entityIndex); + STK_NGP_ThrowRequire(component < numComponents); + ngpField(entityIndex, component) = alpha; + } + } + + using FieldView = Kokkos::View; + using FieldHostView = typename FieldView::HostMirror; + static constexpr int STATIC_FIELD_LIMIT = 4; + NGP_FIELD_TYPE ngpFieldsStatic[STATIC_FIELD_LIMIT]; + FieldView ngpFieldsDynamic; Scalar alpha; int component; + int nfields; }; template void field_fill_for_each_entity(const NGP_MESH_TYPE& ngpMesh, - const NGP_FIELD_TYPE& ngpField, + const NGP_FIELD_TYPE* ngpFields, + int nfields, Scalar alpha, int component, const stk::mesh::Selector& selector, const EXEC_SPACE& execSpace) { if (component == -1) { - FieldFill fieldFill(ngpField, alpha); - stk::mesh::for_each_entity_run(ngpMesh, ngpField.get_rank(), selector, fieldFill, execSpace); + FieldFill fieldFill(ngpFields, nfields, alpha); + stk::mesh::for_each_entity_run(ngpMesh, ngpFields[0].get_rank(), selector, fieldFill, execSpace); } else { - FieldFillComponent fieldFill(ngpField, alpha, component); - stk::mesh::for_each_entity_run(ngpMesh, ngpField.get_rank(), selector, fieldFill, execSpace); + FieldFillComponent fieldFill(ngpFields, nfields, alpha, component); + stk::mesh::for_each_entity_run(ngpMesh, ngpFields[0].get_rank(), selector, fieldFill, execSpace); } } template void field_fill_impl(const Scalar alpha, - const stk::mesh::FieldBase& field, + const stk::mesh::FieldBase*const* fields, + int nfields, int component, const stk::mesh::Selector* selectorPtr, const EXEC_SPACE& execSpace, bool isDeviceExecSpaceUserOverride) { - field.clear_sync_state(); + STK_ThrowRequireMsg(nfields > 0, "must have one or more fields"); + for (int i=0; i < nfields; ++i) + { + fields[i]->clear_sync_state(); + } - std::unique_ptr fieldSelector; + stk::mesh::Selector fieldSelector; if (selectorPtr == nullptr) { - fieldSelector = std::make_unique(field); + fieldSelector = stk::mesh::Selector(*fields[0]); + for (int i=1; i < nfields; ++i) + { + fieldSelector &= stk::mesh::Selector(*fields[i]); + } + } else + { + fieldSelector = *selectorPtr; } - const stk::mesh::Selector& selector = selectorPtr != nullptr ? 
*selectorPtr : *(fieldSelector.get()); if constexpr (operate_on_ngp_mesh()) { - stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(field.get_mesh()); - stk::mesh::NgpField& ngpField = stk::mesh::get_updated_ngp_field(field); - field_fill_for_each_entity(ngpMesh, ngpField, alpha, component, selector, execSpace); + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(fields[0]->get_mesh()); + if (nfields == 1) + { + stk::mesh::NgpField ngpField = stk::mesh::get_updated_ngp_field(*fields[0]); + field_fill_for_each_entity(ngpMesh, &ngpField, nfields, alpha, component, fieldSelector, execSpace); + } else + { + std::vector> ngpFields; + for (int i=0; i < nfields; ++i) + { + ngpFields.push_back(stk::mesh::get_updated_ngp_field(*fields[i])); + } + field_fill_for_each_entity(ngpMesh, ngpFields.data(), nfields, alpha, component, fieldSelector, execSpace); + } } else { - stk::mesh::HostMesh hostMesh(field.get_mesh()); - stk::mesh::HostField hostField(field.get_mesh(), field); - field_fill_for_each_entity(hostMesh, hostField, alpha, component, selector, execSpace); + stk::mesh::HostMesh hostMesh(fields[0]->get_mesh()); + if (nfields == 1) + { + stk::mesh::HostField ngpField(fields[0]->get_mesh(), *fields[0]); + field_fill_for_each_entity(hostMesh, &ngpField, nfields, alpha, component, fieldSelector, execSpace); + } else + { + std::vector> ngpFields; + for (int i=0; i < nfields; ++i) + { + ngpFields.emplace_back(fields[i]->get_mesh(), *fields[i]); + + } + field_fill_for_each_entity(hostMesh, ngpFields.data(), nfields, alpha, component, fieldSelector, execSpace); + } } - mark_field_modified(field, execSpace, isDeviceExecSpaceUserOverride); + for (int i=0; i < nfields; ++i) + { + mark_field_modified(*fields[i], execSpace, isDeviceExecSpaceUserOverride); + } } template diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp index 36094b37baa0..1a0f729a82c3 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp @@ -297,7 +297,6 @@ inline void coarse_search_morton_lbvh( using DomainBoxType = typename DomainView::value_type::box_type; using RangeBoxType = typename RangeView::value_type::box_type; using RangeIdentProcType = typename RangeView::value_type::ident_proc_type; - using ValueType = typename DomainBoxType::value_type; using BoundingShapeIntersectionChecker = impl::BoundingShapeIntersectionCheckFunctor; using ExtendedRangeBoxView = Kokkos::View; using ExtendedRangeIdentProcView = Kokkos::View; diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp index 051f1452cd5e..b7ea5900b9ad 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/MeshUtilsForBoundingVolumes.cpp @@ -246,6 +246,7 @@ void writeExodusFileUsingBoxes(const std::vector& boxes, const std::st } const int num_nodes_per_elem = 8; + const int num_edges_per_elem = 0, num_faces_per_elem = 0; const int num_attr = 0; const unsigned num_elements = boxes.size(); std::vector numElementsPerBlock; @@ -260,7 +261,7 @@ void writeExodusFileUsingBoxes(const std::vector& boxes, const std::st for (int blockId=1;blockId<=num_blocks;blockId++) { const int num_elements_this_block = 
numElementsPerBlock[blockId-1]; - ex_put_elem_block(exoid, blockId, "HEX", num_elements_this_block, num_nodes_per_elem, num_attr); + ex_put_block(exoid, EX_ELEM_BLOCK, blockId, "HEX", num_elements_this_block, num_nodes_per_elem, num_edges_per_elem, num_faces_per_elem, num_attr); for (int j=0;j& boxes, const std::st } offset += num_elements_this_block*num_nodes_per_elem; - ex_put_elem_conn(exoid, blockId, connect.data()); + ex_put_conn(exoid, EX_ELEM_BLOCK, blockId, connect.data(), nullptr, nullptr); } ex_close(exoid); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp index 8a079cc6ea68..c257449706fa 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp @@ -76,6 +76,8 @@ class NgpFieldBLAS : public stk::unit_test_util::MeshFixture stkField1 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField1", numStates); stkField2 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField2", numStates); stkField3 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField3", numStates); + stkField4 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField4", numStates); + stkField5 = &get_meta().declare_field(stk::topology::ELEM_RANK, "variableLengthField5", numStates); stk::mesh::Part& block1 = get_meta().declare_part_with_topology("block_1", stk::topology::HEX_8); stk::mesh::Part& block2 = get_meta().declare_part_with_topology("block_2", stk::topology::HEX_8); @@ -99,6 +101,19 @@ class NgpFieldBLAS : public stk::unit_test_util::MeshFixture const std::vector init6(numComponent2, -2); stk::mesh::put_field_on_mesh(*stkField3, block2, numComponent2, init6.data()); + + const std::vector init7(numComponent1, -1); + stk::mesh::put_field_on_mesh(*stkField4, block1, numComponent1, init7.data()); + + const std::vector init8(numComponent2, -2); + stk::mesh::put_field_on_mesh(*stkField4, block2, numComponent2, init8.data()); + + const std::vector init9(numComponent1, -1); + stk::mesh::put_field_on_mesh(*stkField5, block1, numComponent1, init9.data()); + + const std::vector init10(numComponent2, -2); + stk::mesh::put_field_on_mesh(*stkField5, block2, numComponent2, init10.data()); + const std::string meshDesc = "0,1,HEX_8,1,2,3,4,5,6,7,8,block_1\n" "0,2,HEX_8,5,6,7,8,9,10,11,12,block_1\n" "0,3,HEX_8,9,13,14,15,16,17,18,19,block_2\n" @@ -114,10 +129,8 @@ class NgpFieldBLAS : public stk::unit_test_util::MeshFixture stk::mesh::Field* stkField1 = nullptr; stk::mesh::Field* stkField2 = nullptr; stk::mesh::Field* stkField3 = nullptr; - - stk::mesh::Field* stkNodeField1 = nullptr; - stk::mesh::Field* stkNodeField2 = nullptr; - stk::mesh::Field* stkNodeField3 = nullptr; + stk::mesh::Field* stkField4 = nullptr; + stk::mesh::Field* stkField5 = nullptr; }; class NgpFieldBLASNode : public stk::unit_test_util::MeshFixture @@ -226,6 +239,33 @@ TEST_F(NgpFieldBLAS, field_fill_device) ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue); } +TEST_F(NgpFieldBLAS, field_fill_device_multiple) +{ + if (get_parallel_size() != 1) { GTEST_SKIP(); } + + const double myConstantValue = 55.5; + + std::vector allFields = {stkField1, stkField2, stkField3, stkField4, stkField5}; + stk::mesh::field_fill(myConstantValue, allFields, stk::ngp::ExecSpace()); + + auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); + for (const stk::mesh::FieldBase* field : 
allFields) + { + EXPECT_TRUE(field->need_sync_to_host()); + + stk::mesh::NgpField& ngpField = stk::mesh::get_updated_ngp_field(*field); + + stk::mesh::Selector selector(*field); + ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField, selector, myConstantValue); + + const double initialValue = -1; + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *field, selector, initialValue); + + field->sync_to_host(); + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *field, selector, myConstantValue); + } +} + TEST_F(NgpFieldBLAS, field_fill_selector_device) { if (get_parallel_size() != 1) { GTEST_SKIP(); } @@ -267,6 +307,36 @@ TEST_F(NgpFieldBLAS, field_fill_component_selector_device) ngp_field_test_utils::check_field_data_on_host(get_bulk(), *stkField1, selector, myConstantValue, component, myConstantComponentValue); } +TEST_F(NgpFieldBLAS, field_fill_device_component_multiple) +{ + if (get_parallel_size() != 1) { GTEST_SKIP(); } + + const int component = 1; + const double myConstantComponentValue = 15.5; + const double myConstantValue = 55.5; + + std::vector allFields = {stkField1, stkField2, stkField3, stkField4, stkField5}; + stk::mesh::Part& block2 = *get_meta().get_part("block_2"); + stk::mesh::Selector selector = block2; + stk::mesh::field_fill(myConstantValue, allFields, selector, stk::ngp::ExecSpace()); + stk::mesh::field_fill(myConstantComponentValue, allFields, component, selector, stk::ngp::ExecSpace()); + + auto ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); + for (const stk::mesh::FieldBase* field : allFields) + { + EXPECT_TRUE(field->need_sync_to_host()); + + stk::mesh::NgpField& ngpField = stk::mesh::get_updated_ngp_field(*field); + + ngp_field_test_utils::check_field_data_on_device(ngpMesh, ngpField, selector, myConstantValue, component, myConstantComponentValue); + + field->sync_to_host(); + + ngp_field_test_utils::check_field_data_on_host(get_bulk(), *field, selector, myConstantValue, component, myConstantComponentValue); + } +} + + TEST_F(NgpFieldBLAS, field_fill_host_ngp) { if (get_parallel_size() != 1) { GTEST_SKIP(); } diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp index 290868f7b1f1..d609ce5df8c2 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp @@ -403,8 +403,6 @@ void device_local_test_coarse_search_for_algorithm(stk::search::SearchMethod alg Kokkos::View::HostMirror hostIntersections = Kokkos::create_mirror_view(intersections); Kokkos::deep_copy(hostIntersections, intersections); - Kokkos::sort(hostIntersections); - local_expect_search_results(hostIntersections); } @@ -725,7 +723,7 @@ void test_ident_proc_with_search_with_views(stk::search::SearchMethod searchMeth ASSERT_EQ(3u, searchResults.extent(0)); } - Kokkos::sort(goldResults); + Kokkos::sort(goldResults, stk::search::Comparator()); for (size_t i = 0; i < goldResults.extent(0); i++) { EXPECT_EQ(goldResults[i], searchResults[i]) diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index 297266908285..aedfb9cb959f 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. 
-#define STK_VERSION 5210500 +#define STK_VERSION 5210501 namespace stk diff --git a/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt b/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt index 729bc81c16d4..e5a99525b73a 100644 --- a/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt +++ b/packages/stk/stk_util/stk_util/parallel/CMakeLists.txt @@ -76,6 +76,11 @@ ELSE() ENDIF() target_link_libraries(stk_util_parallel PUBLIC stk_util_util) + + IF(${PROJECT_NAME}_ENABLE_SEACASAprepro_lib) + find_package(SEACASAprepro_lib REQUIRED) + target_link_libraries(stk_util_parallel PUBLIC SEACASAprepro_lib::aprepro_lib) + ENDIF() ENDIF() target_include_directories(stk_util_parallel PUBLIC diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index 32c5615621f3..48109fc4c112 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. -#define STK_VERSION_STRING "5.21.5-241-g354b4bbc" +#define STK_VERSION_STRING "5.21.5-353-g60799ad8" #endif namespace stk { From 516273a038d6592655348a4f63c00010324d1d00 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:31:28 +0000 Subject: [PATCH 16/38] Bump actions/checkout from 4.1.7 to 4.2.0 Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.7 to 4.2.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/692973e3d937129bcbf40652eb9f2f61becf3332...d632683dd7b4114ad314bca15554477dd762a938) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/AT2.yml | 6 +++--- .github/workflows/clang_format.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/dependency-review.yml | 2 +- .github/workflows/detect-git-lfs.yml | 2 +- .github/workflows/detect-mpi-comm-world.yml | 2 +- .github/workflows/per-commit.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/spack.yml | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index f02eb8b30a48..07ea66df88ae 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -55,7 +55,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 - name: Repo status @@ -146,7 +146,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 - name: Repo status @@ -237,7 +237,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 - name: Repo status diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index 32a2ea121a9c..9582304d9654 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - uses: DoozyX/clang-format-lint-action@c71d0bf4e21876ebec3e5647491186f8797fde31 # v0.18.2 with: source: './packages/muelu ./packages/tempus ./packages/teko ./packages/xpetra' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index bbf8e76c9799..eb12d791cf40 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -58,7 +58,7 @@ jobs: # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index c1209291aab7..5c03af260a6c 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -22,6 +22,6 @@ jobs: egress-policy: audit - name: 'Checkout Repository' - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: 'Dependency Review' uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 diff --git a/.github/workflows/detect-git-lfs.yml b/.github/workflows/detect-git-lfs.yml index 9fe410cacdd3..25f52a8aa30a 100644 --- a/.github/workflows/detect-git-lfs.yml +++ b/.github/workflows/detect-git-lfs.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 diff --git a/.github/workflows/detect-mpi-comm-world.yml b/.github/workflows/detect-mpi-comm-world.yml index a3374b6d4fa3..80414fd34c73 100644 --- a/.github/workflows/detect-mpi-comm-world.yml +++ b/.github/workflows/detect-mpi-comm-world.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 diff --git a/.github/workflows/per-commit.yml b/.github/workflows/per-commit.yml index f316371f2743..91fe1c8126d0 100644 --- a/.github/workflows/per-commit.yml +++ b/.github/workflows/per-commit.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 0 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index c8e39fa28f45..db665933d980 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -31,7 +31,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: persist-credentials: false diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index 6d7b43e1f58e..51a8b71e8049 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -24,7 +24,7 @@ jobs: with: access_token: ${{ github.token }} - name: Clone Trilinos - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: fetch-depth: 1 - name: Spack build From 1d24c17a5677672dade3e0f5dbe7ef23c4c85841 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 1 Oct 2024 11:36:00 -0600 Subject: [PATCH 17/38] tpetra: compatibility update with KOKKOS_VERSION>=40499 Compatibility update with kokkos@develop changes merged in kokkos/kokkos#7394 Signed-off-by: Nathan Ellingwood --- packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp | 4 ++++ .../src/Tpetra_Details_KokkosTeuchosTimerInjection.cpp | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp index 912a0c532eda..cd8dadd24ed1 100644 --- 
a/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp +++ b/packages/tpetra/core/src/Tpetra_Details_KokkosCounter.cpp @@ -88,7 +88,11 @@ namespace Details { // Figure out what count bin to stick this in int idx = (int) eid.type; +#if KOKKOS_VERSION >= 40499 + if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization)) +#else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization)) +#endif count_global[idx]++; else count_instance[idx]++; diff --git a/packages/tpetra/core/src/Tpetra_Details_KokkosTeuchosTimerInjection.cpp b/packages/tpetra/core/src/Tpetra_Details_KokkosTeuchosTimerInjection.cpp index e37c7a5b29a6..9379aaf0ce3a 100644 --- a/packages/tpetra/core/src/Tpetra_Details_KokkosTeuchosTimerInjection.cpp +++ b/packages/tpetra/core/src/Tpetra_Details_KokkosTeuchosTimerInjection.cpp @@ -42,11 +42,17 @@ namespace { else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC"; else if (eid.type == DeviceType::Unknown) device_label+="Unknown"; else device_label+="Unknown to Tpetra"; - +#if KOKKOS_VERSION >= 40499 + if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization)) + device_label += " All Instances)"; + else if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization)) + device_label += " DeepCopyResource)"; +#else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization)) device_label += " All Instances)"; else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization)) device_label += " DeepCopyResource)"; +#endif else device_label += " Instance " + std::to_string(eid.instance_id) + ")"; From a1bd08ffc344d3ed2efbbf75390f6e6f831e2f23 Mon Sep 17 00:00:00 2001 From: Curtis Ober Date: Tue, 1 Oct 2024 15:53:09 -0600 Subject: [PATCH 18/38] Update SECURITY.md --- SECURITY.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 3cca180ccf1c..e3ae00ff5c7d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,4 +6,20 @@ The latest released version of Trilinos is the only version supported with secur ## Reporting a Vulnerability -For run-of-the mill memory errors, etc., please file a Github issue. For specific, sensitive security issues, please click on "Report a vulnerability" at https://github.com/trilinos/Trilinos/security. +For run-of-the mill memory errors, etc., please file a Github issue. + +If you discover a security vulnerability within Trilinos, please follow these steps to report it: + +1. **Do Not Create a Public Issue**: Please do not disclose the vulnerability publicly until it has been addressed. +2. **Contact Us**: Please click on "Report a vulnerability" at https://github.com/trilinos/Trilinos/security. +3. **Provide Details**: Include as much information as possible about the vulnerability, including: + - A description of the vulnerability + - Steps to reproduce the issue + - Any relevant logs or screenshots + - Your contact information (optional, but helpful for follow-up) + +## Response Process + +We will acknowledge your report within 5 days and aim to provide a resolution or further information within 30 days. + +We appreciate your help in keeping our project secure! 
From 96b9685dd30047d2447d087a92ab84b3991f2fb5 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 1 Oct 2024 16:48:33 -0600 Subject: [PATCH 19/38] Tacho : clean up rocsparse_spmv function definition --- .../src/impl/Tacho_NumericTools_LevelSet.hpp | 65 +++++-------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 694a1c796d8f..322151e6015a 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -78,6 +78,14 @@ #else #define TACHO_CUSPARSE_SPMM_ALG CUSPARSE_MM_ALG_DEFAULT #endif +#elif defined(KOKKOS_ENABLE_HIP) + #if (ROCM_VERSION >= 60000) + #define tacho_rocsparse_spmv rocsparse_spmv + #elif (ROCM_VERSION >= 50400) + #define tacho_rocsparse_spmv rocsparse_spmv_ex + #else + #define tacho_rocsparse_spmv rocsparse_spmv + #endif #endif namespace Tacho { @@ -1939,11 +1947,7 @@ class NumericToolsLevelSet : public NumericToolsBase { s0.rowptrU, s0.colindU, s0.nzvalsU, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); // workspace - #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) - rocsparse_spmv_ex - #else - rocsparse_spmv - #endif + tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrU, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -1958,11 +1962,7 @@ class NumericToolsLevelSet : public NumericToolsBase { #if ROCM_VERSION >= 50400 // preprocess buffer_size_U = buffer_U.extent(0); - #if (ROCM_VERSION >= 60000) - rocsparse_spmv - #else - rocsparse_spmv_ex - #endif + tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrU, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -1975,11 +1975,7 @@ class NumericToolsLevelSet : public NumericToolsBase { s0.rowptrL, s0.colindL, s0.nzvalsL, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); // workspace - #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) - rocsparse_spmv_ex - #else - rocsparse_spmv - #endif + tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -1994,11 +1990,7 @@ class NumericToolsLevelSet : public NumericToolsBase { #if ROCM_VERSION >= 50400 // preprocess buffer_size_L = buffer_L.extent(0); - #if (ROCM_VERSION >= 60000) - rocsparse_spmv - #else - rocsparse_spmv_ex - #endif + tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2011,11 +2003,7 @@ class NumericToolsLevelSet : public NumericToolsBase { s0.rowptrU, s0.colindU, s0.nzvalsU, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); // workspace (transpose) - #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) - rocsparse_spmv_ex - #else - rocsparse_spmv - #endif + tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_transpose, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2030,11 +2018,7 @@ class NumericToolsLevelSet : public NumericToolsBase { #if ROCM_VERSION >= 50400 // preprocess buffer_size_L = buffer_L.extent(0); - #if (ROCM_VERSION >= 60000) - rocsparse_spmv - #else - 
rocsparse_spmv_ex - #endif + tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_transpose, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2502,12 +2486,7 @@ class NumericToolsLevelSet : public NumericToolsBase { auto vecX = ((nlvls-1-lvl)%2 == 0 ? vecL : vecW); auto vecY = ((nlvls-1-lvl)%2 == 0 ? vecW : vecL); if (s0.spmv_explicit_transpose) { - status = - #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) - rocsparse_spmv_ex - #else - rocsparse_spmv - #endif + status = tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrL, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2516,12 +2495,7 @@ class NumericToolsLevelSet : public NumericToolsBase { #endif &buffer_size_L, (void*)buffer_L.data()); } else { - status = - #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) - rocsparse_spmv_ex - #else - rocsparse_spmv - #endif + status = tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_transpose, &alpha, s0.descrL, vecX, &beta, vecY, // dscrL stores the same ptrs as descrU, but optimized for trans rocsparse_compute_type, rocsparse_spmv_alg_default, @@ -2838,12 +2812,7 @@ class NumericToolsLevelSet : public NumericToolsBase { } auto vecX = (lvl%2 == 0 ? vecU : vecW); auto vecY = (lvl%2 == 0 ? vecW : vecU); - status = - #if (ROCM_VERSION >= 50400 && ROCM_VERSION < 60000) - rocsparse_spmv_ex - #else - rocsparse_spmv - #endif + status = tacho_rocsparse_spmv (rocsparseHandle, rocsparse_operation_none, &alpha, s0.descrU, vecX, &beta, vecY, rocsparse_compute_type, rocsparse_spmv_alg_default, From a97e508e222919ef421b3b5b96337dc9caff296f Mon Sep 17 00:00:00 2001 From: Jonathan Hu Date: Tue, 1 Oct 2024 17:51:55 -0700 Subject: [PATCH 20/38] Tpetra: fix doxygen reference --- packages/tpetra/core/src/Tpetra_Map_decl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Map_decl.hpp b/packages/tpetra/core/src/Tpetra_Map_decl.hpp index 8f97f7b1d71f..2ea5ee6f343e 100644 --- a/packages/tpetra/core/src/Tpetra_Map_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_Map_decl.hpp @@ -643,9 +643,9 @@ namespace Tpetra { /// Teuchos::OrdinalTraits::invalid(). global_ordinal_type getGlobalElement (local_ordinal_type localIndex) const; - /// \brief Get the local Map for Kokkos kernels. + /// \brief Get the LocalMap for Kokkos-Kernels. /// - /// \warning The interface of the local Map object is SUBJECT TO + /// \warning The interface of the LocalMap object is SUBJECT TO /// CHANGE and is for EXPERT USERS ONLY. local_map_type getLocalMap () const; From 2af9e8748819503f961606e842c16dcdac206afa Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Tue, 1 Oct 2024 21:25:59 -0600 Subject: [PATCH 21/38] Add DCO instructions Signed-off-by: Samuel E. Browne --- CONTRIBUTING.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 23e6c1bb7e88..9170250a8ab5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -78,3 +78,17 @@ When your changes are ready to be integrated into Trilinos' `develop` branch: ### Feedback At this point you'll enter into a stage where you and various Trilinos developers will iterate back and forth until your changes are in an acceptable state and can be merged in. If you need to make changes to your pull request, make additional commits on your `` branch and push them up to your fork. 
Make sure you don't delete your remote feature branch or your fork of Trilinos before your pull request has been merged. + + +## Sign-off Your Work + +The Developer Certificate of Origin ([DCO](https://developercertificate.org)) is a lightweight way for contributors to certify that they wrote or otherwise have the right to submit the code they are contributing to the project. Contributors must sign-off that they adhere to these requirements by adding a Signed-off-by line to commit messages. + +Example: +``` +This is a commit message + +Signed-off-by: John A. Doe +``` + +See [`git commit --signoff`](https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---signoff) From 8666c92c6cb31065904efb4f22a74e4b3b0f23a7 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Wed, 2 Oct 2024 07:35:17 -0600 Subject: [PATCH 22/38] Fix Fortan -> Fortran typo Signed-off-by: Samuel E. Browne --- commonTools/buildTools/external/makefileSupport.mak | 2 +- packages/TrilinosInstallTests/CMakeLists.txt | 8 ++++---- packages/zoltan/config/zac_arg_config_mpi.m4 | 2 +- packages/zoltan/configure | 2 +- packages/zoltan/docs/dev_html/dev_view.html | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/commonTools/buildTools/external/makefileSupport.mak b/commonTools/buildTools/external/makefileSupport.mak index 11b8849e0e63..b4788d29b5c4 100644 --- a/commonTools/buildTools/external/makefileSupport.mak +++ b/commonTools/buildTools/external/makefileSupport.mak @@ -78,7 +78,7 @@ EXTERNAL_INCLUDES += $(EXTERNAL_INCL_DIR) # Linker Options EXTERNAL_LDFLAGS = $(TEUCHOS_LDFLAGS) $(TEUCHOS_LIBS) -# EXTERNAL_C, EXTERNAL_C++ and Fortan compiler options +# EXTERNAL_C, EXTERNAL_C++ and Fortran compiler options EXTERNAL_CFLAGS = $(TEUCHOS_CFLAGS) EXTERNAL_CXXFLAGS = $(TEUCHOS_CXXFLAGS) diff --git a/packages/TrilinosInstallTests/CMakeLists.txt b/packages/TrilinosInstallTests/CMakeLists.txt index 4f7767a56e49..9592aad4d8de 100644 --- a/packages/TrilinosInstallTests/CMakeLists.txt +++ b/packages/TrilinosInstallTests/CMakeLists.txt @@ -58,7 +58,7 @@ tribits_add_advanced_test(reduced_tarball -D${CMAKE_PROJECT_NAME}_CONFIGURE_OPTIONS_FILE=${${CMAKE_PROJECT_NAME}_CONFIGURE_OPTIONS_FILE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_Fortan_COMPILER=${CMAKE_Fortran_COMPILER} + -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -D${CMAKE_PROJECT_NAME}_ENABLE_ALL_PACKAGES=ON -D${CMAKE_PROJECT_NAME}_ASSERT_DEFINED_DEPENDENCIES=OFF ../trilinos-${Trilinos_VERSION}-Source @@ -112,7 +112,7 @@ tribits_add_advanced_test(simpleBuildAgainstTrilinos_by_package_build_tree ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_Fortan_COMPILER=${CMAKE_Fortran_COMPILER} + -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_PREFIX_PATH=${PROJECT_BINARY_DIR}/cmake_packages -DCMAKE_BUILD_TYPE=DEBUG simpleBuildAgainstTrilinos @@ -191,7 +191,7 @@ tribits_add_advanced_test(find_package_Trilinos ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_Fortan_COMPILER=${CMAKE_Fortran_COMPILER} + -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_PREFIX_PATH=${PROJECT_BINARY_DIR}/install ${CMAKE_CURRENT_SOURCE_DIR}/find_package_Trilinos PASS_REGULAR_EXPRESSION_ALL @@ -234,7 +234,7 @@ tribits_add_advanced_test(simpleBuildAgainstTrilinos ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_Fortan_COMPILER=${CMAKE_Fortran_COMPILER} + 
-DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_PREFIX_PATH=${PROJECT_BINARY_DIR}/install -DCMAKE_BUILD_TYPE=DEBUG ${PROJECT_SOURCE_DIR}/demos/simpleBuildAgainstTrilinos diff --git a/packages/zoltan/config/zac_arg_config_mpi.m4 b/packages/zoltan/config/zac_arg_config_mpi.m4 index 796d38bd6a58..aa56a332b5de 100644 --- a/packages/zoltan/config/zac_arg_config_mpi.m4 +++ b/packages/zoltan/config/zac_arg_config_mpi.m4 @@ -177,7 +177,7 @@ if test X${SEEK_MPI_COMPILERS} = Xyes; then # F77=${MPI_F77} # else # echo "-----" -# echo "Cannot find MPI Fortan 77 compiler." +# echo "Cannot find MPI Fortran 77 compiler." # echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH," # echo "or specify a path to top mpi directory (above bin) with --with-mpi=PATH," # echo "or specify a fortran 77 compiler using F77=" diff --git a/packages/zoltan/configure b/packages/zoltan/configure index c0f33f40bacf..4d6a7db6f539 100755 --- a/packages/zoltan/configure +++ b/packages/zoltan/configure @@ -3333,7 +3333,7 @@ test -n "$MPI_CXX" || MPI_CXX="notFound" # F77=${MPI_F77} # else # echo "-----" -# echo "Cannot find MPI Fortan 77 compiler." +# echo "Cannot find MPI Fortran 77 compiler." # echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH," # echo "or specify a path to top mpi directory (above bin) with --with-mpi=PATH," # echo "or specify a fortran 77 compiler using F77=" diff --git a/packages/zoltan/docs/dev_html/dev_view.html b/packages/zoltan/docs/dev_html/dev_view.html index d379704727f9..275c2add7255 100644 --- a/packages/zoltan/docs/dev_html/dev_view.html +++ b/packages/zoltan/docs/dev_html/dev_view.html @@ -111,7 +111,7 @@

any value p. Note that since the window may be resized with the mouse, you may not need image height and image width unless you must have a very specific window size. Also note that if you -ran the Fortan test driver zfdrive, you will need to rename the +ran the Fortran test driver zfdrive, you will need to rename the output files from file_name.fout.p.n to file_name.out.p.n. From 26c1ba928a87730bb2b6670623ecc1ba80b65bb2 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Wed, 2 Oct 2024 08:01:02 -0600 Subject: [PATCH 23/38] MueLu: Provide a version of ML_Epetra::SetDefaults that does not require ML (#13489) * MueLu: Adding ML_Epetra::SetDefaults support to MueLu (for when you don't have ML) * MueLu: Adding unit test for SetDefaults * MueLu: Fixes to parameter translator * MueLu: Hail Kang! --- .../MueLu_ML2MueLuParameterTranslator.cpp | 416 +++++++++++++++++- .../MueLu_ML2MueLuParameterTranslator.hpp | 82 ++++ packages/muelu/test/unit_tests/CMakeLists.txt | 1 + .../ML2MueLuParameterTranslator.cpp | 166 +++++++ 4 files changed, 662 insertions(+), 3 deletions(-) create mode 100644 packages/muelu/test/unit_tests/ML2MueLuParameterTranslator.cpp diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp index 7589c6830464..b0d19e3fdcb6 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.cpp @@ -18,10 +18,11 @@ #endif #include +using Teuchos::ParameterList; namespace MueLu { -std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList& paramList, Teuchos::ParameterList& adaptingParamList, const std::string& pname, const std::string& value) { +std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::ParameterList ¶mList, Teuchos::ParameterList &adaptingParamList, const std::string &pname, const std::string &value) { TEUCHOS_TEST_FOR_EXCEPTION(pname != "coarse: type" && pname != "coarse: list" && pname != "smoother: type" && pname.find("smoother: list", 0) != 0, Exceptions::RuntimeError, "MueLu::MLParameterListInterpreter::Setup(): Only \"coarse: type\", \"smoother: type\" or \"smoother: list\" (\"coarse: list\") are " @@ -308,7 +309,7 @@ std::string ML2MueLuParameterTranslator::GetSmootherFactory(const Teuchos::Param return mueluss.str(); } -std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList& paramList_in, const std::string& defaultVals) { +std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::ParameterList ¶mList_in, const std::string &defaultVals) { Teuchos::ParameterList paramList = paramList_in; RCP out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); // TODO: use internal out (GetOStream()) @@ -407,7 +408,7 @@ std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::Paramet // loop over all ML parameters in provided parameter list for (ParameterList::ConstIterator param = paramListWithSubList.begin(); param != paramListWithSubList.end(); ++param) { // extract ML parameter name - const std::string& pname = paramListWithSubList.name(param); + const std::string &pname = paramListWithSubList.name(param); // extract corresponding (ML) value // remove ParameterList specific information from result string @@ -508,4 +509,413 @@ std::string ML2MueLuParameterTranslator::SetParameterList(const Teuchos::Paramet return mueluss.str(); } +static void ML_OverwriteDefaults(ParameterList &inList, 
ParameterList &List, bool OverWrite) { + ParameterList *coarseList = 0; + // Don't create the coarse list if it doesn't already exist! + if (inList.isSublist("coarse: list")) + coarseList = &(inList.sublist("coarse: list")); + for (ParameterList::ConstIterator param = List.begin(); param != List.end(); param++) { + std::string pname = List.name(param); + if (coarseList && pname.find("coarse: ", 0) != std::string::npos) { + if (!coarseList->isParameter(pname) || OverWrite) + coarseList->setEntry(pname, List.entry(param)); + } else if (!inList.isParameter(pname) || OverWrite) { + inList.setEntry(pname, List.entry(param)); + } + } +} // ML_OverwriteDefaults() + +static int UpdateList(Teuchos::ParameterList &source, Teuchos::ParameterList &dest, bool OverWrite) { + for (Teuchos::ParameterList::ConstIterator param = source.begin(); param != source.end(); param++) + if (dest.isParameter(source.name(param)) == false || OverWrite) + dest.setEntry(source.name(param), source.entry(param)); + return 0; +} + +int ML2MueLuParameterTranslator::SetDefaults(std::string ProblemType, Teuchos::ParameterList &List, + int *ioptions, double *iparams, const bool OverWrite) { + Teuchos::RCP > options; + Teuchos::RCP > params; + + // Taken from AztecOO + const int MUELU_AZ_OPTIONS_SIZE = 47; + const int MUELU_AZ_PARAMS_SIZE = 30; + + /*bool SetDefaults = false; + if (ioptions == NULL || iparams == NULL) + SetDefaults = true;*/ + + if (ioptions == NULL) + options = rcp(new std::vector(MUELU_AZ_OPTIONS_SIZE)); + else + options = rcp(new std::vector(ioptions, ioptions + MUELU_AZ_OPTIONS_SIZE)); + if (iparams == NULL) + params = rcp(new std::vector(MUELU_AZ_PARAMS_SIZE)); + else + params = rcp(new std::vector(iparams, iparams + MUELU_AZ_PARAMS_SIZE)); + + // if (SetDefaults) + // AZ_defaults(&(*options)[0],&(*params)[0]); + + if (ProblemType == "SA") { + SetDefaultsSA(List, options, params, OverWrite); + } else if (ProblemType == "DD") { + SetDefaultsDD(List, options, params, OverWrite); + } else if (ProblemType == "DD-ML") { + SetDefaultsDD_3Levels(List, options, params, OverWrite); + } else if (ProblemType == "maxwell" || ProblemType == "Maxwell") { + SetDefaultsMaxwell(List, options, params, OverWrite); + } else if (ProblemType == "NSSA") { + SetDefaultsNSSA(List, options, params, OverWrite); + } else if (ProblemType == "DD-ML-LU") { + SetDefaultsDD_3Levels_LU(List, options, params, OverWrite); + } else if (ProblemType == "DD-LU") { + SetDefaultsDD_LU(List, options, params, OverWrite); + } else if (ProblemType == "Classical-AMG") { + SetDefaultsClassicalAMG(List, options, params, OverWrite); + } else { + std::cerr << "ERROR: Wrong input parameter in `SetDefaults' (" + << ProblemType << "). Should be: " << std::endl + << "ERROR: /
/ / " << std::endl; + } + + return (0); +} + +int ML2MueLuParameterTranslator::SetDefaultsSA(ParameterList &inList, + Teuchos::RCP > & /* options */, + Teuchos::RCP > & /* params */, + bool OverWrite) { + ParameterList List; + + inList.setName("SA default values"); + List.set("default values", "SA"); + List.set("max levels", 10); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "increasing"); + + List.set("aggregation: type", "Uncoupled-MIS"); + List.set("aggregation: damping factor", 1.333); + List.set("eigen-analysis: type", "cg"); + List.set("eigen-analysis: iterations", 10); + + List.set("smoother: sweeps", 2); + List.set("smoother: damping factor", 1.0); + List.set("smoother: pre or post", "both"); + List.set("smoother: type", "symmetric Gauss-Seidel"); + + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + List.set("coarse: split communicator", false); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsSA() + +int ML2MueLuParameterTranslator::SetDefaultsDD(ParameterList &inList, + Teuchos::RCP > &options, + Teuchos::RCP > ¶ms, + bool OverWrite) { + ParameterList List; + + inList.setName("DD default values"); + List.set("default values", "DD"); + List.set("max levels", 2); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "increasing"); + + List.set("aggregation: type", "METIS"); + List.set("aggregation: local aggregates", 1); + List.set("aggregation: damping factor", 1.333); + List.set("eigen-analysis: type", "power-method"); + List.set("eigen-analysis: iterations", 20); + + List.set("smoother: sweeps", 1); + List.set("smoother: pre or post", "both"); + /*#ifdef HAVE_ML_AZTECOO + List.set("smoother: type","Aztec"); + (*options)[AZ_precond] = AZ_dom_decomp; + (*options)[AZ_subdomain_solve] = AZ_ilu; + List.set("smoother: Aztec options",options); + List.set("smoother: Aztec params",params); + List.set("smoother: Aztec as solver",false); + #endif*/ + + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsDD() + +int ML2MueLuParameterTranslator::SetDefaultsDD_3Levels(ParameterList &inList, + Teuchos::RCP > &options, + Teuchos::RCP > ¶ms, + bool OverWrite) { + ParameterList List; + + inList.setName("DD-ML default values"); + List.set("default values", "DD-ML"); + + List.set("max levels", 3); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "increasing"); + + List.set("aggregation: type", "METIS"); + List.set("aggregation: nodes per aggregate", 512); + List.set("aggregation: next-level aggregates per process", 128); + List.set("aggregation: damping factor", 1.333); + List.set("eigen-analysis: type", "power-method"); + List.set("eigen-analysis: iterations", 20); + + List.set("smoother: sweeps", 1); + List.set("smoother: pre or post", "both"); + /*#ifdef HAVE_ML_AZTECOO + List.set("smoother: type","Aztec"); + (*options)[AZ_precond] = AZ_dom_decomp; + (*options)[AZ_subdomain_solve] = AZ_ilu; + List.set("smoother: Aztec options",options); + List.set("smoother: Aztec params",params); + List.set("smoother: Aztec as solver",false); + #endif*/ + + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + 
List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsDD_3Levels() + +int ML2MueLuParameterTranslator::SetDefaultsMaxwell(ParameterList &inList, + Teuchos::RCP > & /* options */, + Teuchos::RCP > & /* params */, + bool OverWrite) { + ParameterList List; + + inList.setName("Maxwell default values"); + List.set("default values", "maxwell"); + List.set("max levels", 10); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "decreasing"); + + List.set("aggregation: type", "Uncoupled-MIS"); + List.set("aggregation: damping factor", 1.333); + List.set("eigen-analysis: type", "cg"); + List.set("eigen-analysis: iterations", 10); + // dropping threshold for small entries in edge prolongator + List.set("aggregation: edge prolongator drop threshold", 0.0); + + List.set("smoother: sweeps", 1); + List.set("smoother: damping factor", 1.0); + List.set("smoother: pre or post", "both"); + List.set("smoother: type", "Hiptmair"); + List.set("smoother: Hiptmair efficient symmetric", true); + List.set("subsmoother: type", "Chebyshev"); // Hiptmair subsmoother options + List.set("subsmoother: Chebyshev alpha", 20.0); + List.set("subsmoother: node sweeps", 4); + List.set("subsmoother: edge sweeps", 4); + + // direct solver on coarse problem + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsMaxwell() + +int ML2MueLuParameterTranslator::SetDefaultsNSSA(ParameterList &inList, + Teuchos::RCP > & /* options */, + Teuchos::RCP > & /* params */, + bool OverWrite) { + ParameterList List; + + inList.setName("NSSA default values"); + List.set("default values", "NSSA"); + List.set("max levels", 10); + List.set("prec type", "MGW"); + List.set("increasing or decreasing", "increasing"); + + List.set("aggregation: type", "Uncoupled-MIS"); + List.set("energy minimization: enable", true); + List.set("eigen-analysis: type", "power-method"); + List.set("eigen-analysis: iterations", 20); + + List.set("smoother: sweeps", 4); + List.set("smoother: damping factor", .67); + List.set("smoother: pre or post", "post"); + List.set("smoother: type", "symmetric Gauss-Seidel"); + + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 256); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsNSSA() + +int ML2MueLuParameterTranslator::SetDefaultsDD_LU(ParameterList &inList, + Teuchos::RCP > &options, + Teuchos::RCP > ¶ms, + bool OverWrite) { + ParameterList List; + + inList.setName("DD-LU default values"); + List.set("default values", "DD-LU"); + List.set("max levels", 2); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "increasing"); + + List.set("aggregation: type", "METIS"); + List.set("aggregation: local aggregates", 1); + List.set("aggregation: damping factor", 1.333); + List.set("eigen-analysis: type", "power-method"); + List.set("eigen-analysis: iterations", 20); + + List.set("smoother: sweeps", 1); + List.set("smoother: pre or post", "both"); + + /*#ifdef HAVE_ML_AZTECOO + List.set("smoother: type","Aztec"); + (*options)[AZ_precond] = AZ_dom_decomp; + (*options)[AZ_subdomain_solve] = AZ_lu; + List.set("smoother: Aztec options",options); + 
List.set("smoother: Aztec params",params); + List.set("smoother: Aztec as solver",false); + #endif*/ + + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsDD_LU() + +int ML2MueLuParameterTranslator::SetDefaultsDD_3Levels_LU(ParameterList &inList, + Teuchos::RCP > &options, + Teuchos::RCP > ¶ms, + bool OverWrite) { + ParameterList List; + + inList.setName("DD-ML-LU default values"); + List.set("default values", "DD-ML-LU"); + List.set("max levels", 3); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "increasing"); + + List.set("aggregation: type", "METIS"); + List.set("aggregation: nodes per aggregate", 512); + List.set("aggregation: next-level aggregates per process", 128); + List.set("aggregation: damping factor", 1.333); + + List.set("smoother: sweeps", 1); + List.set("smoother: pre or post", "both"); + /*#ifdef HAVE_ML_AZTECOO + List.set("smoother: type","Aztec"); + (*options)[AZ_precond] = AZ_dom_decomp; + (*options)[AZ_subdomain_solve] = AZ_lu; + List.set("smoother: Aztec options",options); + List.set("smoother: Aztec params",params); + List.set("smoother: Aztec as solver",false); + #endif*/ + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsDD_3Levels_LU() + +int ML2MueLuParameterTranslator::SetDefaultsClassicalAMG(ParameterList &inList, + Teuchos::RCP > & /* options */, + Teuchos::RCP > & /* params */, + bool OverWrite) { + ParameterList List; + + inList.setName("Classical-AMG default values"); + List.set("default values", "Classical-AMG"); + List.set("max levels", 10); + List.set("prec type", "MGV"); + List.set("increasing or decreasing", "increasing"); + List.set("smoother: sweeps", 2); + List.set("smoother: damping factor", 1.0); + List.set("smoother: pre or post", "both"); + List.set("smoother: type", "symmetric Gauss-Seidel"); + + List.set("coarse: type", "Amesos-KLU"); + List.set("coarse: max size", 128); + List.set("coarse: pre or post", "post"); + List.set("coarse: sweeps", 1); + + ML_OverwriteDefaults(inList, List, OverWrite); + return 0; +} // ML2MueLuParameterTranslator::SetDefaultsClassicalAMG() + +int ML2MueLuParameterTranslator::SetDefaultsRefMaxwell(Teuchos::ParameterList &inList, bool OverWrite) { + /* Sublists */ + Teuchos::ParameterList ListRF, List11, List11c, List22, dummy; + Teuchos::ParameterList &List11_ = inList.sublist("refmaxwell: 11list"); + Teuchos::ParameterList &List22_ = inList.sublist("refmaxwell: 22list"); + Teuchos::ParameterList &List11c_ = List11_.sublist("edge matrix free: coarse"); + + /* Build Teuchos List: (1,1) coarse */ + SetDefaults("SA", List11c); + List11c.set("cycle applications", 1); + List11c.set("smoother: type", "Chebyshev"); + List11c.set("aggregation: threshold", .01); + List11c.set("coarse: type", "Amesos-KLU"); + List11c.set("ML label", "coarse (1,1) block"); + UpdateList(List11c, List11c_, OverWrite); + + /* Build Teuchos List: (1,1) */ + SetDefaults("SA", List11); + List11.set("cycle applications", 1); + List11.set("aggregation: type", "Uncoupled"); + List11.set("smoother: sweeps", 0); + List11.set("aggregation: damping factor", 0.0); + List11.set("edge matrix free: coarse", List11c); + 
List11.set("aggregation: threshold", .01); + UpdateList(List11, List11_, OverWrite); + + /* Build Teuchos List: (2,2) */ + SetDefaults("SA", List22); + List22.set("cycle applications", 1); + List22.set("smoother: type", "Chebyshev"); + List22.set("aggregation: type", "Uncoupled"); + List22.set("aggregation: threshold", .01); + List22.set("coarse: type", "Amesos-KLU"); + List22.set("ML label", "(2,2) block"); + + // This line is commented out due to IFPACK issues + // List22.set("smoother: sweeps (level 0)",0); + UpdateList(List22, List22_, OverWrite); + + /* Build Teuchos List: Overall */ + SetDefaults("maxwell", ListRF, 0, 0, false); + ListRF.set("smoother: type", "Chebyshev"); + ListRF.set("smoother: sweeps", 2); + ListRF.set("refmaxwell: 11solver", "edge matrix free"); // either "edge matrix free" or "sa" + ListRF.set("refmaxwell: 11list", List11); + ListRF.set("refmaxwell: 22solver", "multilevel"); + ListRF.set("refmaxwell: 22list", List22); + ListRF.set("refmaxwell: mode", "additive"); + ListRF.set("default values", "RefMaxwell"); + ListRF.set("zero starting solution", false); + + UpdateList(ListRF, inList, OverWrite); + + return 0; +} /*end SetDefaultsRefMaxwell*/ + } // namespace MueLu diff --git a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp index 7183619abb46..417bb3191a78 100644 --- a/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp +++ b/packages/muelu/src/Interface/MueLu_ML2MueLuParameterTranslator.hpp @@ -11,10 +11,12 @@ #define MUELU_ML2MUELUPARAMETERTRANSLATOR_HPP #include +#include #include #include #include +#include #include #include @@ -61,6 +63,86 @@ class ML2MueLuParameterTranslator { return SetParameterList(*paramList, defaultVals); } + //! Sets ML's (not MueLu's) default parameters for aggregation-based preconditioners. + /*! This function is use to set ML's default parameters, as + defined in ml_MultiLevelPreconditioner.h. This has been ported to MueLu as a backwards + compatibility feature for ML users transitioning to MueLu. These routines are designed + to be used with or without compiling ML. + + NOTE: MueLu's SetDefaults does *NOT* support the AztecOO options supported by ML. + + \param ProblemType (In) : a std::string, whose possible values are: + - "SA" : classical smoothed aggregation preconditioners; + - "NSSA" : default values for Petrov-Galerkin preconditioner for nonsymmetric systems + - "maxwell" : default values for aggregation preconditioner for eddy current systems + - "DD" : defaults for 2-level domain decomposition preconditioners based + on aggregation; + - "DD-LU" : Like "DD", but use exact LU decompositions on each subdomain; + - "DD-ML" : 3-level domain decomposition preconditioners, with coarser + spaces defined by aggregation; + - "DD-ML-LU" : Like "DD-ML", but with LU decompositions on each subdomain. + \param List (Out) : list which will populated by the default parameters + \param options (In/Out) : integer array, of size \c AZ_OPTIONS_SIZE. + NOTE: MueLu will ignore this parameter. + \param params (In/Out) : double array, of size \c AZ_PARAMS_SIZE. + NOTE: MueLu will ignore this parameter. + \param OverWrite (In) : boolean. If false, any pre-existing values in the + parameter list will be preserved. Default value is true, i.e., any + pre-existing values may be overwritten. 
+ */ + static int SetDefaults(std::string ProblemType, Teuchos::ParameterList& List, + int* options = 0, double* params = 0, const bool OverWrite = true); + + //! Sets default parameters for aggregation-based 2-level domain decomposition preconditioners. + static int SetDefaultsDD(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets default parameters for aggregation-based 2-level domain decomposition preconditioners, using LU on each subdomain + static int SetDefaultsDD_LU(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets default parameters for aggregation-based 3-level domain decomposition preconditioners. + static int SetDefaultsDD_3Levels(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets default parameters for aggregation-based 3-level domain decomposition preconditioners with LU. + static int SetDefaultsDD_3Levels_LU(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets default parameters for the eddy current equations equations. + static int SetDefaultsMaxwell(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets default parameters for classical smoothed aggregation. + static int SetDefaultsSA(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets defaults for energy minimization preconditioning for nonsymmetric problems. + static int SetDefaultsNSSA(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets defaults for classical amg + static int SetDefaultsClassicalAMG(Teuchos::ParameterList& List, + Teuchos::RCP >& options, + Teuchos::RCP >& params, + bool Overwrite = true); + + //! Sets defaults for RefMaxwell / Maxwell2 + static int SetDefaultsRefMaxwell(Teuchos::ParameterList& inList, bool OverWrite = true); //@} private: diff --git a/packages/muelu/test/unit_tests/CMakeLists.txt b/packages/muelu/test/unit_tests/CMakeLists.txt index ea356df4ea6a..7347b498987f 100644 --- a/packages/muelu/test/unit_tests/CMakeLists.txt +++ b/packages/muelu/test/unit_tests/CMakeLists.txt @@ -75,6 +75,7 @@ APPEND_SET(SOURCES MueLu_CoupledRBMFactory.cpp PermutationFactory.cpp RigidBodyModeFactory.cpp + ML2MueLuParameterTranslator.cpp ) APPEND_SET(SOURCES_INTREPID2 diff --git a/packages/muelu/test/unit_tests/ML2MueLuParameterTranslator.cpp b/packages/muelu/test/unit_tests/ML2MueLuParameterTranslator.cpp new file mode 100644 index 000000000000..fbcd0bfa0703 --- /dev/null +++ b/packages/muelu/test/unit_tests/ML2MueLuParameterTranslator.cpp @@ -0,0 +1,166 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#include +#include + +#include + +#include +#include + +#include + +#ifdef HAVE_MUELU_ML +#include +#include +#endif + +namespace MueLuTests { + +bool compareLists(Teuchos::ParameterList& L1, Teuchos::ParameterList& L2) { + return Teuchos::haveSameValuesSorted(L1, L2, true); +} + +TEUCHOS_UNIT_TEST(ML2MueLuParameterTranslator, SA) { + // SetDefaults(SA) + Teuchos::ParameterList List, goldList; + MueLu::ML2MueLuParameterTranslator::SetDefaults("SA", List); + +#ifdef HAVE_MUELU_ML + Teuchos::ParameterList mlList; + ML_Epetra::SetDefaults("SA", mlList); + TEST_EQUALITY(compareLists(List, mlList), true); +#endif + + // Gold list + goldList.set("default values", "SA"); + goldList.set("max levels", 10); + goldList.set("prec type", "MGV"); + goldList.set("increasing or decreasing", "increasing"); + goldList.set("aggregation: type", "Uncoupled-MIS"); + goldList.set("aggregation: damping factor", 1.333); + goldList.set("eigen-analysis: type", "cg"); + goldList.set("eigen-analysis: iterations", 10); + goldList.set("smoother: sweeps", 2); + goldList.set("smoother: damping factor", 1.0); + goldList.set("smoother: pre or post", "both"); + goldList.set("smoother: type", "symmetric Gauss-Seidel"); + goldList.set("coarse: type", "Amesos-KLU"); + goldList.set("coarse: max size", 128); + goldList.set("coarse: pre or post", "post"); + goldList.set("coarse: sweeps", 1); + goldList.set("coarse: split communicator", false); + + TEST_EQUALITY(compareLists(List, goldList), true); +} // SA + +TEUCHOS_UNIT_TEST(ML2MueLuParameterTranslator, RefMaxwell) { + // SetDefaults(SA) + Teuchos::ParameterList List, goldList; + MueLu::ML2MueLuParameterTranslator::SetDefaultsRefMaxwell(List); + +#ifdef HAVE_MUELU_ML + Teuchos::ParameterList mlList; + ML_Epetra::SetDefaultsRefMaxwell(mlList); + TEST_EQUALITY(compareLists(List, mlList), true); +#endif + + // Gold list + goldList.set("default values", "RefMaxwell"); + goldList.set("max levels", 10); + goldList.set("prec type", "MGV"); + goldList.set("increasing or decreasing", "decreasing"); + goldList.set("aggregation: type", "Uncoupled-MIS"); + goldList.set("aggregation: damping factor", 1.333); + goldList.set("eigen-analysis: type", "cg"); + goldList.set("eigen-analysis: iterations", 10); + goldList.set("aggregation: edge prolongator drop threshold", 0.0); + goldList.set("smoother: sweeps", 2); + goldList.set("smoother: damping factor", 1.0); + goldList.set("smoother: pre or post", "both"); + goldList.set("smoother: type", "Chebyshev"); + goldList.set("smoother: Hiptmair efficient symmetric", true); + goldList.set("subsmoother: type", "Chebyshev"); + goldList.set("subsmoother: Chebyshev alpha", 20.0); + goldList.set("subsmoother: node sweeps", 4); + goldList.set("subsmoother: edge sweeps", 4); + goldList.set("coarse: type", "Amesos-KLU"); + goldList.set("coarse: max size", 128); + goldList.set("coarse: pre or post", "post"); + goldList.set("coarse: sweeps", 1); + goldList.set("refmaxwell: 11solver", "edge matrix free"); + goldList.set("refmaxwell: 22solver", "multilevel"); + goldList.set("refmaxwell: mode", "additive"); + goldList.set("zero starting solution", false); + goldList.sublist("refmaxwell: 11list").set("default values", "SA"); + goldList.sublist("refmaxwell: 11list").set("max levels", 10); + goldList.sublist("refmaxwell: 11list").set("prec type", "MGV"); + goldList.sublist("refmaxwell: 11list").set("increasing or decreasing", 
"increasing"); + goldList.sublist("refmaxwell: 11list").set("aggregation: type", "Uncoupled"); + goldList.sublist("refmaxwell: 11list").set("aggregation: damping factor", 0.0); + goldList.sublist("refmaxwell: 11list").set("eigen-analysis: type", "cg"); + goldList.sublist("refmaxwell: 11list").set("eigen-analysis: iterations", 10); + goldList.sublist("refmaxwell: 11list").set("smoother: sweeps", 0); + goldList.sublist("refmaxwell: 11list").set("smoother: damping factor", 1.0); + goldList.sublist("refmaxwell: 11list").set("smoother: pre or post", "both"); + goldList.sublist("refmaxwell: 11list").set("smoother: type", "symmetric Gauss-Seidel"); + goldList.sublist("refmaxwell: 11list").set("coarse: type", "Amesos-KLU"); + goldList.sublist("refmaxwell: 11list").set("coarse: max size", 128); + goldList.sublist("refmaxwell: 11list").set("coarse: pre or post", "post"); + goldList.sublist("refmaxwell: 11list").set("coarse: sweeps", 1); + goldList.sublist("refmaxwell: 11list").set("coarse: split communicator", false); + goldList.sublist("refmaxwell: 11list").set("cycle applications", 1); + goldList.sublist("refmaxwell: 11list").set("aggregation: threshold", 0.01); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("default values", "SA"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("max levels", 10); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("prec type", "MGV"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("increasing or decreasing", "increasing"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("aggregation: type", "Uncoupled-MIS"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("aggregation: damping factor", 1.333); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("eigen-analysis: type", "cg"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("eigen-analysis: iterations", 10); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("smoother: sweeps", 2); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("smoother: damping factor", 1.0); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("smoother: pre or post", "both"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("smoother: type", "Chebyshev"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("coarse: type", "Amesos-KLU"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("coarse: max size", 128); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("coarse: pre or post", "post"); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("coarse: sweeps", 1); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("coarse: split communicator", false); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("cycle applications", 1); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("aggregation: threshold", 0.01); + goldList.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse").set("ML label", "coarse (1,1) block"); + + goldList.sublist("refmaxwell: 22list").set("default values", "SA"); + goldList.sublist("refmaxwell: 22list").set("max levels", 10); + 
goldList.sublist("refmaxwell: 22list").set("prec type", "MGV"); + goldList.sublist("refmaxwell: 22list").set("increasing or decreasing", "increasing"); + goldList.sublist("refmaxwell: 22list").set("aggregation: type", "Uncoupled"); + goldList.sublist("refmaxwell: 22list").set("aggregation: damping factor", 1.333); + goldList.sublist("refmaxwell: 22list").set("eigen-analysis: type", "cg"); + goldList.sublist("refmaxwell: 22list").set("eigen-analysis: iterations", 10); + goldList.sublist("refmaxwell: 22list").set("smoother: sweeps", 2); + goldList.sublist("refmaxwell: 22list").set("smoother: damping factor", 1.0); + goldList.sublist("refmaxwell: 22list").set("smoother: pre or post", "both"); + goldList.sublist("refmaxwell: 22list").set("smoother: type", "Chebyshev"); + goldList.sublist("refmaxwell: 22list").set("coarse: type", "Amesos-KLU"); + goldList.sublist("refmaxwell: 22list").set("coarse: max size", 128); + goldList.sublist("refmaxwell: 22list").set("coarse: pre or post", "post"); + goldList.sublist("refmaxwell: 22list").set("coarse: sweeps", 1); + goldList.sublist("refmaxwell: 22list").set("coarse: split communicator", false); + goldList.sublist("refmaxwell: 22list").set("cycle applications", 1); + goldList.sublist("refmaxwell: 22list").set("aggregation: threshold", 0.01); + goldList.sublist("refmaxwell: 22list").set("ML label", "(2,2) block"); + + TEST_EQUALITY(compareLists(List, goldList), true); +} + +} // namespace MueLuTests From 162707c015a0f6d3242e5d99806e1f5368864de3 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Thu, 3 Oct 2024 14:59:30 -0600 Subject: [PATCH 24/38] KokkosKernels: patch PR 2345 Fix batched serial SVD hanging for some inputs. This fixes the kernel and adds some new test cases. --- .../KokkosBatched_SVD_Serial_Internal.hpp | 54 +++-- .../unit_test/Test_Batched_SerialSVD.hpp | 214 ++++++++++++++---- 2 files changed, 195 insertions(+), 73 deletions(-) diff --git a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp index 0b85b1e28ed7..4e5c1e01dd73 100644 --- a/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp +++ b/packages/kokkos-kernels/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp @@ -51,11 +51,10 @@ struct SerialSVDInternal { template KOKKOS_INLINE_FUNCTION static void symEigen2x2(value_type a11, value_type a21, value_type a22, value_type& e1, value_type& e2) { - value_type a = Kokkos::ArithTraits::one(); - value_type b = -a11 - a22; - value_type c = a11 * a22 - a21 * a21; - using Kokkos::sqrt; - value_type sqrtDet = sqrt(b * b - 4 * a * c); + value_type a = Kokkos::ArithTraits::one(); + value_type b = -a11 - a22; + value_type c = a11 * a22 - a21 * a21; + value_type sqrtDet = Kokkos::sqrt(b * b - 4 * a * c); e1 = (-b + sqrtDet) / (2 * a); e2 = (-b - sqrtDet) / (2 * a); } @@ -78,7 +77,7 @@ struct SerialSVDInternal { value_type e1, e2, mu; symEigen2x2(dm * dm + fmm1 * fmm1, dm * fm, target, e1, e2); // the shift is the eigenvalue closer to the last diagonal entry of B^T*B - if (fabs(e1 - target) < fabs(e2 - target)) + if (Kokkos::abs(e1 - target) < Kokkos::abs(e2 - target)) mu = e1; else mu = e2; @@ -124,7 +123,7 @@ struct SerialSVDInternal { // Assumes i is not the last row. 
// U is m*m, B is n*n template - KOKKOS_INLINE_FUNCTION static void svdZeroRow(int i, value_type* B, int n, int Bs0, int Bs1, value_type* U, int m, + KOKKOS_INLINE_FUNCTION static void svdZeroRow(int i, value_type* B, int n, int Bs0, int Bs1, value_type* U, int Um, int Us0, int Us1) { Kokkos::pair G; for (int j = i + 1; j < n; j++) { @@ -138,17 +137,16 @@ struct SerialSVDInternal { &SVDIND(B, j, j + 1), Bs1); } if (U) { - KokkosBatched::SerialApplyRightGivensInternal::invoke(G, m, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, j), - Us0); + KokkosBatched::SerialApplyRightGivensInternal::invoke(G, Um, &SVDIND(U, 0, i), Us0, + &SVDIND(U, 0, j), Us0); } } } template - KOKKOS_INLINE_FUNCTION static void svdZeroLastColumn(value_type* B, int n, int Bs0, int Bs1, value_type* Vt, int Vts0, - int Vts1) { - // Deal with B(n-1, n-1) = 0, by chasing the superdiagonal nonzero up the - // last column. + KOKKOS_INLINE_FUNCTION static void svdZeroLastColumn(value_type* B, int n, int Bs0, int Bs1, int vn, value_type* Vt, + int Vts0, int Vts1) { + // Deal with B(n-1, n-1) = 0, by chasing the superdiagonal nonzero up the last column. Kokkos::pair G; for (int j = n - 2; j >= 0; j--) { KokkosBatched::SerialGivensInternal::invoke(SVDIND(B, j, j), SVDIND(B, j, n - 1), &G, @@ -159,7 +157,7 @@ struct SerialSVDInternal { &SVDIND(B, j - 1, j), Bs0); } if (Vt) { - KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, n, &SVDIND(Vt, n - 1, 0), Vts1, + KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, vn, &SVDIND(Vt, n - 1, 0), Vts1, &SVDIND(Vt, j, 0), Vts1); } } @@ -224,8 +222,9 @@ struct SerialSVDInternal { while (true) { // Zero out tiny superdiagonal entries for (int i = 0; i < n - 1; i++) { - if (fabs(SVDIND(B, i, i + 1)) < eps * (fabs(SVDIND(B, i, i)) + fabs(SVDIND(B, i + 1, i + 1))) || - fabs(SVDIND(B, i, i + 1)) < tol) { + if (Kokkos::abs(SVDIND(B, i, i + 1)) < + eps * (Kokkos::abs(SVDIND(B, i, i)) + Kokkos::abs(SVDIND(B, i + 1, i + 1))) || + Kokkos::abs(SVDIND(B, i, i + 1)) < tol) { SVDIND(B, i, i + 1) = KAT::zero(); } } @@ -246,25 +245,32 @@ struct SerialSVDInternal { for (p = q - 1; p > 0; p--) { if (SVDIND(B, p - 1, p) == KAT::zero()) break; } + value_type* Bsub = &SVDIND(B, p, p); + value_type* Usub = &SVDIND(U, 0, p); + value_type* Vtsub = &SVDIND(Vt, p, 0); + int nsub = q - p; // If there are zero diagonals in this range, eliminate the entire row //(effectively decoupling into two subproblems) for (int i = q - 1; i >= p; i--) { if (SVDIND(B, i, i) == KAT::zero()) { - if (i == n - 1) { + if (i == q - 1) { // Last diagonal entry being 0 is a special case. // Zero out the superdiagonal above it. - // Deal with B(n-1, n-1) = 0, by chasing the superdiagonal nonzero - // up the last column. - svdZeroLastColumn(B, n, Bs0, Bs1, Vt, Vts0, Vts1); + // Deal with B(q-1, q-1) = 0, by chasing the superdiagonal nonzero + // B(q-2, q-1) up the last column. + // + // Once that nonzero reaches B(p, q-1), we are either at the top of B + // (if p == 0) or the superdiag above B(p, p) is zero. + // In either case, the chase stops after eliminating B(p, q-1) because no + // new entry is introduced by the Givens. 
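          // Aside (general background, independent of this routine's sign convention):
          // a Givens rotation built from a pair (x, y), with
          //   r = sqrt(x*x + y*y), c = x/r, s = y/r,
          // maps (x, y) -> (r, 0). The chase below applies one such rotation per step:
          // each application annihilates the stray nonzero and, away from the boundary
          // case described above, re-creates it one position further along, until it
          // drops off the edge of the matrix.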
+ svdZeroLastColumn(Bsub, nsub, Bs0, Bs1, n, Vtsub, Vts0, Vts1); } else if (SVDIND(B, i, i + 1) != KAT::zero()) { - svdZeroRow(i, B, n, Bs0, Bs1, U, m, Us0, Us1); + svdZeroRow(i - p, Bsub, nsub, Bs0, Bs1, Usub, m, Us0, Us1); } } - continue; } - int nsub = q - p; // B22 is nsub * nsub, Usub is m * nsub, and Vtsub is nsub * n - svdStep(&SVDIND(B, p, p), &SVDIND(U, 0, p), &SVDIND(Vt, p, 0), m, n, nsub, Bs0, Bs1, Us0, Us1, Vts0, Vts1); + svdStep(Bsub, Usub, Vtsub, m, n, nsub, Bs0, Bs1, Us0, Us1, Vts0, Vts1); } for (int i = 0; i < n; i++) { sigma[i * ss] = SVDIND(B, i, i); diff --git a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp index 9bf9d43578e5..a30c9dcc899e 100644 --- a/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp +++ b/packages/kokkos-kernels/batched/dense/unit_test/Test_Batched_SerialSVD.hpp @@ -103,40 +103,6 @@ void verifySVD(const AView& A, const UView& U, const VtView& Vt, const SigmaView } } -template -Matrix createRandomMatrix(int m, int n, int deficiency, double maxval = 1.0) { - using Scalar = typename Matrix::non_const_value_type; - Matrix mat("A", m, n); - auto mhost = Kokkos::create_mirror_view(mat); - // Fill mat with random values first - if (maxval != 0.0) { - Kokkos::Random_XorShift64_Pool rand_pool(13718); - Scalar minrand, maxrand; - Test::getRandomBounds(maxval, minrand, maxrand); - Kokkos::fill_random(mhost, rand_pool, minrand, maxrand); - } - // Apply the rank deficiency. - // If m < n, make some rows a multiple of the first row. - // Otherwise, make some columns a multiple of the first column. - if (m < n) { - for (int i = 0; i < deficiency; i++) { - // make row i + 1 a multiple of row 0 - for (int j = 0; j < n; j++) { - mhost(i + 1, j) = (double)(i + 2) * mhost(0, j); - } - } - } else { - for (int i = 0; i < deficiency; i++) { - // make col i + 1 a multiple of col 0 - for (int j = 0; j < m; j++) { - mhost(j, i + 1) = (double)(i + 2) * mhost(j, 0); - } - } - } - Kokkos::deep_copy(mat, mhost); - return mat; -} - template struct SerialSVDFunctor_Full { SerialSVDFunctor_Full(const Matrix& A_, const Matrix& U_, const Matrix& Vt_, const Vector& sigma_, @@ -172,12 +138,39 @@ struct SerialSVDFunctor_SingularValuesOnly { Vector work; }; +template +Matrix randomMatrixWithRank(int m, int n, int rank) { + Matrix A("A", m, n); + if (rank == Kokkos::min(m, n)) { + // A is full-rank so as a shortcut, fill it with random values directly. 
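    // (Both branches produce the requested rank with probability one: a dense random
    //  fill is full rank almost surely, while in the deficient branch below
    //  rank(U * Vt) <= rank, with equality holding almost surely for random factors.)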
+ Kokkos::Random_XorShift64_Pool rand_pool(13318); + Kokkos::fill_random(A, rand_pool, -1.0, 1.0); + } else { + // A is rank-deficient, so compute it as a product of two random matrices + using MatrixHost = typename Matrix::HostMirror; + auto Ahost = Kokkos::create_mirror_view(A); + Kokkos::Random_XorShift64_Pool rand_pool(13318); + MatrixHost U("U", m, rank); + MatrixHost Vt("Vt", rank, n); + Kokkos::fill_random(U, rand_pool, -1.0, 1.0); + Kokkos::fill_random(Vt, rand_pool, -1.0, 1.0); + Test::vanillaGEMM(1.0, U, Vt, 0.0, Ahost); + Kokkos::deep_copy(A, Ahost); + } + return A; +} + +template +Matrix randomMatrixWithRank(int m, int n) { + return randomMatrixWithRank(m, n, Kokkos::min(m, n)); +} + template -void testSerialSVD(int m, int n, int deficiency, double maxval = 1.0) { +void testSerialSVD(int m, int n, int rank) { using Matrix = Kokkos::View; using Vector = Kokkos::View; using ExecSpace = typename Device::execution_space; - Matrix A = createRandomMatrix(m, n, deficiency, maxval); + Matrix A = randomMatrixWithRank(m, n, rank); // Fill U, Vt, sigma with nonzeros as well to make sure they are properly // overwritten Matrix U("U", m, m); @@ -185,6 +178,8 @@ void testSerialSVD(int m, int n, int deficiency, double maxval = 1.0) { int maxrank = std::min(m, n); Vector sigma("sigma", maxrank); Vector work("work", std::max(m, n)); + // Fill these views with an arbitrary value, to make sure SVD + // doesn't rely on them being zero initialized. Kokkos::deep_copy(U, -5.0); Kokkos::deep_copy(Vt, -5.0); Kokkos::deep_copy(sigma, -5.0); @@ -204,12 +199,17 @@ void testSerialSVD(int m, int n, int deficiency, double maxval = 1.0) { verifySVD(Acopy, Uhost, Vthost, sigmaHost); } +template +void testSerialSVD(int m, int n) { + testSerialSVD(m, n, Kokkos::min(m, n)); +} + template void testSerialSVDSingularValuesOnly(int m, int n) { using Matrix = Kokkos::View; using Vector = Kokkos::View; using ExecSpace = typename Device::execution_space; - Matrix A = createRandomMatrix(m, n, 0); + Matrix A = randomMatrixWithRank(m, n); // Fill U, Vt, sigma with nonzeros as well to make sure they are properly // overwritten Matrix U("U", m, m); @@ -248,7 +248,7 @@ void testSerialSVDZeroLastRow(int n) { // Generate a bidiagonal matrix using Matrix = Kokkos::View; using KAT = Kokkos::ArithTraits; - Matrix B = createRandomMatrix(n, n, 0, 1.0); + Matrix B = randomMatrixWithRank(n, n); // Zero out entries to make B bidiagonal for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { @@ -265,7 +265,7 @@ void testSerialSVDZeroLastRow(int n) { Matrix BVt("UBVt", n, n); Test::vanillaGEMM(1.0, B, Vt, 0.0, BVt); // Run the routine (just on host) - KokkosBatched::SerialSVDInternal::svdZeroLastColumn(B.data(), n, B.stride(0), B.stride(1), Vt.data(), + KokkosBatched::SerialSVDInternal::svdZeroLastColumn(B.data(), n, B.stride(0), B.stride(1), n, Vt.data(), Vt.stride(0), Vt.stride(1)); // Check that B is still bidiagonal (to a tight tolerance, but not exactly // zero) @@ -298,7 +298,7 @@ void testSerialSVDZeroDiagonal(int n, int row) { using KAT = Kokkos::ArithTraits; int m = n + 2; // Make U somewhat bigger to make sure the Givens transforms // are applied correctly - Matrix B = createRandomMatrix(m, n, 0, 1.0); + Matrix B = randomMatrixWithRank(m, n); // Zero out entries to make B bidiagonal for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { @@ -342,19 +342,18 @@ void testSerialSVDZeroDiagonal(int n, int row) { template void testSVD() { - testSerialSVD(0, 0, 0); - testSerialSVD(1, 0, 0); - testSerialSVD(0, 1, 0); - 
testSerialSVD(2, 2, 0); + testSerialSVD(0, 0); + testSerialSVD(1, 0); + testSerialSVD(0, 1); + testSerialSVD(2, 2); testSerialSVD(2, 2, 1); - testSerialSVD(10, 8, 0); - testSerialSVD(8, 10, 0); - testSerialSVD(10, 1, 0); + testSerialSVD(10, 8); + testSerialSVD(8, 10); + testSerialSVD(10, 1); testSerialSVD(1, 10, 0); testSerialSVD(10, 8, 3); testSerialSVD(8, 10, 4); - // Test with all-zero matrix - testSerialSVD(8, 10, 0, 0.0); + testSerialSVD(8, 10, 7); // Test some important internal routines which are not called often testSerialSVDZeroLastRow(10); testSerialSVDZeroDiagonal(10, 3); @@ -425,6 +424,119 @@ void testIssue1786() { } } +// Generate specific test cases +template +Kokkos::View getTestCase(int testCase) { + using MatrixHost = Kokkos::View; + MatrixHost Ahost; + int m, n; + switch (testCase) { + case 0: + // Issue #2344 case 1 + m = 3; + n = 3; + Ahost = MatrixHost("A0", m, n); + Ahost(1, 0) = 3.58442287931538747e-02; + Ahost(1, 1) = 3.81743062695684907e-02; + Ahost(2, 2) = -5.55555555555555733e-02; + break; + case 1: + // Test a matrix that is strictly lower triangular (so the diagonal + // is zero) + m = 8; + n = 8; + Ahost = MatrixHost("A1", m, n); + for (int i = 0; i < m; i++) { + for (int j = 0; j < i; j++) { + Ahost(i, j) = 1; + } + } + break; + case 2: + // Test a matrix that's already diagonal, except for one superdiagonal in the middle + m = 10; + n = 5; + Ahost = MatrixHost("A2", m, n); + for (int i = 0; i < n; i++) Ahost(i, i) = 1.0; + Ahost(2, 3) = 2.2; + break; + case 3: + // Test a matrix that is already bidiagonal, and has a zero diagonal in the middle + m = 10; + n = 7; + Ahost = MatrixHost("A3", m, n); + for (int i = 0; i < n; i++) Ahost(i, i) = 1.0; + for (int i = 0; i < n - 1; i++) Ahost(i, i + 1) = 0.7; + Ahost(4, 4) = 0; + break; + case 4: { + // Issue #2344 case 2 + m = 3; + n = 4; + Ahost = MatrixHost("A4", m, n); + Ahost(0, 0) = -2.0305040121856084e-02; + Ahost(1, 0) = 0.0000000000000000e+00; + Ahost(2, 0) = 0.0000000000000000e+00; + Ahost(0, 1) = -0.0000000000000000e+00; + Ahost(1, 1) = -0.0000000000000000e+00; + Ahost(2, 1) = 1.9506119814028472e-02; + Ahost(0, 2) = -2.0305040121856091e-02; + Ahost(1, 2) = 0.0000000000000000e+00; + Ahost(2, 2) = 0.0000000000000000e+00; + Ahost(0, 3) = -0.0000000000000000e+00; + Ahost(1, 3) = -0.0000000000000000e+00; + Ahost(2, 3) = 1.9506119814028472e-02; + break; + } + case 5: { + // Test with all-zero matrix + m = 17; + n = 19; + Ahost = MatrixHost("A5", m, n); + break; + } + default: throw std::runtime_error("Test case out of bounds."); + } + Kokkos::View A(Ahost.label(), m, n); + Kokkos::deep_copy(A, Ahost); + return A; +} + +template +void testSpecialCases() { + using Matrix = Kokkos::View; + using Vector = Kokkos::View; + using ExecSpace = typename Device::execution_space; + for (int i = 0; i < 6; i++) { + Matrix A = getTestCase(i); + int m = A.extent(0); + int n = A.extent(1); + Matrix U("U", m, m); + Matrix Vt("Vt", n, n); + int maxrank = std::min(m, n); + Vector sigma("sigma", maxrank); + Vector work("work", std::max(m, n)); + Kokkos::deep_copy(U, -5.0); + Kokkos::deep_copy(Vt, -5.0); + Kokkos::deep_copy(sigma, -5.0); + Kokkos::deep_copy(work, -5.0); + // Make a copy of A (before SVD) for verification, since the original will be + // overwritten + typename Matrix::HostMirror Acopy("Acopy", m, n); + Kokkos::deep_copy(Acopy, A); + // Run the SVD + Kokkos::parallel_for(Kokkos::RangePolicy(0, 1), + SerialSVDFunctor_Full(A, U, Vt, sigma, work)); + // Get the results back + auto Uhost = 
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), U); + auto Vthost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Vt); + auto sigmaHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma); + + // Verify the SVD is correct + verifySVD(Acopy, Uhost, Vthost, sigmaHost); + } +} + #if defined(KOKKOSKERNELS_INST_DOUBLE) TEST_F(TestCategory, batched_scalar_serial_svd_double) { // Test general SVD on a few different input sizes (full rank randomized) @@ -432,6 +544,8 @@ TEST_F(TestCategory, batched_scalar_serial_svd_double) { testSVD(); testIssue1786(); testIssue1786(); + testSpecialCases(); + testSpecialCases(); } #endif @@ -442,5 +556,7 @@ TEST_F(TestCategory, batched_scalar_serial_svd_float) { testSVD(); testIssue1786(); testIssue1786(); + testSpecialCases(); + testSpecialCases(); } #endif From 7031709385fb375778f068e5073b4d569e419231 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 3 Oct 2024 11:10:55 -0600 Subject: [PATCH 25/38] match kokkos-kernels PR 2353 Signed-off-by: Carl Pearson --- .../impl/KokkosSparse_sort_crs_impl.hpp | 29 ++++++++++++++++--- .../sparse/src/KokkosSparse_SortCrs.hpp | 4 +-- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp b/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp index 5e18c3fd5ca2..edde2e93a571 100644 --- a/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp +++ b/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp @@ -329,11 +329,32 @@ Kokkos::View computeEntryPerm } // Heuristic for choosing bulk sorting algorithm -template +template bool useBulkSortHeuristic(Ordinal avgDeg, Ordinal maxDeg) { - // Use bulk sort if matrix is highly imbalanced, - // OR the longest rows have many entries. - return (maxDeg / 10 > avgDeg) || (maxDeg > 1024); + // Issue 2352: the KokkosSparse::sort_crs_matrix uses Kokkos::Experimental::sort_by_key when this returns true. + // sort_by_key executes on the host when a thrust-like library is not available, which really kills the performance in + // a scenario where the bulk sort algorithm would otherwise be appropriate. Additionally, On MI300A, sorting via + // ROCTHRUST was observed to be ~3x slower than the Kokkos kernels native implementation on some matrices of interest, + // so on that architecture only always bypass bulk sort. + // * GPU execution space, SYLC is enabled, but no ONEDPL does not have sort_by_key + // * GPU execution space, HIP is enabled, but no ROCTHRUST + // * GPU execution space, HIP is enabled, and GPU is GFX942 + // (Kokkos seems to require thrust when CUDA is enabled) + if constexpr (KokkosKernels::Impl::kk_is_gpu_exec_space()) { +#if (defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ONEDPL_HAS_SORT_BY_KEY)) || \ + (defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_ROCTHRUST)) || \ + (defined(KOKKOS_ENABLE_HIP) && defined(KOKKOS_ARCH_AMD_GFX942)) + return false; +#else + // Use bulk sort if matrix is highly imbalanced, + // OR the longest rows have many entries. + return (maxDeg / 10 > avgDeg) || (maxDeg > 1024); +#endif + } else { + // Use bulk sort if matrix is highly imbalanced, + // OR the longest rows have many entries. 
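    // Worked example of the thresholds below: avgDeg = 8, maxDeg = 2000 selects bulk
    // sort (2000 / 10 = 200 > 8, and also 2000 > 1024), while avgDeg = 50, maxDeg = 400
    // keeps the per-row sort (400 / 10 = 40 <= 50 and 400 <= 1024).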
+ return (maxDeg / 10 > avgDeg) || (maxDeg > 1024); + } } #endif diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp index 1203cd244b5b..838144dd5937 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp @@ -77,7 +77,7 @@ void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, const #ifndef KK_DISABLE_BULK_SORT_BY_KEY Ordinal maxDeg = KokkosSparse::Impl::graph_max_degree(exec, rowmap); bool useBulkSort = false; - if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { + if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { // Calculate the true number of columns if user didn't pass it in if (numCols == Kokkos::ArithTraits::max()) { KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); @@ -255,7 +255,7 @@ void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const e #ifndef KK_DISABLE_BULK_SORT_BY_KEY Ordinal maxDeg = KokkosSparse::Impl::graph_max_degree(exec, rowmap); bool useBulkSort = false; - if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { + if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { // Calculate the true number of columns if user didn't pass it in if (numCols == Kokkos::ArithTraits::max()) { KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); From f6f55b768e1d82e516185befbbecd5f923086ae3 Mon Sep 17 00:00:00 2001 From: Heather Pacella Date: Mon, 7 Oct 2024 12:22:40 -0600 Subject: [PATCH 26/38] STK: Snapshot 10-07-24 12:22 from Sierra 5.21.5-699-g38edc8e6 --- packages/stk/CHANGELOG.md | 3 + .../stk_coupling/stk_coupling/Constants.hpp | 4 + packages/stk/stk_io/stk_io/OutputFile.hpp | 1 - .../stk/stk_io/stk_io/StkMeshIoBroker.hpp | 6 - .../stk_mesh/baseImpl/NgpFieldBLASImpl.hpp | 23 +- .../baseImpl/elementGraph/ElemElemGraph.cpp | 241 ------------------ .../baseImpl/elementGraph/ElemElemGraph.hpp | 8 - .../elementGraph/ProcessKilledElements.cpp | 240 +++++++++++++++++ .../elementGraph/ProcessKilledElements.hpp | 55 ++++ .../MortonLBVH_TreeManipulationUtils.hpp | 11 +- .../stk/stk_tools/stk_tools/CMakeLists.txt | 1 + .../ElemGraphMultipleSharedSidesUtils.hpp | 1 + .../element_graph/UnitTestElemElemGraph.cpp | 1 + .../UnitTestElemElemGraphChangeOwner.cpp | 1 + .../UnitTestElemElemGraphDeathIntegration.cpp | 2 +- .../element_graph/UnitTestElementDeath.cpp | 1 + .../UnitTestSkinMeshElementDeath.cpp | 1 + .../stk_search/UnitTestCoarseSearch.cpp | 4 + packages/stk/stk_util/stk_util/Version.hpp | 2 +- .../stk/stk_util/stk_util/diag/Option.hpp | 16 +- .../stk_util/registry/ProductRegistry.cpp | 2 +- 21 files changed, 326 insertions(+), 298 deletions(-) create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.cpp create mode 100644 packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.hpp diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index 0715081353f9..5b1b4b01f5a4 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,8 @@ # CHANGELOG +5.21.5-2 (STK_VERSION 5210502) 10/07/2024 + stk_search: Fixed HIP sort error. 
+ stk_mesh: add multi-field NGP-FieldBLAS field_fill 5.21.5-1 (STK_VERSION 5210501) 9/27/2024 stk_mesh: deprecate BulkData::relation_exist diff --git a/packages/stk/stk_coupling/stk_coupling/Constants.hpp b/packages/stk/stk_coupling/stk_coupling/Constants.hpp index e7eb64fea35c..13a73deb5fc5 100644 --- a/packages/stk/stk_coupling/stk_coupling/Constants.hpp +++ b/packages/stk/stk_coupling/stk_coupling/Constants.hpp @@ -33,6 +33,10 @@ static const std::string TimeStep = "Time Step"; static const std::string FinalTime = "Final Time"; static const std::string IsFinished = "Is Finished"; static const std::string SuccessFlag = "Is Successful"; +static const std::string GlobalVars = "Global Vars"; +static const std::string CouplingVersion = "CouplingVersion"; +static const std::string ConvergenceStatus = "iteration_convergence_status"; +static const std::string StepContinuationStatus = "solve_step_continuation_status"; //ENDCouplingReservedNames } diff --git a/packages/stk/stk_io/stk_io/OutputFile.hpp b/packages/stk/stk_io/stk_io/OutputFile.hpp index 6530f4612ffd..08baf9eec551 100644 --- a/packages/stk/stk_io/stk_io/OutputFile.hpp +++ b/packages/stk/stk_io/stk_io/OutputFile.hpp @@ -213,7 +213,6 @@ class OutputFile void has_adaptivity(bool hasAdaptivity); bool is_skin_mesh() const; - void is_skin_mesh(bool skinMesh); void set_enable_edge_io(bool enableEdgeIO); diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp index 918cbe1e796a..2b1201ded8a5 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp @@ -159,7 +159,6 @@ namespace stk { void set_ghosting_filter(size_t output_file_index, bool hasGhosting); void set_adaptivity_filter(size_t output_file_index, bool hasAdaptivity); - void set_skin_mesh_flag(size_t output_file_index, bool skinMesh); void set_filter_empty_output_entity_blocks(size_t output_file_index, const bool filterEmptyEntityBlocks); void set_filter_empty_output_assembly_entity_blocks(size_t output_file_index, const bool filterEmptyAssemblyEntityBlocks); @@ -853,11 +852,6 @@ namespace stk { m_outputFiles[output_file_index]->has_adaptivity(hasAdaptivity); } - inline void StkMeshIoBroker::set_skin_mesh_flag(size_t output_file_index, bool skinMesh) { - validate_output_file_index(output_file_index); - m_outputFiles[output_file_index]->is_skin_mesh(skinMesh); - } - inline void StkMeshIoBroker::set_filter_empty_output_entity_blocks(size_t output_file_index, const bool filterEmptyEntityBlocks) { validate_output_file_index(output_file_index); m_outputFiles[output_file_index]->set_filter_empty_entity_blocks(filterEmptyEntityBlocks); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp index 1e33e8840f47..5983b997afaa 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpFieldBLASImpl.hpp @@ -288,21 +288,8 @@ void field_fill_impl(const Scalar alpha, } } else { - stk::mesh::HostMesh hostMesh(fields[0]->get_mesh()); - if (nfields == 1) - { - stk::mesh::HostField ngpField(fields[0]->get_mesh(), *fields[0]); - field_fill_for_each_entity(hostMesh, &ngpField, nfields, alpha, component, fieldSelector, execSpace); - } else - { - std::vector> ngpFields; - for (int i=0; i < nfields; ++i) - { - ngpFields.emplace_back(fields[i]->get_mesh(), *fields[i]); - - } - field_fill_for_each_entity(hostMesh, ngpFields.data(), nfields, alpha, component, 
fieldSelector, execSpace); - } + std::vector fieldsVec(fields, fields+nfields); + stk::mesh::field_fill(alpha, fieldsVec, fieldSelector); } for (int i=0; i < nfields; ++i) @@ -348,11 +335,7 @@ void field_copy_no_mark_t(const stk::mesh::FieldBase& xField, } else { xField.sync_to_host(); - stk::mesh::HostField hostX(xField.get_mesh(), xField); - stk::mesh::HostField hostY(yField.get_mesh(), yField); - stk::mesh::HostMesh hostMesh(xField.get_mesh()); - FieldCopy> fieldCopy(hostX, hostY); - stk::mesh::for_each_entity_run(hostMesh, xField.entity_rank(), selector, fieldCopy); + stk::mesh::field_copy(xField, yField, selector); } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.cpp index 9d75a0a23a96..bd0288d318d6 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.cpp @@ -680,247 +680,6 @@ stk::topology ElemElemGraph::get_topology_of_connected_element(const GraphEdge & return m_element_topologies[graphEdge.elem2()]; } -void report_error_with_invalid_ordinal(std::pair ord_and_perm, const stk::mesh::BulkData& bulkData, const stk::mesh::EntityVector& side_nodes_vec, - stk::mesh::Entity element_with_perm_0, stk::mesh::Entity element_with_perm_4) -{ - if(ord_and_perm.first == stk::mesh::INVALID_CONNECTIVITY_ORDINAL) - { - std::ostringstream os; - os << "Proc: " << bulkData.parallel_rank() << std::endl; - os << "this element: " << bulkData.identifier(element_with_perm_0) << std::endl; - os << "other element: " << bulkData.identifier(element_with_perm_4) << std::endl; - os << "Nodes: "; - - for(stk::mesh::Entity side_node : side_nodes_vec) - { - os << bulkData.identifier(side_node) << " "; - } - - os << std::endl; - std::cerr << os.str(); - } - - STK_ThrowRequireMsg(ord_and_perm.first != stk::mesh::INVALID_CONNECTIVITY_ORDINAL, "yikes!"); - STK_ThrowRequireMsg(ord_and_perm.second != stk::mesh::INVALID_PERMUTATION, "yikes!"); -} - -void ensure_fresh_modifiable_state(stk::mesh::BulkData& bulkData) -{ - if(bulkData.in_modifiable_state()) { - bulkData.modification_end(); - } - bulkData.modification_begin(); -} - -class RemoteDeathBoundary -{ -public: - RemoteDeathBoundary(stk::mesh::BulkData& bulkData, ElemElemGraph& elementGraph, - const stk::mesh::EntityVector& killedElements, const stk::mesh::PartVector& parts_for_creating_side, stk::mesh::Part& active, const stk::mesh::PartVector* boundary_mesh_parts) : - m_bulkData(bulkData), m_elementGraph(elementGraph), m_killedElements(killedElements), m_parts_for_creating_side(parts_for_creating_side), m_active(active), - m_boundary_mesh_parts(boundary_mesh_parts), m_topology_modified(false) - {} - ~RemoteDeathBoundary(){} - - void update_death_boundary_for_remotely_killed_elements(std::vector &shared_modified, - stk::mesh::EntityVector& deletedEntities, - stk::mesh::impl::ParallelSelectedInfo &remoteActiveSelector) - { - std::vector remote_edges = get_remote_edges(); - - for(impl::GraphEdgeProc& re : remote_edges) - { - stk::mesh::EntityId local_id = re.get_local_element_global_id(); - int local_side = re.get_local_element_side_index(); - stk::mesh::EntityId remote_id = re.get_remote_element_global_id(); - int remote_side = re.get_remote_element_side_index(); - - stk::mesh::Entity element = m_bulkData.get_entity(stk::topology::ELEM_RANK, local_id); - - impl::ParallelInfo ¶llel_edge_info = m_elementGraph.get_parallel_edge_info(element, local_side, remote_id, 
remote_side); - remoteActiveSelector[-remote_id] = false; - - m_topology_modified = true; - - bool create_side = m_bulkData.bucket(element).member(m_active); - if(create_side==true) - { - impl::add_side_into_exposed_boundary(m_bulkData, - parallel_edge_info, - element, - local_side, - remote_id, - m_parts_for_creating_side, - shared_modified, - remoteActiveSelector, - m_boundary_mesh_parts); - } - else - { - impl::remove_side_from_death_boundary(m_bulkData, element, m_active, deletedEntities, local_side); - } - } - } - - void set_topology_is_modified() - { - m_topology_modified = true; - } - - bool get_topology_modification_status() const - { - return m_topology_modified; - } - -private: - - std::vector get_remote_edges() const - { - std::vector elements_to_comm = get_elements_to_communicate(); - return impl::communicate_killed_entities(m_bulkData.parallel(), elements_to_comm); - } - - std::vector get_elements_to_communicate() const - { - std::vector elements_to_comm; - - for(stk::mesh::Entity this_element :m_killedElements) - { - for(size_t j=0;j shared_modified; - stk::mesh::EntityVector deletedEntities; - - bulkData.initialize_face_adjacent_element_graph(); - ElemElemGraph& elementGraph = bulkData.get_face_adjacent_element_graph(); - - RemoteDeathBoundary remote_death_boundary(bulkData, elementGraph, killedElements, parts_for_creating_side, active, boundary_mesh_parts); - remote_death_boundary.update_death_boundary_for_remotely_killed_elements(shared_modified, deletedEntities, remoteActiveSelector); - - std::vector element_side_pairs; - element_side_pairs.reserve(impl::get_element_side_multiplier() * killedElements.size()); - - for(size_t k = 0; k < killedElements.size(); ++k) - { - stk::mesh::Entity this_element = killedElements[k]; - - for(size_t j = 0; j < elementGraph.get_num_connected_elems(this_element); ++j) - { - if(impl::does_element_have_side(bulkData, this_element)) - { - remote_death_boundary.set_topology_is_modified(); - if(elementGraph.is_connected_elem_locally_owned(this_element, j)) - { - impl::ElementViaSidePair other_element_via_side = elementGraph.get_connected_element_and_via_side(this_element, j); - stk::mesh::Entity other_element = other_element_via_side.element; - if(impl::does_element_have_side(bulkData, other_element_via_side.element)) - { - int side_id = other_element_via_side.side; - STK_ThrowRequireWithSierraHelpMsg(side_id != -1); - - bool is_other_element_alive = bulkData.bucket(other_element).member(active); - if(is_other_element_alive) - { - stk::mesh::Entity side = stk::mesh::get_side_entity_for_elem_side_pair(bulkData, this_element, side_id); - - if(bulkData.is_valid(side)) - { - if(bulkData.bucket(side).owned()) - { - stk::mesh::ConstPartVector parts = impl::get_stk_parts_for_moving_parts_into_death_boundary(boundary_mesh_parts); - bulkData.change_entity_parts(side, parts); - } - } - else - { - stk::mesh::PartVector parts = impl::get_parts_for_creating_side(bulkData, parts_for_creating_side, other_element, side_id); - - // switch elements - stk::mesh::Entity element_with_perm_0 = other_element; - stk::mesh::Entity element_with_perm_4 = this_element; - - int side_id_needed = elementGraph.get_connected_elements_side(this_element, j); - - STK_ThrowRequireMsg(side_id_needed >= 0, "ERROR: proc " << bulkData.parallel_rank() << " found side_id_needed=" << side_id_needed - << " between elem " << bulkData.identifier(element_with_perm_0)<< " and " << bulkData.identifier(element_with_perm_4) - << " in elem-elem-graph"); - - side = 
bulkData.declare_element_side(element_with_perm_0, side_id_needed, parts); - } - } - else - { - impl::remove_side_from_death_boundary(bulkData, this_element, active, deletedEntities, side_id); - } - } - } - else - { - impl::IdViaSidePair remote_id_side_pair = elementGraph.get_connected_remote_id_and_via_side(this_element, j); - stk::mesh::EntityId remote_id = remote_id_side_pair.id; - int remote_side = elementGraph.get_connected_elements_side(this_element, j); - impl::ParallelInfo ¶llel_edge_info = elementGraph.get_parallel_edge_info(this_element, remote_id_side_pair.side, remote_id, remote_side); - bool other_element_active = remoteActiveSelector[-remote_id]; - bool create_side = other_element_active; - - if(create_side) - { - impl::add_side_into_exposed_boundary(bulkData, parallel_edge_info, this_element, remote_id_side_pair.side, remote_id, parts_for_creating_side, - shared_modified, remoteActiveSelector, boundary_mesh_parts); - } - else - { - int side_id = remote_id_side_pair.side; - STK_ThrowRequireWithSierraHelpMsg(side_id != -1); - impl::remove_side_from_death_boundary(bulkData, this_element, active, deletedEntities, side_id); - } - } - } - } - } - stk::mesh::impl::delete_entities_and_upward_relations(bulkData, deletedEntities); - bulkData.make_mesh_parallel_consistent_after_element_death(shared_modified, deletedEntities, elementGraph, killedElements, active, modEndOpt); - bulkData.m_bucket_repository.set_remove_mode_fill_and_sort(); - return remote_death_boundary.get_topology_modification_status(); -} - stk::mesh::SideIdChooser ElemElemGraph::get_side_id_chooser() { return stk::mesh::SideIdChooser(m_bulk_data, m_idMapper, m_graph, m_coincidentGraph); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.hpp index 15ff94150702..1f781168b37a 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ElemElemGraph.hpp @@ -350,14 +350,6 @@ class ElemElemGraph std::string print_edge(const GraphEdge& graphEdge); }; -bool process_killed_elements(stk::mesh::BulkData& bulkData, - const stk::mesh::EntityVector& killedElements, - stk::mesh::Part& active, - stk::mesh::impl::ParallelSelectedInfo &remoteActiveSelector, - const stk::mesh::PartVector& side_parts, - const stk::mesh::PartVector* boundary_mesh_parts = nullptr, - stk::mesh::ModEndOptimizationFlag modEndOpt = stk::mesh::ModEndOptimizationFlag::MOD_END_SORT); - namespace impl { diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.cpp new file mode 100644 index 000000000000..97909707db7f --- /dev/null +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.cpp @@ -0,0 +1,240 @@ +#include "ProcessKilledElements.hpp" +#include "ElemElemGraph.hpp" +#include "ElemElemGraphImpl.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace stk { namespace mesh { + +void ensure_fresh_modifiable_state(stk::mesh::BulkData& bulkData) +{ + if(bulkData.in_modifiable_state()) { + bulkData.modification_end(); + } + bulkData.modification_begin(); +} + +class RemoteDeathBoundary +{ +public: + RemoteDeathBoundary(stk::mesh::BulkData& bulkData, ElemElemGraph& elementGraph, + const stk::mesh::EntityVector& killedElements, const stk::mesh::PartVector& 
parts_for_creating_side, stk::mesh::Part& active, const stk::mesh::PartVector* boundary_mesh_parts) : + m_bulkData(bulkData), m_elementGraph(elementGraph), m_killedElements(killedElements), m_parts_for_creating_side(parts_for_creating_side), m_active(active), + m_boundary_mesh_parts(boundary_mesh_parts), m_topology_modified(false) + {} + ~RemoteDeathBoundary(){} + + void update_death_boundary_for_remotely_killed_elements(std::vector &shared_modified, + stk::mesh::EntityVector& deletedEntities, + stk::mesh::impl::ParallelSelectedInfo &remoteActiveSelector) + { + std::vector remote_edges = get_remote_edges(); + + for(impl::GraphEdgeProc& re : remote_edges) + { + stk::mesh::EntityId local_id = re.get_local_element_global_id(); + int local_side = re.get_local_element_side_index(); + stk::mesh::EntityId remote_id = re.get_remote_element_global_id(); + int remote_side = re.get_remote_element_side_index(); + + stk::mesh::Entity element = m_bulkData.get_entity(stk::topology::ELEM_RANK, local_id); + + impl::ParallelInfo ¶llel_edge_info = m_elementGraph.get_parallel_edge_info(element, local_side, remote_id, remote_side); + remoteActiveSelector[-remote_id] = false; + + m_topology_modified = true; + + bool create_side = m_bulkData.bucket(element).member(m_active); + if(create_side==true) + { + impl::add_side_into_exposed_boundary(m_bulkData, + parallel_edge_info, + element, + local_side, + remote_id, + m_parts_for_creating_side, + shared_modified, + remoteActiveSelector, + m_boundary_mesh_parts); + } + else + { + impl::remove_side_from_death_boundary(m_bulkData, element, m_active, deletedEntities, local_side); + } + } + } + + void set_topology_is_modified() + { + m_topology_modified = true; + } + + bool get_topology_modification_status() const + { + return m_topology_modified; + } + +private: + + std::vector get_remote_edges() const + { + std::vector elements_to_comm = get_elements_to_communicate(); + return impl::communicate_killed_entities(m_bulkData.parallel(), elements_to_comm); + } + + std::vector get_elements_to_communicate() const + { + std::vector elements_to_comm; + + for(stk::mesh::Entity this_element :m_killedElements) + { + for(size_t j=0;j shared_modified; + stk::mesh::EntityVector deletedEntities; + + bulkData.initialize_face_adjacent_element_graph(); + ElemElemGraph& elementGraph = bulkData.get_face_adjacent_element_graph(); + + RemoteDeathBoundary remote_death_boundary(bulkData, elementGraph, killedElements, parts_for_creating_side, active, boundary_mesh_parts); + remote_death_boundary.update_death_boundary_for_remotely_killed_elements(shared_modified, deletedEntities, remoteActiveSelector); + + std::vector element_side_pairs; + element_side_pairs.reserve(impl::get_element_side_multiplier() * killedElements.size()); + + for(size_t k = 0; k < killedElements.size(); ++k) + { + stk::mesh::Entity this_element = killedElements[k]; + + for(size_t j = 0; j < elementGraph.get_num_connected_elems(this_element); ++j) + { + if(impl::does_element_have_side(bulkData, this_element)) + { + remote_death_boundary.set_topology_is_modified(); + if(elementGraph.is_connected_elem_locally_owned(this_element, j)) + { + impl::ElementViaSidePair other_element_via_side = elementGraph.get_connected_element_and_via_side(this_element, j); + stk::mesh::Entity other_element = other_element_via_side.element; + if(impl::does_element_have_side(bulkData, other_element_via_side.element)) + { + int side_id = other_element_via_side.side; + STK_ThrowRequireWithSierraHelpMsg(side_id != -1); + + bool is_other_element_alive = 
bulkData.bucket(other_element).member(active);
+ if(is_other_element_alive)
+ {
+ stk::mesh::Entity side = stk::mesh::get_side_entity_for_elem_side_pair(bulkData, this_element, side_id);
+
+ if(bulkData.is_valid(side))
+ {
+ if(bulkData.bucket(side).owned())
+ {
+ stk::mesh::ConstPartVector parts = impl::get_stk_parts_for_moving_parts_into_death_boundary(boundary_mesh_parts);
+ bulkData.change_entity_parts(side, parts);
+ }
+ }
+ else
+ {
+ stk::mesh::PartVector parts = impl::get_parts_for_creating_side(bulkData, parts_for_creating_side, other_element, side_id);
+
+ // switch elements
+ stk::mesh::Entity element_with_perm_0 = other_element;
+ stk::mesh::Entity element_with_perm_4 = this_element;
+
+ int side_id_needed = elementGraph.get_connected_elements_side(this_element, j);
+
+ STK_ThrowRequireMsg(side_id_needed >= 0, "ERROR: proc " << bulkData.parallel_rank() << " found side_id_needed=" << side_id_needed
+ << " between elem " << bulkData.identifier(element_with_perm_0)<< " and " << bulkData.identifier(element_with_perm_4)
+ << " in elem-elem-graph");
+
+ side = bulkData.declare_element_side(element_with_perm_0, side_id_needed, parts);
+ }
+ }
+ else
+ {
+ impl::remove_side_from_death_boundary(bulkData, this_element, active, deletedEntities, side_id);
+ }
+ }
+ else
+ {
+ impl::IdViaSidePair remote_id_side_pair = elementGraph.get_connected_remote_id_and_via_side(this_element, j);
+ stk::mesh::EntityId remote_id = remote_id_side_pair.id;
+ int remote_side = elementGraph.get_connected_elements_side(this_element, j);
+ impl::ParallelInfo &parallel_edge_info = elementGraph.get_parallel_edge_info(this_element, remote_id_side_pair.side, remote_id, remote_side);
+ bool other_element_active = remoteActiveSelector[-remote_id];
+ bool create_side = other_element_active;
+
+ if(create_side)
+ {
+ impl::add_side_into_exposed_boundary(bulkData, parallel_edge_info, this_element, remote_id_side_pair.side, remote_id, parts_for_creating_side,
+ shared_modified, remoteActiveSelector, boundary_mesh_parts);
+ }
+ else
+ {
+ int side_id = remote_id_side_pair.side;
+ STK_ThrowRequireWithSierraHelpMsg(side_id != -1);
+ impl::remove_side_from_death_boundary(bulkData, this_element, active, deletedEntities, side_id);
+ }
+ }
+ }
+ }
+ stk::mesh::impl::delete_entities_and_upward_relations(bulkData, deletedEntities);
+ bulkData.make_mesh_parallel_consistent_after_element_death(shared_modified, deletedEntities, elementGraph, killedElements, active, modEndOpt);
+ bulkData.m_bucket_repository.set_remove_mode_fill_and_sort();
+ return remote_death_boundary.get_topology_modification_status();
+}
+
+}} // end namespaces stk mesh
+
diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.hpp
new file mode 100644
index 000000000000..12c864542394
--- /dev/null
+++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/elementGraph/ProcessKilledElements.hpp
@@ -0,0 +1,55 @@
+// Copyright 2002 - 2008, 2010, 2011 National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS). Under the terms of Contract
+// DE-NA0003525 with NTESS, the U.S. Government retains certain rights
+// in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef STK_PROCESS_KILLED_ELEMENTS_HPP +#define STK_PROCESS_KILLED_ELEMENTS_HPP + +#include +#include +#include + +namespace stk { namespace mesh { +class Part; +class BulkData; + +bool process_killed_elements(stk::mesh::BulkData& bulkData, + const stk::mesh::EntityVector& killedElements, + stk::mesh::Part& active, + stk::mesh::impl::ParallelSelectedInfo &remoteActiveSelector, + const stk::mesh::PartVector& side_parts, + const stk::mesh::PartVector* boundary_mesh_parts = nullptr, + stk::mesh::impl::MeshModification::modification_optimization modEndOpt = stk::mesh::impl::MeshModification::modification_optimization::MOD_END_SORT); + +}} // end stk mesh namespaces + +#endif diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp index b080ccf30b20..ff76f6ad9c5d 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp @@ -430,13 +430,10 @@ struct SortByCode { static void apply(const TreeType &tree, ExecutionSpace const& execSpace) { - if constexpr (Kokkos::SpaceAccessibility::accessible) { + if constexpr (Kokkos::SpaceAccessibility::accessible) { SortByCodeIdPair::apply(tree); } else { -//#if KOKKOS_VERSION >= 40300 -// Kokkos::Experimental::sort_by_key(execSpace, tree.m_leafCodes, tree.m_leafIds); -//#elif defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCTHRUST) #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCTHRUST) const int n = tree.m_leafIds.extent(0); @@ -447,7 +444,11 @@ struct SortByCode //thrust::stable_sort_by_key(rawLeafCodesThr, rawLeafCodesThr + n, rawLeafIdsThr); thrust::sort_by_key(rawLeafCodesThr, rawLeafCodesThr + n, rawLeafIdsThr); #else - STK_ThrowErrorMsg("shouldn't be able to get here"); // SortByCodeIdPair::apply(tree); +#if KOKKOS_VERSION >= 40300 + Kokkos::Experimental::sort_by_key(execSpace, tree.m_leafCodes, tree.m_leafIds); +#else + STK_ThrowErrorMessage("Need at least Kokkos 4.3"); +#endif #endif } } diff --git a/packages/stk/stk_tools/stk_tools/CMakeLists.txt b/packages/stk/stk_tools/stk_tools/CMakeLists.txt index 1e23d08ad8e2..33abe11f882c 100644 --- 
a/packages/stk/stk_tools/stk_tools/CMakeLists.txt +++ b/packages/stk/stk_tools/stk_tools/CMakeLists.txt @@ -79,6 +79,7 @@ else() add_library(stk_transfer_utils_lib ${TRANSFER_UTILS_SOURCES}) target_link_libraries(stk_transfer_utils_lib PUBLIC stk_io) target_link_libraries(stk_transfer_utils_lib PUBLIC stk_transfer) + target_link_libraries(stk_transfer_utils_lib PRIVATE stk_util_registry) add_library(stk_tools_lib ${MESH_CLONE_SOURCES} ${MESH_TOOLS_SOURCES} ${BLOCK_EXTRACTOR_SOURCES}) target_link_libraries(stk_tools_lib PUBLIC stk_io) diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp index 2957958ca339..212a1097572b 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/ElemGraphMultipleSharedSidesUtils.hpp @@ -56,6 +56,7 @@ #include #include #include +#include #include diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp index 5f4f5d8447f2..66c0d2867773 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraph.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp index e836d09b0327..5a27b594c4c7 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphChangeOwner.cpp @@ -25,6 +25,7 @@ #include #include // for change_entity_owner, etc #include // for parallel_info +#include #include namespace stk { namespace mesh { class Part; } } diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp index beea53050de9..1ef2f70fb169 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElemElemGraphDeathIntegration.cpp @@ -3,7 +3,7 @@ #include // for BulkData, etc #include // for count_selected_entities, etc #include // for MetaData -#include // for process_killed_elements +#include #include #include // for topology, etc #include // for parallel_machine_rank, etc diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp index 487036273f6a..8ef6e71e9569 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/element_graph/UnitTestElementDeath.cpp @@ -10,6 +10,7 @@ #include // for get_selected_entities, etc #include // for MetaData #include // for process_killed_elements, etc +#include #include // for topology, etc #include // for 
fill_mesh_using_stk_io, etc #include // for parallel_machine_size, etc diff --git a/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp b/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp index 0136648ed7be..5b62dd5d6b8a 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/face_creation/integration/UnitTestSkinMeshElementDeath.cpp @@ -5,6 +5,7 @@ #include // for put_io_part_attribute #include // for BulkData #include // for process_killed_elements, etc +#include #include // for get_selected_entities #include // for MetaData #include // for topology, etc diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp index d609ce5df8c2..bca87004fe8a 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearch.cpp @@ -493,6 +493,10 @@ TEST(stk_search, Local_CoarseSearchWithViews_MORTON_LBVH) TEST(stk_search, Local_CoarseSearchWithViews_ARBORX) { +#ifndef STK_HAS_ARBORX + GTEST_SKIP(); +#endif + local_test_coarse_search_for_algorithm_with_views(stk::search::ARBORX); local_test_coarse_search_for_algorithm_with_views(stk::search::ARBORX); } diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index aedfb9cb959f..f021530e66ee 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. -#define STK_VERSION 5210501 +#define STK_VERSION 5210502 namespace stk diff --git a/packages/stk/stk_util/stk_util/diag/Option.hpp b/packages/stk/stk_util/stk_util/diag/Option.hpp index ec26e9a79394..839d8bd64945 100644 --- a/packages/stk/stk_util/stk_util/diag/Option.hpp +++ b/packages/stk/stk_util/stk_util/diag/Option.hpp @@ -76,7 +76,7 @@ struct OptionDescription * interface so that a help description may be generated. * */ -struct Option : public OptionDescription +struct Option { /** * Creates a new Option instance. @@ -163,18 +163,6 @@ struct Option : public OptionDescription return m_value; } - /** - * @brief Member function describe prints a description of the option to - * the stream. - * - * @param os a std::ostream reference to print to - * description to. - * - * @return a std::ostream reference to the output - * stream. - */ - virtual std::ostream &describe(std::ostream &os) const; - operator std::string &() { return m_value; } @@ -187,7 +175,7 @@ struct Option : public OptionDescription }; -struct OptionMaskName +struct OptionMaskName : public OptionDescription { /** * Creates a new OptionMaskName instance. diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index 48109fc4c112..d1acaca9193b 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. 
-#define STK_VERSION_STRING "5.21.5-353-g60799ad8" +#define STK_VERSION_STRING "5.21.5-699-g38edc8e6" #endif namespace stk { From e1e087f5b2a277ed5e854058100244bedbddbd5e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:39:47 +0000 Subject: [PATCH 27/38] Bump github/codeql-action from 3.26.10 to 3.26.12 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.10 to 3.26.12. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/e2b3eafc8d227b0241d48be5f425d47c2d750a13...c36620d31ac7c881962c3d9dd939c40ec9434f2b) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- .github/workflows/scorecards.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index eb12d791cf40..41cc2c48623b 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -62,7 +62,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 + uses: github/codeql-action/init@c36620d31ac7c881962c3d9dd939c40ec9434f2b # v3.26.12 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -85,6 +85,6 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 + uses: github/codeql-action/analyze@c36620d31ac7c881962c3d9dd939c40ec9434f2b # v3.26.12 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index db665933d980..923d8932f406 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 + uses: github/codeql-action/upload-sarif@c36620d31ac7c881962c3d9dd939c40ec9434f2b # v3.26.12 with: sarif_file: results.sarif From 0ab66006d873aa90c572b61707e8bd63db3e247e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:39:53 +0000 Subject: [PATCH 28/38] Bump actions/checkout from 4.1.7 to 4.2.1 Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.7 to 4.2.1. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4.1.7...eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/AT2.yml | 8 ++++---- .github/workflows/clang_format.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/dependency-review.yml | 2 +- .github/workflows/detect-git-lfs.yml | 2 +- .github/workflows/detect-mpi-comm-world.yml | 2 +- .github/workflows/per-commit.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/spack.yml | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index b6ad45d17b2b..cbb0b653bc4e 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -55,7 +55,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 - name: Repo status @@ -146,7 +146,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 - name: Repo status @@ -237,7 +237,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 - name: Repo status @@ -329,7 +329,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 - name: Repo status diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index 9582304d9654..05df85dd93ee 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - uses: DoozyX/clang-format-lint-action@c71d0bf4e21876ebec3e5647491186f8797fde31 # v0.18.2 with: source: './packages/muelu ./packages/tempus ./packages/teko ./packages/xpetra' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index eb12d791cf40..5f80a59184e4 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -58,7 +58,7 @@ jobs: # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 5c03af260a6c..7b0990bcf5ca 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -22,6 +22,6 @@ jobs: egress-policy: audit - name: 'Checkout Repository' - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: 'Dependency Review' uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 diff --git a/.github/workflows/detect-git-lfs.yml b/.github/workflows/detect-git-lfs.yml index 25f52a8aa30a..ebe778088863 100644 --- a/.github/workflows/detect-git-lfs.yml +++ b/.github/workflows/detect-git-lfs.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 diff --git a/.github/workflows/detect-mpi-comm-world.yml b/.github/workflows/detect-mpi-comm-world.yml index 80414fd34c73..1fd6790c8c86 100644 --- a/.github/workflows/detect-mpi-comm-world.yml +++ b/.github/workflows/detect-mpi-comm-world.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 diff --git a/.github/workflows/per-commit.yml b/.github/workflows/per-commit.yml index 91fe1c8126d0..3f619a7dbbc0 100644 --- a/.github/workflows/per-commit.yml +++ b/.github/workflows/per-commit.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index db665933d980..af857aa665c3 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -31,7 +31,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: persist-credentials: false diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index 51a8b71e8049..27f318dbf321 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -24,7 +24,7 @@ jobs: with: access_token: ${{ github.token }} - name: Clone Trilinos - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 1 - name: Spack build From 754081f81d4f2a58a71e3160d298897afe5614e3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:39:58 +0000 Subject: [PATCH 29/38] Bump actions/upload-artifact from 4.4.0 to 4.4.1 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.0 to 4.4.1. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/50769540e7f4bd5e21e526ee35c689e35e0d6874...604373da6381bf24206979c74d06a550515601b9) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/clang_format.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index 9582304d9654..a59141abd2b9 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -22,7 +22,7 @@ jobs: - run: git diff HEAD > format_patch.txt - run: if [ "$(cat format_patch.txt)" == "" ] ; then rm format_patch.txt ; else cat format_patch.txt; fi - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@604373da6381bf24206979c74d06a550515601b9 # v4.4.1 id: upload-artf if: ${{ hashFiles('format_patch.txt') != '' }} with: diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index db665933d980..e6a5e0153429 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -58,7 +58,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@604373da6381bf24206979c74d06a550515601b9 # v4.4.1 with: name: SARIF file path: results.sarif From f88430eee3c99dfba9f4aa7c3c102c25d83392ac Mon Sep 17 00:00:00 2001 From: Victor Brunini Date: Tue, 8 Oct 2024 11:32:04 -0600 Subject: [PATCH 30/38] belos: Fix shadow warning reported in #13512. --- packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp b/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp index 1dea7d1ec5a1..203345d718a9 100644 --- a/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp +++ b/packages/belos/tpetra/src/BelosMultiVecTraits_Tpetra.hpp @@ -115,7 +115,6 @@ class MultiVecPool auto full_size_dv = available.back(); available.pop_back(); - using dv_t = typename MV::dual_view_type; typename dv_t::t_dev mv_dev(full_size_dv.view_device().data(), num_local_elems, numVecs); typename dv_t::t_host mv_host(full_size_dv.view_host().data(), num_local_elems, numVecs); From 4bfd20d24d7b1519ebb70580e8d3f75e2fbf9cfd Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 8 Oct 2024 14:20:06 -0600 Subject: [PATCH 31/38] Trilinos: Update README.md for OpenSSF Best Practices Badge (#13513) Adding our OpenSSF Best Bractices Badge! 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 998d7031cb39..75df4f80f3b5 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/trilinos/Trilinos/badge)](https://securityscorecards.dev/viewer/?uri=github.com/trilinos/Trilinos) - +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/9452/badge)](https://www.bestpractices.dev/projects/9452) The Trilinos Project is an effort to develop algorithms and enabling From 25d1244c798313a9d1e5c30532383121658b0287 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 8 Oct 2024 15:34:05 -0600 Subject: [PATCH 32/38] Tpetra: Maybe making test compile Signed-off-by: Chris Siefert --- .../ImportExport2/ImportExport2_UnitTests.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp b/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp index eeeb5aa891a2..1482f3132e3e 100644 --- a/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp +++ b/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp @@ -504,7 +504,7 @@ namespace { as (10) * ScalarTraits::eps (); typedef typename CrsMatrix::nonconst_local_inds_host_view_type lids_type; typedef typename CrsMatrix::nonconst_values_host_view_type vals_type; - + lids_type tgtRowInds; vals_type tgtRowVals; lids_type tgt2RowInds; @@ -2356,9 +2356,14 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( Import_Util, UnpackAndCombineWithOwningPIDs, os << *prefix << "Calling 4-arg doPostsAndWaits" << std::endl; std::cerr << os.str (); } - using char_host_mirror_view = typename Kokkos::View::HostMirror; - char_host_mirror_view importsView(imports.data(), imports.size()); - distor.doPostsAndWaits(exports.view_host(),numExportPackets(),importsView,numImportPackets()); + + // NOTE: This test is run entirely on host. Trying to run this on + // device is trickier, since we don't allow sending from CudaUVM buffers, but + // do allow sends from HIP Unified Memory + Kokkos::View importsView(imports.data(), imports.size()); + auto exportsView_h = create_mirror_view(Kokkos::HostSpace(),exports.view_host()); + deep_copy(exportsView_h,exports.view_host()); + distor.doPostsAndWaits(exportsView_h,numExportPackets(),importsView,numImportPackets()); auto importsView_d = Kokkos::create_mirror_view(Node::device_type::memory_space(), importsView); deep_copy(importsView_d,importsView); if (verbose) { @@ -2515,7 +2520,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( Import_Util,LowCommunicationMakeColMapAndRein // it will), in which case we can remove the persistingView call. 
auto rowptr = Kokkos::Compat::persistingView(A->getLocalRowPtrsHost()); auto colind = Kokkos::Compat::persistingView(A->getLocalIndicesHost()); - + Acolmap = A->getColMap(); Adomainmap = A->getDomainMap(); From f6fb1918e72113ae7e9926771ee855ad0918782d Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Wed, 9 Oct 2024 12:02:20 -0600 Subject: [PATCH 33/38] IOSS: Fix so single-rank file open matches integer size of parallel file open Signed-off-by: Greg Sjaardema --- .../libraries/ioss/src/exodus/Ioex_DecompositionData.C | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 5f2c54f2f079..9097acbe20c8 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -289,6 +289,15 @@ namespace Ioex { Ioss::PropertyManager properties; Ioss::DatabaseIO *dbi = Ioss::IOFactory::create( "exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); + + // Set integer size to match what the caller is using + if (sizeof(INT) == 8) { + dbi->set_int_byte_size_api(Ioss::USE_INT64_API); + } + else { + dbi->set_int_byte_size_api(Ioss::USE_INT32_API); + } + Ioss::Region region(dbi, "line_decomp_region"); Ioss::DecompUtils::line_decompose( From a253b3f79a2125aa19ce4cb3d8d9d59ced2c6c3e Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 9 Oct 2024 17:23:19 -0600 Subject: [PATCH 34/38] sacado,stokhos: remove use of Impl::MirrorType Compatibility update corresponding to kokkos/kokkos#7339 In this PR, previous use of `Impl::MirrorType::view_type` is guarded for Kokkos versions < 4.4.99 Usage is replaced with `Kokkos::Impl::MirrorViewType::dest_view_type` Existing usage of `Kokkos::Impl::MirrorViewType::view_type` was not changed Co-authored-by: Paul Zehner Co-authored-by: Damien L-G Signed-off-by: Nathan Ellingwood --- packages/sacado/src/KokkosExp_View_Fad.hpp | 21 +++++++++++++++++++ .../sacado/src/Kokkos_DynRankView_Fad.hpp | 18 ++++++++++++++++ packages/sacado/src/Kokkos_View_Fad_Fwd.hpp | 10 +++++++++ .../pce/KokkosExp_View_UQ_PCE_Contiguous.hpp | 18 ++++++++++++++++ .../kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp | 10 +++++++++ .../KokkosExp_View_MP_Vector_Contiguous.hpp | 4 ++++ .../vector/Kokkos_View_MP_Vector_Fwd.hpp | 2 ++ 7 files changed, 83 insertions(+) diff --git a/packages/sacado/src/KokkosExp_View_Fad.hpp b/packages/sacado/src/KokkosExp_View_Fad.hpp index e30b14f19ce0..3c9cb5351c5e 100644 --- a/packages/sacado/src/KokkosExp_View_Fad.hpp +++ b/packages/sacado/src/KokkosExp_View_Fad.hpp @@ -484,13 +484,21 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value, +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); +#if KOKKOS_VERSION >= 40499 + return typename Impl::MirrorViewType::dest_view_type(src.label(),layout); +#else return typename Impl::MirrorType::view_type(src.label(),layout); +#endif } template< class T , class ... 
P > @@ -565,19 +573,32 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value ), +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); +#if KOKKOS_VERSION >= 40499 + return typename Impl::MirrorViewType::dest_view_type( + Kokkos::view_alloc(src.label(), wi), layout); +#else return typename Impl::MirrorType::view_type( Kokkos::view_alloc(src.label(), wi), layout); +#endif } template +#if KOKKOS_VERSION >= 40499 +typename Impl::MirrorViewType::dest_view_type +#else typename Impl::MirrorViewType::view_type +#endif create_mirror_view_and_copy( const Space&, const Kokkos::View& src, std::string const& name, diff --git a/packages/sacado/src/Kokkos_DynRankView_Fad.hpp b/packages/sacado/src/Kokkos_DynRankView_Fad.hpp index 36be416bde09..0d330ef2dcb1 100644 --- a/packages/sacado/src/Kokkos_DynRankView_Fad.hpp +++ b/packages/sacado/src/Kokkos_DynRankView_Fad.hpp @@ -43,8 +43,13 @@ class DynRankView ; namespace Impl { +#if KOKKOS_VERSION >= 40499 +template +struct MirrorDRViewType; +#else template struct MirrorDRVType; +#endif } @@ -93,7 +98,11 @@ create_mirror( Kokkos::LayoutStride >::value >::type * = 0); template +#if KOKKOS_VERSION >= 40499 +typename Impl::MirrorDRViewType::dest_view_type +#else typename Impl::MirrorDRVType::view_type +#endif create_mirror( const Space&, const Kokkos::DynRankView & src, @@ -1246,7 +1255,11 @@ create_mirror( const Kokkos::DynRankView & src } template +#if KOKKOS_VERSION >= 40499 +typename Impl::MirrorDRViewType::dest_view_type +#else typename Impl::MirrorDRVType::view_type +#endif create_mirror(const Space& , const Kokkos::DynRankView & src , typename std::enable_if< ( std::is_same< typename ViewTraits::specialize , @@ -1257,8 +1270,13 @@ create_mirror(const Space& , const Kokkos::DynRankView & src typedef DynRankView src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src.rank()] = Kokkos::dimension_scalar(src); +#if KOKKOS_VERSION >= 40499 + return typename Impl::MirrorDRViewType::dest_view_type( + src.label(),Impl::reconstructLayout(layout, src.rank()+1)); +#else return typename Impl::MirrorDRVType::view_type( src.label(),Impl::reconstructLayout(layout, src.rank()+1)); +#endif } namespace Impl { diff --git a/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp b/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp index fb13799e1fe4..d5935e3fc72c 100644 --- a/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp +++ b/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp @@ -69,8 +69,10 @@ typename std::enable_if< is_view_fad< Kokkos::View >::value && view_copy(const ExecutionSpace& space, const Kokkos::View& dst, const Kokkos::View& src); +#if KOKKOS_VERSION < 40499 template struct MirrorType; +#endif template struct MirrorViewType; @@ -110,7 +112,11 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value, +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(const Space&, const Kokkos::View 
& src); template< class T , class ... P > @@ -146,7 +152,11 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value ), +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::View & src); diff --git a/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp b/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp index 3d4b95d984c2..f78296aef544 100644 --- a/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp @@ -229,14 +229,23 @@ template typename std::enable_if< std::is_same< typename ViewTraits::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); +#if KOKKOS_VERSION >= 40499 + return typename Impl::MirrorViewType::dest_view_type( + view_alloc(src.label(), src.impl_map().cijk()),layout); +#else return typename Impl::MirrorType::view_type( view_alloc(src.label(), src.impl_map().cijk()),layout); +#endif } template< class T , class ... P > @@ -304,15 +313,24 @@ template typename std::enable_if< std::is_same< typename ViewTraits::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); +#if KOKKOS_VERSION >= 40499 + return typename Impl::MirrorViewType::dest_view_type( + view_alloc(src.label(), wi, src.impl_map().cijk()), layout); +#else return typename Impl::MirrorType::view_type( view_alloc(src.label(), wi, src.impl_map().cijk()), layout); +#endif } template diff --git a/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp b/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp index 19ade8bfdc9b..4a84bda6675f 100644 --- a/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp @@ -37,8 +37,10 @@ namespace Sacado { namespace Kokkos { namespace Impl { +#if KOKKOS_VERSION < 40499 template struct MirrorType; +#endif template struct MirrorViewType; } @@ -85,7 +87,11 @@ template::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif create_mirror(const Space&, const Kokkos::View & src); @@ -116,7 +122,11 @@ template::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, +#if KOKKOS_VERSION >= 40499 + typename Impl::MirrorViewType::dest_view_type>::type +#else typename Impl::MirrorType::view_type>::type +#endif 
create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::View & src); diff --git a/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp b/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp index 94b49b6ce622..65b681444df4 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp @@ -128,7 +128,11 @@ inline auto create_mirror( arg_prop, std::string(src.label()).append("_mirror")); if constexpr (Impl::ViewCtorProp::has_memory_space){ +#if KOKKOS_VERSION >= 40499 + return typename Impl::MirrorViewType::memory_space, T, P ...>::dest_view_type(prop_copy, layout); +#else return typename Impl::MirrorType::memory_space, T, P ...>::view_type(prop_copy, layout); +#endif } else { return typename View::HostMirror(prop_copy, layout); } diff --git a/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp b/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp index f6a465e2b42d..c9cc310eda07 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp @@ -37,8 +37,10 @@ namespace Sacado { namespace Kokkos { namespace Impl { +#if KOKKOS_VERSION < 40499 template struct MirrorType; +#endif template struct MirrorViewType; } From 0aee2ae8294cf17aa235823729d072e3ccc2dfe5 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 9 Oct 2024 23:19:32 +0000 Subject: [PATCH 35/38] Tribits: find SYCL libraries for TPL MKL Signed-off-by: Brian Kelley --- cmake/TPLs/FindTPLMKL.cmake | 34 ++++++++++++++----- .../tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp | 2 +- .../tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 2 +- .../tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp | 3 +- .../tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp | 3 +- .../tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp | 2 +- .../tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp | 3 +- .../cmake/KokkosKernels_config.h.in | 1 - .../cmake/kokkoskernels_tpls.cmake | 4 --- .../sparse/src/KokkosSparse_spmv_handle.hpp | 2 +- .../tpls/KokkosSparse_spmv_tpl_spec_avail.hpp | 2 +- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 2 +- 12 files changed, 35 insertions(+), 25 deletions(-) diff --git a/cmake/TPLs/FindTPLMKL.cmake b/cmake/TPLs/FindTPLMKL.cmake index 1f051d6d2ef8..29c97751ef41 100644 --- a/cmake/TPLs/FindTPLMKL.cmake +++ b/cmake/TPLs/FindTPLMKL.cmake @@ -15,10 +15,32 @@ # pseudorandom number generators. That's why we require a header # file, to access the function declarations. -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( MKL - REQUIRED_HEADERS mkl.h - REQUIRED_LIBS_NAMES mkl_rt - ) +IF(Kokkos_ENABLE_SYCL) + # For OneAPI MKL on GPU, use the CMake target + # Temporarily change CMAKE_CXX_COMPILER to icpx to convince MKL to add the DPCPP target + # If it sees that CMAKE_CXX_COMPILER is just ".../mpicxx", it won't do this. + set(CMAKE_CXX_COMPILER_PREVIOUS "${CMAKE_CXX_COMPILER}") + set(CMAKE_CXX_COMPILER "icpx") + # Use the BLAS95 and LAPACK95 interfaces (int32_t for dimensions and indices) + set(MKL_INTERFACE lp64) + find_package(MKL REQUIRED COMPONENTS MKL::MKL MKL::MKL_SYCL) + IF (NOT MKL_FOUND) + MESSAGE(FATAL_ERROR "MKL (as CMake package) was not found! 
This is required for SYCL+MKL") + ENDIF() + set(CMAKE_CXX_COMPILER "${CMAKE_CXX_COMPILER_PREVIOUS}") + + tribits_extpkg_create_imported_all_libs_target_and_config_file( MKL + INNER_FIND_PACKAGE_NAME MKL + IMPORTED_TARGETS_FOR_ALL_LIBS MKL::MKL MKL::MKL_SYCL + ) +ELSE () + # For host MKL, the single library libmkl_rt is sufficient. + # This works for older versions of MKL that don't provide MKLConfig.cmake. + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( MKL + REQUIRED_HEADERS mkl.h + REQUIRED_LIBS_NAMES mkl_rt + ) +ENDIF() # In the past, MKL users had to link with a long list of libraries. # The choice of libraries enables specific functionality. Intel @@ -69,7 +91,3 @@ TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( MKL # where the MKLROOT environment variable points to my MKL install # directory. - - - - diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp index 37876d0129e0..6de384380ea6 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp @@ -95,7 +95,7 @@ KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLe // oneMKL #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) #define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_MKL_SYCL(SCALAR, LAYOUT, MEMSPACE) \ template \ diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index 1bf740b3fbc4..378fbc936f01 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -263,7 +263,7 @@ KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(Kokkos::complex, Kokkos::LayoutLe // oneMKL #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) #include #include diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp index 4d1a238740a8..b7b70b5edb75 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_avail.hpp @@ -75,8 +75,7 @@ KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSp KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::HIP, Kokkos::HIPSpace) #endif -#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ - defined(KOKKOS_ENABLE_SYCL) +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL) KOKKOSBLAS1_NRM2_TPL_SPEC_AVAIL(Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) #endif diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp index dfd6150914b9..b1e4cd58b92e 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas1_nrm2_tpl_spec_decl.hpp @@ -283,8 +283,7 @@ KOKKOSBLAS1_NRM2_TPL_SPEC_DECL_ROCBLAS_EXT(false) #endif -#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ - 
defined(KOKKOS_ENABLE_SYCL) +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL) #include #include #include diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp index 709f261b63c6..679a5ddacea8 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp @@ -129,7 +129,7 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, Kokkos::LayoutRi #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) #define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, LAYOUT) \ template \ diff --git a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index 4234afbd77b0..fcc5762f571d 100644 --- a/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -594,8 +594,7 @@ KOKKOSBLAS2_CGEMV_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIPSpace, false) #endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS // ONEMKL -#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) && \ - defined(KOKKOS_ENABLE_SYCL) +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL) #include #include #include diff --git a/packages/kokkos-kernels/cmake/KokkosKernels_config.h.in b/packages/kokkos-kernels/cmake/KokkosKernels_config.h.in index ef8fea78b845..c3865559c0b3 100644 --- a/packages/kokkos-kernels/cmake/KokkosKernels_config.h.in +++ b/packages/kokkos-kernels/cmake/KokkosKernels_config.h.in @@ -29,7 +29,6 @@ requires (a) header file(s) as well, and may use functions other than just BLAS and LAPACK functions. */ #cmakedefine HAVE_KOKKOSKERNELS_MKL -#cmakedefine KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE #cmakedefine KOKKOSKERNELS_ENABLE_TESTS_AND_PERFSUITE #cmakedefine KOKKOSKERNELS_ENABLE_BENCHMARK diff --git a/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake b/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake index b8267c4955e4..49d1adcdcb84 100644 --- a/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake +++ b/packages/kokkos-kernels/cmake/kokkoskernels_tpls.cmake @@ -31,10 +31,6 @@ MACRO(KOKKOSKERNELS_ADD_TPL_OPTION NAME DEFAULT_VALUE DOCSTRING) SET(ROOT_DEFAULT $ENV{${_NAME_ORIG}_ROOT}) KOKKOSKERNELS_ADD_OPTION(${_NAME_ORIG}_ROOT "${ROOT_DEFAULT}" PATH "Location of ${_NAME} install root. 
Default: None or the value of the environment variable ${_NAME}_ROOT if set") IF (DEFINED TPL_ENABLE_${_NAME}) - IF (${_NAME} STREQUAL MKL AND KOKKOSKERNELS_HAS_TRILINOS) - MESSAGE("Trilinos has enabled MKL and SYCL but it does not detect oneMKL correctly so we disable it!") - SET(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE ON) - ENDIF () IF (TPL_ENABLE_${_NAME} AND NOT KOKKOSKERNELS_ENABLE_TPL_${_NAME}) MESSAGE("Overriding KOKKOSKERNELS_ENABLE_TPL_${_NAME_ORIG}=OFF with TPL_ENABLE_${_NAME}=ON") SET(KOKKOSKERNELS_ENABLE_TPL_${_NAME_ORIG} ON) diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp index e91e53d68d14..a1cd8895a338 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp @@ -190,7 +190,7 @@ struct MKL_SpMV_Data : public TPL_SpMV_Data { }; #endif -#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) struct OneMKL_SpMV_Data : public TPL_SpMV_Data { OneMKL_SpMV_Data(const Kokkos::Experimental::SYCL& exec_) : TPL_SpMV_Data(exec_) {} ~OneMKL_SpMV_Data() { diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp index 2f5ceca09ee1..30c760c14e76 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_avail.hpp @@ -179,7 +179,7 @@ KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) #endif -#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) #define KOKKOSSPARSE_SPMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, ORDINAL, MEMSPACE) \ template <> \ struct spmv_tpl_spec_avail< \ diff --git a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index 30e790a3ab40..6de2a70359ca 100644 --- a/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/packages/kokkos-kernels/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -548,7 +548,7 @@ KOKKOSSPARSE_SPMV_MKL(Kokkos::complex, Kokkos::OpenMP) #undef KOKKOSSPARSE_SPMV_MKL #endif -#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOSKERNELS_ENABLE_TPL_MKL_SYCL_OVERRIDE) +#if defined(KOKKOS_ENABLE_SYCL) inline oneapi::mkl::transpose mode_kk_to_onemkl(char mode_kk) { switch (toupper(mode_kk)) { case 'N': return oneapi::mkl::transpose::nontrans; From 04efcd940e2d0834f843221a8c5ad58d773300ab Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 10 Oct 2024 13:07:49 -0600 Subject: [PATCH 36/38] Address feedback Remove guarded regions of code that are unnecessary for backward compatibility Signed-off-by: Nathan Ellingwood --- packages/sacado/src/KokkosExp_View_Fad.hpp | 21 ------------------- .../sacado/src/Kokkos_DynRankView_Fad.hpp | 18 ---------------- packages/sacado/src/Kokkos_View_Fad_Fwd.hpp | 12 ----------- .../pce/KokkosExp_View_UQ_PCE_Contiguous.hpp | 18 ---------------- .../kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp | 12 ----------- .../KokkosExp_View_MP_Vector_Contiguous.hpp | 4 ---- .../vector/Kokkos_View_MP_Vector_Fwd.hpp | 4 ---- 7 files changed, 89 deletions(-) diff --git a/packages/sacado/src/KokkosExp_View_Fad.hpp 
b/packages/sacado/src/KokkosExp_View_Fad.hpp index 3c9cb5351c5e..d13bda35f278 100644 --- a/packages/sacado/src/KokkosExp_View_Fad.hpp +++ b/packages/sacado/src/KokkosExp_View_Fad.hpp @@ -484,21 +484,13 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value, -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); -#if KOKKOS_VERSION >= 40499 return typename Impl::MirrorViewType::dest_view_type(src.label(),layout); -#else - return typename Impl::MirrorType::view_type(src.label(),layout); -#endif } template< class T , class ... P > @@ -573,32 +565,19 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value ), -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); -#if KOKKOS_VERSION >= 40499 return typename Impl::MirrorViewType::dest_view_type( Kokkos::view_alloc(src.label(), wi), layout); -#else - return typename Impl::MirrorType::view_type( - Kokkos::view_alloc(src.label(), wi), layout); -#endif } template -#if KOKKOS_VERSION >= 40499 -typename Impl::MirrorViewType::dest_view_type -#else typename Impl::MirrorViewType::view_type -#endif create_mirror_view_and_copy( const Space&, const Kokkos::View& src, std::string const& name, diff --git a/packages/sacado/src/Kokkos_DynRankView_Fad.hpp b/packages/sacado/src/Kokkos_DynRankView_Fad.hpp index 0d330ef2dcb1..e2c1d78aa81b 100644 --- a/packages/sacado/src/Kokkos_DynRankView_Fad.hpp +++ b/packages/sacado/src/Kokkos_DynRankView_Fad.hpp @@ -43,13 +43,8 @@ class DynRankView ; namespace Impl { -#if KOKKOS_VERSION >= 40499 template struct MirrorDRViewType; -#else -template -struct MirrorDRVType; -#endif } @@ -98,11 +93,7 @@ create_mirror( Kokkos::LayoutStride >::value >::type * = 0); template -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorDRViewType::dest_view_type -#else -typename Impl::MirrorDRVType::view_type -#endif create_mirror( const Space&, const Kokkos::DynRankView & src, @@ -1255,11 +1246,7 @@ create_mirror( const Kokkos::DynRankView & src } template -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorDRViewType::dest_view_type -#else -typename Impl::MirrorDRVType::view_type -#endif create_mirror(const Space& , const Kokkos::DynRankView & src , typename std::enable_if< ( std::is_same< typename ViewTraits::specialize , @@ -1270,13 +1257,8 @@ create_mirror(const Space& , const Kokkos::DynRankView & src typedef DynRankView src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src.rank()] = Kokkos::dimension_scalar(src); -#if KOKKOS_VERSION >= 40499 return typename Impl::MirrorDRViewType::dest_view_type( src.label(),Impl::reconstructLayout(layout, src.rank()+1)); -#else - return typename Impl::MirrorDRVType::view_type( - 
src.label(),Impl::reconstructLayout(layout, src.rank()+1)); -#endif } namespace Impl { diff --git a/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp b/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp index d5935e3fc72c..913bcb045892 100644 --- a/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp +++ b/packages/sacado/src/Kokkos_View_Fad_Fwd.hpp @@ -69,10 +69,6 @@ typename std::enable_if< is_view_fad< Kokkos::View >::value && view_copy(const ExecutionSpace& space, const Kokkos::View& dst, const Kokkos::View& src); -#if KOKKOS_VERSION < 40499 -template -struct MirrorType; -#endif template struct MirrorViewType; @@ -112,11 +108,7 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value, -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(const Space&, const Kokkos::View & src); template< class T , class ... P > @@ -152,11 +144,7 @@ typename std::enable_if< Kokkos::Impl::ViewSpecializeSacadoFad >::value || std::is_same< typename ViewTraits::specialize , Kokkos::Impl::ViewSpecializeSacadoFadContiguous >::value ), -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::View & src); diff --git a/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp b/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp index f78296aef544..e8899eff096c 100644 --- a/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/KokkosExp_View_UQ_PCE_Contiguous.hpp @@ -229,23 +229,14 @@ template typename std::enable_if< std::is_same< typename ViewTraits::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); -#if KOKKOS_VERSION >= 40499 return typename Impl::MirrorViewType::dest_view_type( view_alloc(src.label(), src.impl_map().cijk()),layout); -#else - return typename Impl::MirrorType::view_type( - view_alloc(src.label(), src.impl_map().cijk()),layout); -#endif } template< class T , class ... 
P > @@ -313,24 +304,15 @@ template typename std::enable_if< std::is_same< typename ViewTraits::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space& , const Kokkos::View & src) { typedef View src_type ; typename src_type::array_layout layout = src.layout(); layout.dimension[src_type::rank] = Kokkos::dimension_scalar(src); -#if KOKKOS_VERSION >= 40499 return typename Impl::MirrorViewType::dest_view_type( view_alloc(src.label(), wi, src.impl_map().cijk()), layout); -#else - return typename Impl::MirrorType::view_type( - view_alloc(src.label(), wi, src.impl_map().cijk()), layout); -#endif } template diff --git a/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp b/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp index 4a84bda6675f..cc1feffe681c 100644 --- a/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/Kokkos_View_UQ_PCE_Fwd.hpp @@ -37,10 +37,6 @@ namespace Sacado { namespace Kokkos { namespace Impl { -#if KOKKOS_VERSION < 40499 - template - struct MirrorType; -#endif template struct MirrorViewType; } @@ -87,11 +83,7 @@ template::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(const Space&, const Kokkos::View & src); @@ -122,11 +114,7 @@ template::specialize , Kokkos::Experimental::Impl::ViewPCEContiguous >::value, -#if KOKKOS_VERSION >= 40499 typename Impl::MirrorViewType::dest_view_type>::type -#else - typename Impl::MirrorType::view_type>::type -#endif create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::View & src); diff --git a/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp b/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp index 65b681444df4..b8bbc9652455 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/KokkosExp_View_MP_Vector_Contiguous.hpp @@ -128,11 +128,7 @@ inline auto create_mirror( arg_prop, std::string(src.label()).append("_mirror")); if constexpr (Impl::ViewCtorProp::has_memory_space){ -#if KOKKOS_VERSION >= 40499 return typename Impl::MirrorViewType::memory_space, T, P ...>::dest_view_type(prop_copy, layout); -#else - return typename Impl::MirrorType::memory_space, T, P ...>::view_type(prop_copy, layout); -#endif } else { return typename View::HostMirror(prop_copy, layout); } diff --git a/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp b/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp index c9cc310eda07..eed2482dd172 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/Kokkos_View_MP_Vector_Fwd.hpp @@ -37,10 +37,6 @@ namespace Sacado { namespace Kokkos { namespace Impl { -#if KOKKOS_VERSION < 40499 - template - struct MirrorType; -#endif template struct MirrorViewType; } From 6a817dfbf27334e0af571b2bbbebd9e04dda899c Mon Sep 17 00:00:00 2001 From: Nate Roberts Date: Thu, 10 Oct 2024 14:40:18 -0500 Subject: [PATCH 37/38] Intrepid2: increase max orders for basis, cubature 
(#13505) Intrepid2: increase max orders for basis, cubature (#13505). Increases the maximum polynomial order for bases from 10 to 20, and the maximum cubature (quadrature) order on edges from 20 to 61. Where necessary, tests which iterate over these orders have been modified to avoid too much increase in expense, with the goal of testing the additional high-order support while maintaining previous testing intensity/coverage at the lower orders. Similarly, some tolerances have been loosened, but we aim to maintain the prior tolerances for the lower orders, and use looser tolerances for newly supported orders. MueLu's p-coarsening tests (IntrepidPCoarsenFactory) have been modified to hard-code the previously-covered polynomial orders, because for higher orders it appears MueLu_IntrepidPCoarsenFactory_def.hpp will need modification to support equispaced nodal bases. The intent is to have a follow-on PR to address this and allow higher-order equispaced bases in IntrepidPCoarsenFactory. --- .../Integration/Intrepid2_CubatureTensor.hpp | 6 + .../Intrepid2_CubatureTensorPyr.hpp | 65 ++++++++- .../src/Shared/Intrepid2_TensorData.hpp | 3 + .../src/Shared/Intrepid2_TensorPoints.hpp | 2 + .../intrepid2/src/Shared/Intrepid2_Types.hpp | 4 +- .../Basis/HGRAD_HEX_Cn_FEM/test_02.hpp | 5 +- .../Basis/HGRAD_LINE_Cn_FEM/test_01.hpp | 3 +- .../Basis/HGRAD_TET_Cn_FEM/test_02.hpp | 2 +- .../Basis/HGRAD_TRI_Cn_FEM/test_01.hpp | 4 +- .../Basis/HVOL_TRI_Cn_FEM/test_01.hpp | 4 +- .../Discretization/Integration/test_01.hpp | 66 +++++++-- .../Discretization/Integration/test_02.hpp | 23 ++- .../Discretization/Integration/test_03.hpp | 32 ++--- .../Discretization/Integration/test_04.hpp | 35 ++--- .../Discretization/Integration/test_05.hpp | 47 ++++--- .../Discretization/Integration/test_06.hpp | 80 ++++++----- .../Discretization/Integration/test_07.hpp | 49 ++++--- .../Discretization/Integration/test_10.hpp | 24 ++-- .../Discretization/Integration/test_util.hpp | 133 ++++++++---------- .../unit-test/Shared/Polylib/test_01.hpp | 14 +- .../unit_tests/IntrepidPCoarsenFactory.cpp | 6 +- 21 files changed, 366 insertions(+), 241 deletions(-) diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensor.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensor.hpp index 6e676882ef8f..b56eafc5a7b9 100644 --- a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensor.hpp +++ b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensor.hpp @@ -158,6 +158,12 @@ namespace Intrepid2 { ordinal_type getNumCubatures() const { return numCubatures_; } + + /** \brief Returns the i-th cubature component. + */ + CubatureDirect getCubatureComponent(ordinal_type i) const { + return cubatures_[i]; + } /** \brief Returns max. degree of polynomials that are integrated exactly.
*/ diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensorPyr.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensorPyr.hpp index 5892061379cb..b20679dafa17 100644 --- a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensorPyr.hpp +++ b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureTensorPyr.hpp @@ -29,6 +29,10 @@ namespace Intrepid2 { class CubatureTensorPyr : public CubatureTensor { public: + using TensorPointDataType = typename CubatureTensor::TensorPointDataType; + using TensorWeightDataType = typename CubatureTensor::TensorWeightDataType; + using PointViewTypeAllocatable = typename CubatureTensor::PointViewTypeAllocatable; + using WeightViewTypeAllocatable = typename CubatureTensor::WeightViewTypeAllocatable; template @@ -68,7 +72,7 @@ namespace Intrepid2 { virtual void getCubature( PointViewType cubPoints, - weightViewType cubWeights ) const { + weightViewType cubWeights ) const override { getCubatureImpl( cubPoints, cubWeights ); } @@ -91,8 +95,65 @@ namespace Intrepid2 { const CubatureLineType2 line2 ) : CubatureTensor(line0, line1, line2) {} + + /** \brief Returns a points container appropriate for passing to getCubature(). + + \return cubPoints - Data structure sized for the cubature points. + */ + virtual TensorPointDataType allocateCubaturePoints() const override + { + std::vector< PointViewTypeAllocatable > cubaturePointComponents(1); + + int numCubatures = this->getNumCubatures(); + int numPoints = 1; + for (ordinal_type i=0;igetCubatureComponent(i).getNumPoints(); + } + + const int dim = 3; + cubaturePointComponents[0] = PointViewTypeAllocatable("cubature points", numPoints, dim); + + return TensorPointDataType(cubaturePointComponents); + } + + /** \brief Returns a weight container appropriate for passing to getCubature(). + + \return cubWeights - Data structure sized for the cubature weights. + */ + virtual TensorWeightDataType allocateCubatureWeights() const override + { + using WeightDataType = Data; + + std::vector< WeightDataType > cubatureWeightComponents(1); + int numPoints = 1; + int numCubatures = this->getNumCubatures(); + for (ordinal_type i=0;igetCubatureComponent(i).getNumPoints(); + } + + cubatureWeightComponents[0] = WeightDataType(WeightViewTypeAllocatable("cubature weights", numPoints)); + + return TensorWeightDataType(cubatureWeightComponents); + } + + /** \brief Returns tensor cubature points and weights. For non-tensor cubatures, the tensor structures are trivial, thin wrappers around the data returned by getCubature(). The provided containers should be pre-allocated through calls to allocateCubaturePoints() and allocateCubatureWeights(). + + \param cubPoints [out] - TensorPoints structure containing the cubature points. + \param cubWeights [out] - TensorData structure containing cubature weights. 
+ */ + virtual + void + getCubature( const TensorPointDataType & tensorCubPoints, + const TensorWeightDataType & tensorCubWeights) const override { + // tensorCubPoints/Weights should have trivial tensor structure + auto points = tensorCubPoints.getTensorComponent(0); + auto weights = tensorCubWeights.getTensorComponent(0).getUnderlyingView(); + this->getCubature(points,weights); + } }; -} +} #include "Intrepid2_CubatureTensorPyrDef.hpp" diff --git a/packages/intrepid2/src/Shared/Intrepid2_TensorData.hpp b/packages/intrepid2/src/Shared/Intrepid2_TensorData.hpp index fe6ce3c30eae..5ee7e068491d 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_TensorData.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_TensorData.hpp @@ -28,6 +28,9 @@ namespace Intrepid2 */ template class TensorData { + public: + using value_type = Scalar; + using execution_space = typename DeviceType::execution_space; protected: Kokkos::Array< Data, Parameters::MaxTensorComponents> tensorComponents_; Kokkos::Array extents_; diff --git a/packages/intrepid2/src/Shared/Intrepid2_TensorPoints.hpp b/packages/intrepid2/src/Shared/Intrepid2_TensorPoints.hpp index fd6126e7fd47..446beaae4732 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_TensorPoints.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_TensorPoints.hpp @@ -24,6 +24,8 @@ namespace Intrepid2 { */ template class TensorPoints { + public: + using value_type = PointScalar; protected: Kokkos::Array< ScalarView, Parameters::MaxTensorComponents> pointTensorComponents_; // each component has dimensions (P,D) ordinal_type numTensorComponents_; diff --git a/packages/intrepid2/src/Shared/Intrepid2_Types.hpp b/packages/intrepid2/src/Shared/Intrepid2_Types.hpp index 8491dd36c376..bcecec34ee1f 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_Types.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_Types.hpp @@ -99,11 +99,11 @@ namespace Intrepid2 { /// The maximum number of points to eval in serial mode. static constexpr ordinal_type MaxNumPtsPerBasisEval= 1; /// The maximum reconstruction order. - static constexpr ordinal_type MaxOrder = 10; + static constexpr ordinal_type MaxOrder = 20; /// The maximum number of integration points for direct cubature rules. static constexpr ordinal_type MaxIntegrationPoints = 4893; /// The maximum degree of the polynomial that can be integrated exactly by a direct edge rule. - static constexpr ordinal_type MaxCubatureDegreeEdge= 20; + static constexpr ordinal_type MaxCubatureDegreeEdge= 61; /// The maximum degree of the polynomial that can be integrated exactly by a direct triangle rule. static constexpr ordinal_type MaxCubatureDegreeTri = 50; /// The maximum degree of the polynomial that can be integrated exactly by a direct tetrahedron rule. 
diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp index 4cab37d83103..b98955113bde 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp @@ -37,7 +37,8 @@ namespace Intrepid2 { int errorFlag = 0; try { - for (int order=1;order basis(order); // problem setup @@ -105,7 +106,7 @@ namespace Intrepid2 { for (size_t j=0;j 1.0e-9)) { std::cout << " order = " << order << " i = " << i << " j = " << j << " val A = " << outputValuesA_Host(i,j) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_01.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_01.hpp index 5cf12dcdc6a5..460ec21ebdec 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_01.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_01.hpp @@ -176,6 +176,7 @@ namespace Test { *outStream << " -- Testing " << EPointTypeToString(pts[idx]) << " -- \n"; for (auto ip=1;ip tol) { + if (std::isnan(val) || std::abs(val-exactVal) > pTol) { errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; *outStream << " Basis function at i= " << i << ", j=" << j << ": " diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp index d3db0bce7600..4f6c6c3a33e0 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp @@ -37,7 +37,7 @@ namespace Intrepid2 { int errorFlag = 0; try { - for (int order=1;order basis(order); // problem setup diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_01.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_01.hpp index 62aa9d6e4322..79f93509d28c 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_01.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_01.hpp @@ -103,12 +103,12 @@ namespace Intrepid2 { // test for Kronecker property for (int i=0;i tol ) { + if ( i==j && std::abs( h_basisAtLattice(i,j) - 1.0 ) > tol * 10 ) { // relax tolerance now that we support up to order 20 errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; *outStream << " Basis function " << i << " does not have unit value at its node (" << h_basisAtLattice(i,j) <<")\n"; } - if ( i!=j && std::abs( h_basisAtLattice(i,j) ) > tol ) { + if ( i!=j && std::abs( h_basisAtLattice(i,j) ) > tol * 10 ) { // relax tolerance now that we support up to order 20 errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; *outStream << " Basis function " << i << " does not vanish at node " << j << "\n"; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_01.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_01.hpp index fa3cf91b60a4..15af05533501 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_01.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_01.hpp @@ -109,12 +109,12 @@ namespace Intrepid2 { // test for Kronecker property for (int i=0;i tol ) { + if ( i==j && std::abs( h_basisAtLattice(i,j) - 1.0 ) > tol * 10 ) { // relax tolerance now that we support orders up to 20 errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; *outStream << " Basis function " << i << " does not have unit value at its node (" << h_basisAtLattice(i,j) <<")\n"; } - if ( i!=j && std::abs( h_basisAtLattice(i,j) ) > tol ) { + if ( i!=j && std::abs( h_basisAtLattice(i,j) ) > tol * 10 ) { // relax tolerance now that we support orders up to 20 errorFlag++; *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; *outStream << " Basis function " << i << " does not vanish at node " << j << "\n"; diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_01.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_01.hpp index d0078f0eeb30..c13d97388e42 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_01.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_01.hpp @@ -260,9 +260,11 @@ namespace Intrepid2 { DynRankView ConstructWithLabel(cubPoints, Parameters::MaxIntegrationPoints, Parameters::MaxDimension); DynRankView ConstructWithLabel(cubWeights, Parameters::MaxIntegrationPoints); + int maxTotalCubatureDegree = Parameters::MaxCubatureDegreeEdge; // sum in all dimensions + *outStream << "-> Line testing\n\n"; { - for (ordinal_type deg=0;deg<=Parameters::MaxCubatureDegreeEdge;++deg) { + for (ordinal_type deg=0;deg<=maxTotalCubatureDegree;++deg) { CubatureLineType cub(deg); cub.getCubature(cubPoints, cubWeights); const auto npts = cub.getNumPoints(); @@ -316,8 +318,8 @@ namespace Intrepid2 { *outStream << "-> Quad testing\n\n"; { - for (ordinal_type y_deg=0;y_deg<=Parameters::MaxCubatureDegreeEdge;++y_deg) - for (ordinal_type x_deg=0;x_deg<=Parameters::MaxCubatureDegreeEdge;++x_deg) { + for (ordinal_type y_deg=0;y_deg<=maxTotalCubatureDegree-1;++y_deg) + for (ordinal_type x_deg=0;x_deg<=maxTotalCubatureDegree-y_deg;++x_deg) { const auto x_line = CubatureLineType(x_deg); const auto y_line = CubatureLineType(y_deg); CubatureTensorType cub( x_line, y_line ); @@ -377,10 +379,10 @@ namespace Intrepid2 { *outStream << "-> Hexahedron testing\n\n"; { - // when hex is tested with max cubature degree edge, it exceeds max integration points 1001 - for (ordinal_type z_deg=0;z_deg tol) { + *outStream << std::setw(30) << "Hexahedron volume computed with tensor-product cubature of degree (" << x_deg << ", " << y_deg << ", " << z_deg << ") --> " << std::setw(10) << std::scientific << testVol << + std::setw(10) << "diff = " << std::setw(10) << std::scientific << std::abs(testVol - refVol) << "\n"; + + ++errorFlag; + *outStream << std::setw(70) << "^^^^----FAILURE!" 
<< "\n"; + } + } + } *outStream << "-> Prism testing\n\n"; { @@ -423,7 +451,8 @@ namespace Intrepid2 { } *outStream << "-> Prism symmetric quadrature testing\n\n"; - for (auto z_deg=0;z_deg tol) { + *outStream << std::setw(30) << "Wedge volume computed with symmetric tensor-product cubature of degree (" << xy_deg << ", " << z_deg << ") --> " << std::setw(10) << std::scientific << testVol << + std::setw(10) << "diff = " << std::setw(10) << std::scientific << std::abs(testVol - refVol) << "\n"; + ++errorFlag; + *outStream << std::setw(70) << "^^^^----FAILURE!" << "\n"; + } + } } // *outStream << "-> Pyramid testing: over-integration by 2 (due to duffy transformation) \n\n"; diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_02.hpp index af16b25f90e4..4c000c9abc0b 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_02.hpp @@ -84,7 +84,6 @@ namespace Intrepid2 { << "| TEST 1: integrals of monomials in 1D |\n" << "===============================================================================\n"; - typedef Kokkos::DynRankView DynRankView; typedef Kokkos::DynRankView DynRankViewHost; #define ConstructWithLabel(obj, ...) obj(#obj, __VA_ARGS__) @@ -112,28 +111,26 @@ namespace Intrepid2 { const auto maxDeg = Parameters::MaxCubatureDegreeEdge; const auto polySize = maxDeg + 1; - // test inegral values + // test integral values DynRankViewHost ConstructWithLabel(testInt, maxDeg+1, polySize); // analytic integral values DynRankViewHost ConstructWithLabel(analyticInt, maxDeg+1, polySize); - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); - // compute integrals for (ordinal_type cubDeg=0;cubDeg<=maxDeg;++cubDeg) { CubatureLineType lineCub(cubDeg); + auto cubPoints = lineCub.allocateCubaturePoints(); + auto cubWeights = lineCub.allocateCubatureWeights(); + lineCub.getCubature(cubPoints, cubWeights); + Kokkos::Array degrees; for (ordinal_type polyDeg=0;polyDeg<=cubDeg;++polyDeg) - testInt(cubDeg, polyDeg) = computeIntegralOfMonomial(lineCub, - cubPoints, + { + degrees[0] = polyDeg; + testInt(cubDeg, polyDeg) = computeIntegralOfMonomial(cubPoints, cubWeights, - polyDeg); + degrees); + } } // get analytic values diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_03.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_03.hpp index 2c88fd62606f..209537894d79 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_03.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_03.hpp @@ -86,7 +86,6 @@ namespace Intrepid2 { << "| TEST 1: integrals of monomials in 2D |\n" << "===============================================================================\n"; - typedef Kokkos::DynRankView DynRankView; typedef Kokkos::DynRankView DynRankViewHost; #define ConstructWithLabel(obj, ...) 
obj(#obj, __VA_ARGS__) @@ -113,7 +112,7 @@ namespace Intrepid2 { const auto maxDeg = Parameters::MaxCubatureDegreeEdge; const auto polySize = (maxDeg+1)*(maxDeg+2)/2; - // test inegral values + // test integral values DynRankViewHost ConstructWithLabel(testInt, maxDeg+1, polySize); // analytic integral values @@ -121,27 +120,28 @@ namespace Intrepid2 { const auto analyticPolySize = (analyticMaxDeg+1)*(analyticMaxDeg+2)/2; DynRankViewHost ConstructWithLabel(analyticInt, analyticPolySize, 1); - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); - // compute integrals for (auto cubDeg=0;cubDeg<=maxDeg;++cubDeg) { CubatureLineType line(cubDeg); CubatureTensorType quadCub( line, line ); + Kokkos::Array degrees; + + auto cubPoints = quadCub.allocateCubaturePoints(); + auto cubWeights = quadCub.allocateCubatureWeights(); + quadCub.getCubature(cubPoints, cubWeights); ordinal_type cnt = 0; - for (auto xDeg=0;xDeg<=cubDeg;++xDeg) - for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,++cnt) - testInt(cubDeg, cnt) = computeIntegralOfMonomial(quadCub, - cubPoints, + for (auto xDeg=0;xDeg<=cubDeg;++xDeg) + { + degrees[0] = xDeg; + for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,++cnt) + { + degrees[1] = yDeg; + testInt(cubDeg, cnt) = computeIntegralOfMonomial(cubPoints, cubWeights, - xDeg, - yDeg); + degrees); + } + } } // get analytic values diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_04.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_04.hpp index f691f3d5f685..3cbdd774238f 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_04.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_04.hpp @@ -85,7 +85,6 @@ namespace Intrepid2 { << "| TEST 1: integrals of monomials in 3D |\n" << "===============================================================================\n"; - typedef Kokkos::DynRankView DynRankView; typedef Kokkos::DynRankView DynRankViewHost; #define ConstructWithLabel(obj, ...) 
obj(#obj, __VA_ARGS__) @@ -114,7 +113,7 @@ namespace Intrepid2 { const auto maxDeg = 10; // 19; //Parameters::MaxCubatureDegreeEdge; const auto polySize = (maxDeg+1)*(maxDeg+2)*(maxDeg+3)/6; - // test inegral values + // test integral values DynRankViewHost ConstructWithLabel(testInt, maxDeg+1, polySize); // analytic integral values @@ -123,31 +122,33 @@ namespace Intrepid2 { DynRankViewHost ConstructWithLabel(analyticInt, analyticPolySize, 1); - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); - // compute integrals for (auto cubDeg=0;cubDeg<=maxDeg;++cubDeg) { CubatureLineType line(cubDeg); CubatureTensorType hexCub( line, line, line ); *outStream << "Cubature order " << std::setw(2) << std::left << cubDeg << " Testing\n"; + Kokkos::Array degrees; + + auto cubPoints = hexCub.allocateCubaturePoints(); + auto cubWeights = hexCub.allocateCubatureWeights(); + hexCub.getCubature(cubPoints, cubWeights); + ordinal_type cnt = 0; - for (auto xDeg=0;xDeg<=cubDeg;++xDeg) - for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg) + for (auto xDeg=0;xDeg<=cubDeg;++xDeg) + { + degrees[0] = xDeg; + for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg) + { + degrees[1] = yDeg; for (auto zDeg=0;zDeg<=(cubDeg-xDeg-yDeg);++zDeg,++cnt) { - testInt(cubDeg, cnt) = computeIntegralOfMonomial(hexCub, - cubPoints, + degrees[2] = zDeg; + testInt(cubDeg, cnt) = computeIntegralOfMonomial(cubPoints, cubWeights, - xDeg, - yDeg, - zDeg); + degrees); } + } + } } diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_05.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_05.hpp index 466bc70be13a..fb472a015a8e 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_05.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_05.hpp @@ -86,7 +86,6 @@ namespace Intrepid2 { << "| TEST 1: integrals of monomials in 2D |\n" << "===============================================================================\n"; - typedef Kokkos::DynRankView DynRankView; #define ConstructWithLabel(obj, ...) 
obj(#obj, __VA_ARGS__) typedef ValueType pointValueType; @@ -101,27 +100,25 @@ namespace Intrepid2 { // compute and compare integrals try { const auto maxDeg = Parameters::MaxCubatureDegreeTri; - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); // compute integrals for (auto cubDeg=0;cubDeg<=20;++cubDeg) { CubatureTriType triCub(cubDeg); + Kokkos::Array degrees; + + auto cubPoints = triCub.allocateCubaturePoints(); + auto cubWeights = triCub.allocateCubatureWeights(); + triCub.getCubature(cubPoints, cubWeights); *outStream << "Default Cubature of rder " << std::setw(2) << std::left << cubDeg << " Testing\n"; ordinal_type cnt = 0; for (auto xDeg=0;xDeg<=cubDeg;++xDeg) { + degrees[0] = xDeg; for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,++cnt) { - auto computedIntegral = computeIntegralOfMonomial(triCub, - cubPoints, - cubWeights, - xDeg, - yDeg); + degrees[1] = yDeg; + auto computedIntegral = computeIntegralOfMonomial(cubPoints, + cubWeights, + degrees); auto anlyticIntegral = analyticIntegralOfMonomialOverTri(xDeg,yDeg); const auto abstol = std::fabs(tol*anlyticIntegral ); @@ -143,33 +140,37 @@ namespace Intrepid2 { *outStream << "Symmetric Cubature of order " << std::setw(2) << std::left << cubDeg << " Testing\n"; CubatureTriSymType triCub(cubDeg); + auto cubPoints = triCub.allocateCubaturePoints(); + auto cubWeights = triCub.allocateCubatureWeights(); triCub.getCubature(cubPoints, cubWeights); - bool isInvariant = IsQuadratureInvariantToOrientation(triCub, cubPoints, cubWeights, shards::Triangle<3>::key); + bool isInvariant = IsQuadratureInvariantToOrientation(triCub, cubPoints, cubWeights, shards::Triangle<3>::key); if (!isInvariant) { errorFlag++; *outStream << " Cubature Rule is not invariant to rotations!\n" << std::right << std::setw(111) << "^^^^---FAILURE!\n"; } - auto cubWeights_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), cubWeights); - ValueType minWeigth = 1.0; + using HostDevice = Kokkos::HostSpace::device_type; + TensorData cubWeightsHost(cubWeights); // this constructor does any necessary allocation and copying to host + ValueType minWeight = 1.0; for(int i=0; i degrees; for (auto xDeg=0;xDeg<=cubDeg;++xDeg) { + degrees[0] = xDeg; for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,++cnt) { - auto computedIntegral = computeIntegralOfMonomial(triCub, - cubPoints, - cubWeights, - xDeg, - yDeg); + degrees[1] = yDeg; + auto computedIntegral = computeIntegralOfMonomial(cubPoints, + cubWeights, + degrees); auto anlyticIntegral = analyticIntegralOfMonomialOverTri(xDeg,yDeg); const auto abstol = std::fabs(tol*anlyticIntegral ); const auto absdiff = std::fabs(anlyticIntegral - computedIntegral); diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_06.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_06.hpp index b2e4a1943883..96d3f02e2285 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_06.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_06.hpp @@ -85,7 +85,6 @@ namespace Intrepid2 { << "| TEST 1: integrals of monomials in 3D |\n" << "===============================================================================\n"; - typedef Kokkos::DynRankView DynRankView; #define ConstructWithLabel(obj, ...) 
obj(#obj, __VA_ARGS__) typedef ValueType pointValueType; @@ -108,43 +107,47 @@ namespace Intrepid2 { // analytic integral values const auto analyticMaxDeg = 20; - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); - // perform comparison for (auto cubDeg=0;cubDeg<=maxDeg;++cubDeg) { *outStream << "Testing Default Cubature of Order " << std::setw(2) << std::left << cubDeg << "\n"; CubatureTetType tetCub(cubDeg); + auto cubPoints = tetCub.allocateCubaturePoints(); + auto cubWeights = tetCub.allocateCubatureWeights(); + tetCub.getCubature(cubPoints, cubWeights); const auto y_offs = (analyticMaxDeg - cubDeg); const auto x_offs = y_offs*(y_offs + 1)/2; ordinal_type offset = 0; const auto oldFlag = errorFlag; - for (auto xDeg=0;xDeg<=cubDeg;++xDeg,offset += x_offs) - for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,offset += y_offs) - for (auto zDeg=0;zDeg<=(cubDeg-xDeg-yDeg);++zDeg) { + Kokkos::Array degrees; + for (auto xDeg=0;xDeg<=cubDeg;++xDeg,offset += x_offs) + { + degrees[0] = xDeg; + for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,offset += y_offs) + { + degrees[1] = yDeg; + for (auto zDeg=0;zDeg<=(cubDeg-xDeg-yDeg);++zDeg) + { + degrees[2] = zDeg; const auto analyticIntegral = analyticIntegralOfMonomialOverTet(xDeg,yDeg,zDeg); const auto abstol = std::fabs(tol*analyticIntegral ); - const auto computedIntegral = computeIntegralOfMonomial(tetCub,cubPoints,cubWeights,xDeg,yDeg,zDeg); + const auto computedIntegral = computeIntegralOfMonomial(cubPoints,cubWeights,degrees); const auto absdiff = std::fabs(analyticIntegral - computedIntegral); if (absdiff > abstol) { *outStream << "Default Cubature of Order " << std::setw(2) << std::left << cubDeg << " Integrating " - << "x^" << std::setw(2) << std::left << xDeg << " * y^" << std::setw(2) << yDeg - << " * z^" << std::setw(2) << zDeg << ":" << " " - << std::scientific << std::setprecision(16) - << computedIntegral << " " << analyticIntegral << " " - << std::setprecision(4) << absdiff << " " << " cubWeightsHost(cubWeights); // this constructor does any necessary allocation and copying to host + ValueType minWeight = 1.0; for(int i=0; i degrees; + for (auto xDeg=0;xDeg<=cubDeg;++xDeg,offset += x_offs) + { + degrees[0] = xDeg; + for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,offset += y_offs) + { + degrees[1] = yDeg; + for (auto zDeg=0;zDeg<=(cubDeg-xDeg-yDeg);++zDeg) + { + degrees[2] = zDeg; const auto analyticIntegral = analyticIntegralOfMonomialOverTet(xDeg,yDeg,zDeg); - const auto computedIntegral = computeIntegralOfMonomial(tetCub,cubPoints,cubWeights,xDeg,yDeg,zDeg); + const auto computedIntegral = computeIntegralOfMonomial(cubPoints,cubWeights,degrees); const auto abstol = std::fabs(tol*analyticIntegral ); const auto absdiff = std::fabs(analyticIntegral - computedIntegral); if (absdiff > abstol) { *outStream << "Symmetric Cubature order " << std::setw(2) << std::left << cubDeg << " integrating " - << "x^" << std::setw(2) << std::left << xDeg << " * y^" << std::setw(2) << yDeg - << " * z^" << std::setw(2) << zDeg << ":" << " " - << std::scientific << std::setprecision(16) - << computedIntegral << " " << analyticIntegral << " " - << std::setprecision(4) << absdiff << " " << " DynRankView; #define ConstructWithLabel(obj, ...) 
obj(#obj, __VA_ARGS__) typedef ValueType pointValueType; @@ -109,14 +108,6 @@ namespace Intrepid2 { // analytic integral values const auto analyticMaxDeg = 11; - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); - // perform comparison for (auto cubDeg=0;cubDeg<=maxDeg;++cubDeg) { @@ -125,13 +116,17 @@ namespace Intrepid2 { CubatureLineType xy_line(cubDeg); CubatureLineJacobiType z_line(cubDeg); CubatureTensorPyrType pyrCub( xy_line, xy_line, z_line ); + auto cubPoints = pyrCub.allocateCubaturePoints(); + auto cubWeights = pyrCub.allocateCubatureWeights(); pyrCub.getCubature(cubPoints, cubWeights); - auto cubWeights_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), cubWeights); - ValueType minWeigth = 1.0; + using HostDevice = Kokkos::HostSpace::device_type; + TensorData cubWeightsHost(cubWeights); // this constructor does any necessary allocation and copying to host + + ValueType minWeight = 1.0; for(int i=0; i degrees; + for (auto xDeg=0;xDeg<=cubDeg;++xDeg,offset += x_offs) + { + degrees[0] = xDeg; + for (auto yDeg=0;yDeg<=(cubDeg-xDeg);++yDeg,offset += y_offs) + { + degrees[1] = yDeg; + for (auto zDeg=0;zDeg<=(cubDeg-xDeg-yDeg);++zDeg) { + degrees[2] = zDeg; const auto analyticIntegral = analyticIntegralOfMonomialOverPyr(xDeg,yDeg,zDeg); - const auto computedIntegral = computeIntegralOfMonomial(pyrCub,cubPoints,cubWeights,xDeg,yDeg,zDeg); - const auto abstol = ( analyticIntegral == 0.0 ? tol : std::fabs(tol*analyticIntegral) ); + const auto computedIntegral = computeIntegralOfMonomial(cubPoints,cubWeights,degrees); + const auto abstol = ( analyticIntegral == 0.0 ? 
tol : std::fabs(tol*analyticIntegral) ); const auto absdiff = std::fabs(analyticIntegral - computedIntegral); if (absdiff > abstol) { *outStream << "Cubature order " << std::setw(2) << std::left << cubDeg << " integrating " - << "x^" << std::setw(2) << std::left << xDeg << " * y^" << std::setw(2) << yDeg - << " * z^" << std::setw(2) << zDeg << ":" << " " - << std::scientific << std::setprecision(16) - << computedIntegral << " " << analyticIntegral << " " - << std::setprecision(4) << absdiff << " " << " CubatureLineType; - const auto tol = 10.0 * tolerence(); + const auto tol = 100.0 * tolerence(); // relaxing this as we support higher-order cubature int errorFlag = 0; @@ -119,14 +119,6 @@ namespace Intrepid2 { ">>> ERROR (Integration::Test02): Cannot open analytic solution file" ); } - // storage for cubatrue points and weights - DynRankView ConstructWithLabel(cubPoints, - Parameters::MaxIntegrationPoints, - Parameters::MaxDimension); - - DynRankView ConstructWithLabel(cubWeights, - Parameters::MaxIntegrationPoints); - // compute integrals EPolyType polyType[4] = { POLYTYPE_GAUSS, POLYTYPE_GAUSS_RADAU_LEFT, @@ -138,11 +130,17 @@ namespace Intrepid2 { for (ordinal_type cubDeg=0;cubDeg<=maxDeg;++cubDeg) { CubatureLineType lineCub(cubDeg, ptype); - for (auto polyDeg=0;polyDeg<=cubDeg;++polyDeg) - testInt(cubDeg, polyDeg) = computeIntegralOfMonomial(lineCub, - cubPoints, + auto cubPoints = lineCub.allocateCubaturePoints(); + auto cubWeights = lineCub.allocateCubatureWeights(); + lineCub.getCubature(cubPoints,cubWeights); + Kokkos::Array degrees; + for (auto polyDeg=0;polyDeg<=cubDeg;++polyDeg) + { + degrees[0] = polyDeg; + testInt(cubDeg, polyDeg) = computeIntegralOfMonomial(cubPoints, cubWeights, - polyDeg); + degrees); + } } // perform comparison diff --git a/packages/intrepid2/unit-test/Discretization/Integration/test_util.hpp b/packages/intrepid2/unit-test/Discretization/Integration/test_util.hpp index 91574c4d3986..0b4ac29b576e 100644 --- a/packages/intrepid2/unit-test/Discretization/Integration/test_util.hpp +++ b/packages/intrepid2/unit-test/Discretization/Integration/test_util.hpp @@ -32,75 +32,50 @@ namespace Intrepid2 { typename cubWeightViewType::value_type computeRefVolume(const ordinal_type numPoints, const cubWeightViewType cubWeights) { - typename cubWeightViewType::value_type r_val = 0.0; + typename cubWeightViewType::value_type r_val; + Kokkos::parallel_reduce("computeRefVolume", + Kokkos::RangePolicy(0, numPoints), + KOKKOS_LAMBDA (const int& i, double& lsum) { + lsum += cubWeights(i); + }, r_val); Kokkos::fence(); - auto cubWeights_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), cubWeights); - for (auto i=0;i - ValueType computeMonomial(PointViewType p, - const ordinal_type xDeg, - const ordinal_type yDeg = 0, - const ordinal_type zDeg = 0) { - ValueType r_val = 1.0; - const ordinal_type polydeg[3] = { xDeg, yDeg, zDeg }; - const auto dim = p.extent(0); - Kokkos::fence(); + template + ValueType computeIntegralOfMonomial(CubPointViewType cubPoints, + CubWeightViewType cubWeights, + Kokkos::Array degrees) { + ValueType result = 0.0; - Kokkos::DynRankView p_device("p_device", p.extent(0)); - Kokkos::deep_copy(p_device, p); - auto p_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), p_device); - for (size_type i=0;i - ValueType computeIntegralOfMonomial(cubatureType cub, - cubPointViewType cubPoints, - cubWeightViewType cubWeights, - const ordinal_type xDeg, - const ordinal_type yDeg = 0, - const ordinal_type zDeg = 0) { - ValueType r_val = 0.0; - - 
// get cubature - cub.getCubature(cubPoints, cubWeights); - auto cubWeights_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), cubWeights); - Kokkos::fence(); - - const auto dim = cub.getDimension(); - const auto npts = cub.getNumPoints(); - typedef Kokkos::pair range_type; - - for (auto i=0;i(pt, xDeg, yDeg, zDeg)*cubWeights_host(i); - } - - return r_val; + using ExecutionSpace = typename CubWeightViewType::execution_space; + + Kokkos::parallel_reduce("computeIntegralOfMonomial", + Kokkos::RangePolicy(ExecutionSpace(), 0, numPoints), + KOKKOS_LAMBDA (const int& pointOrdinal, double& localSum ) + { + ValueType value = 1.0; + for (int d=0; d @@ -170,13 +145,14 @@ namespace Intrepid2 { x_i gets mapped into X_i by a change in orientation, then there is a pair (x_j, w_j) in Q such that (X_i, w_i) = (x_j, w_j). */ - template - bool IsQuadratureInvariantToOrientation(cubatureType cub, - cubPointViewType cubPoints, - cubWeightViewType cubWeights, - const unsigned cellTopoKey) { + template + bool IsQuadratureInvariantToOrientation(CubatureType cub, + CubPointViewType cubPoints, + CubWeightViewType cubWeights, + const unsigned cellTopoKey) { ordinal_type numOrts = -1; @@ -201,29 +177,34 @@ namespace Intrepid2 { bool r_val = true; const ordinal_type npts = cub.getNumPoints(); const ordinal_type dim = cub.getDimension(); - using DynRankView = Kokkos::DynRankView; - DynRankView cubPointsOriented("cubPointsOriented", npts,dim); - auto cubPoints_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), cubPoints); - auto cubWeights_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), cubWeights); - + using DynRankViewHost = Kokkos::DynRankView; + DynRankViewHost cubPointsOriented("cubPointsOriented", npts,dim); + + // these constructors do any necessary allocation and copying to host + using WeightValueType = typename CubWeightViewType::value_type; + using PointValueType = typename CubPointViewType::value_type; + using HostDevice = Kokkos::HostSpace::device_type; + TensorPoints cubPointsHost(cubPoints); + TensorData cubWeightsHost(cubWeights); + for (ordinal_type ort=0;ortprecision(5); - // this test upto order 30 - offset (1,2,3) - const ordinal_type npLower = 5, npUpper = Polylib::MaxPolylibPoint; + const ordinal_type npLower = 5, npUpper = Polylib::MaxPolylibPoint; // npUpper: 31 right now const ValueType tol = 1000.0 * tolerence(); + const double lowOrderTol = tol; + const double highOrderTol = tol * 100; try { Kokkos::View @@ -268,12 +269,13 @@ namespace Intrepid2 { ValueType beta = -0.5; while (beta <= 5.0) { for (auto np = npLower; np <= npUpper; ++np){ + const double localTol = (np > 20) ? highOrderTol : lowOrderTol; Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); for (auto n = 2; n < 2*np-off; ++n){ Polylib::Serial::JacobiPolynomial(np, z, p, null, n, alpha, beta); const ValueType sum = ddot(np, w, p); - if (std::isnan(sum) || std::abs(sum) > tol) { + if (std::isnan(sum) || std::abs(sum) > localTol) { errorFlag = -1000; *outStream << "ERROR: alpha = " << alpha << ", beta = " << beta << ", np = " << np << ", n = " << n << " integral was " << sum << "\n"; @@ -296,6 +298,7 @@ namespace Intrepid2 { while (beta <= 5.0) { for (auto np = npLower; np <= npUpper; ++np) { Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); + const double localTol = (np > 20) ? 
highOrderTol : lowOrderTol; for (auto n = 2; n < np-1; ++n) { Polylib::Serial::getDerivative(d, z, np, alpha, beta, poly); @@ -307,7 +310,7 @@ namespace Intrepid2 { for (auto i = 0; i < np; ++i) sum += std::abs(ddot(np, Kokkos::subview(d, i, Kokkos::ALL()), p) - n*std::pow(z(i),n-1)); sum /= (ValueType)np; - if (std::abs(sum)>tol) { + if (std::abs(sum)>localTol) { errorFlag = -1000; *outStream << "ERROR: alpha = " << alpha << ", beta = " << beta << ", np = " << np << ", n = " << n << " difference " << sum << "\n"; @@ -330,6 +333,7 @@ namespace Intrepid2 { while (beta <= 5.0) { for (auto np = npLower; np <= npUpper; ++np) { + const double localTol = (np > 20) ? highOrderTol : lowOrderTol; Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); for (auto n = 2; n < np-1; ++n) { @@ -343,7 +347,7 @@ namespace Intrepid2 { for (auto i = 0; i < np; ++i) sum += std::abs(ddot(np, Kokkos::subview(d, i, Kokkos::ALL()), p) - std::pow(w(i),n)); sum /= (ValueType)np; - if (std::abs(sum)>tol) { + if (std::abs(sum)>localTol) { errorFlag = -1000; *outStream << "ERROR: alpha = " << alpha << ", beta = " << beta << ", np = " << np << ", n = " << n << " difference " << sum << "\n"; diff --git a/packages/muelu/test/unit_tests/IntrepidPCoarsenFactory.cpp b/packages/muelu/test/unit_tests/IntrepidPCoarsenFactory.cpp index 44bd442d6d27..6a1082a19a06 100644 --- a/packages/muelu/test/unit_tests/IntrepidPCoarsenFactory.cpp +++ b/packages/muelu/test/unit_tests/IntrepidPCoarsenFactory.cpp @@ -46,9 +46,9 @@ namespace MueLuTests { /**** some helper methods and classes by Nate ****/ #ifndef TEST_MORE_COMBINATIONS -static const int MAX_LINE_DEGREE = Intrepid2::Parameters::MaxOrder; -static const int MAX_QUAD_DEGREE = Intrepid2::Parameters::MaxOrder; -static const int MAX_HEX_DEGREE = (Intrepid2::Parameters::MaxOrder < 4) ? Intrepid2::Parameters::MaxOrder : 4; +static const int MAX_LINE_DEGREE = (Intrepid2::Parameters::MaxOrder < 10) ? Intrepid2::Parameters::MaxOrder : 10; +static const int MAX_QUAD_DEGREE = (Intrepid2::Parameters::MaxOrder < 10) ? Intrepid2::Parameters::MaxOrder : 10; +static const int MAX_HEX_DEGREE = (Intrepid2::Parameters::MaxOrder < 4) ? Intrepid2::Parameters::MaxOrder : 4; static const int MAX_RANK_COUNT = 4; #else static const int MAX_LINE_DEGREE = Intrepid2::Parameters::MaxOrder; From 156678de7f54b1059abdfa74fbf5d2cd23ecf36d Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Thu, 10 Oct 2024 17:49:53 -0600 Subject: [PATCH 38/38] Github Actions: Fixing AT2 Permissions (#13515) Trying to make the OpenSSF scorecard happy Signed-off-by: Chris Siefert --- .github/workflows/AT2.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index cbb0b653bc4e..99c5dc2654fc 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -13,15 +13,17 @@ on: - develop workflow_dispatch: -# actions: write needed by skip-duplicate-actions -permissions: - actions: write +# actions: write needed by skip-duplicate-actions (handled below as per OpenSSF scorecard) +permissions: contents: read jobs: # Jobs depend on the output of pre-checks to run pre-checks: runs-on: ubuntu-latest + # actions: write needed by skip-duplicate-actions + permissions: + actions: write outputs: should_skip: ${{ steps.skip_check.outputs.should_skip }} steps:
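
Note on the test_util.hpp refactor above: the old helpers mirrored cubature points and weights to the host and accumulated in a serial loop, while the new ones accumulate directly on the device with Kokkos::parallel_reduce, and test_02 now compares against a mixed absolute/relative tolerance (fall back to the raw tolerance when the analytic integral is zero). The following is a minimal standalone sketch of both ideas, not the actual Intrepid2 helpers: the 2-point Gauss rule, the view names, and the hard-coded 1.0e-14 tolerance are illustrative assumptions.

#include <Kokkos_Core.hpp>
#include <cmath>
#include <cstdio>

int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    // Hypothetical 2-point Gauss-Legendre rule on [-1,1]: x = +/- 1/sqrt(3), w = 1.
    // Exact for polynomials through degree 3.
    const int numPoints = 2;
    const int degree    = 3;                      // monomial degree d in x^d
    const double node   = 1.0 / std::sqrt(3.0);

    Kokkos::View<double*> points ("points",  numPoints);
    Kokkos::View<double*> weights("weights", numPoints);

    // Fill the rule directly on the device.
    Kokkos::parallel_for("fillRule", numPoints, KOKKOS_LAMBDA(const int i) {
      points(i)  = (i == 0) ? -node : node;
      weights(i) = 1.0;
    });

    // Device-side reduction: integral = sum_i w_i * x_i^d.
    // This is the mirror-free pattern the refactored helpers adopt instead of
    // copying points/weights to the host and looping there.
    double integral = 0.0;
    Kokkos::parallel_reduce("integrateMonomial", numPoints,
      KOKKOS_LAMBDA(const int i, double& lsum) {
        double value = 1.0;
        for (int k = 0; k < degree; ++k) value *= points(i);
        lsum += weights(i) * value;
      }, integral);

    // Mixed absolute/relative check, in the spirit of the test above: use the raw
    // tolerance when the analytic integral is zero (odd d), otherwise scale by it.
    const double analytic = (degree % 2 == 1) ? 0.0 : 2.0 / (degree + 1);
    const double tol      = 1.0e-14;
    const double abstol   = (analytic == 0.0) ? tol : std::fabs(tol * analytic);
    const double absdiff  = std::fabs(analytic - integral);
    std::printf("computed %.16e vs analytic %.16e, |diff| = %.4e (%s)\n",
                integral, analytic, absdiff, (absdiff > abstol) ? "FAIL" : "OK");
  }
  Kokkos::finalize();
  return 0;
}

Because the reduction writes into a plain host scalar, the parallel_reduce call is blocking, so the host-side comparison is safe without an explicit fence; reducing into a device view instead would require a fence or copy before the check.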