Skip to content

Add orientation-dependent caching checks and option to disable caching #4119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions doc/html/examples/vector_fe_ex3.html
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,69 @@
***************************************************************
***************************************************************
* Running Example vector_fe_ex3:
* ./example-opt element_type=QUAD9 grid_size=5 refine=2 -pc_type lu
***************************************************************

Mesh Information:
elem_dimensions()={2}
elem_default_orders()={SECOND}
supported_nodal_order()=2
spatial_dimension()=2
n_nodes()=1681
n_local_nodes()=1681
n_elem()=525
n_local_elem()=525
n_active_elem()=400
n_subdomains()=1
n_elemsets()=0
n_partitions()=1
n_processors()=1
n_threads()=1
processor_id()=0
is_prepared()=true
is_replicated()=true

EquationSystems
n_systems()=1
System #0, "CurlCurl"
Type "Implicit"
Variables="u"
Finite Element Types="NEDELEC_ONE"
Approximation Orders="FIRST"
n_dofs()=840
n_local_dofs()=840
max(n_local_dofs())=840
n_constrained_dofs()=0
n_local_constrained_dofs()=0
max(local unconstrained dofs)=840
n_vectors()=1
n_matrices()=1
DofMap Sparsity
Average On-Processor Bandwidth <= 6.71429
Average Off-Processor Bandwidth <= 0
Maximum On-Processor Bandwidth <= 7
Maximum Off-Processor Bandwidth <= 0
DofMap Constraints
Number of DoF Constraints = 0

Assembling the System
Nonlinear Residual: 28.9779
Linear solve starting, tolerance 1e-12
Linear solve finished, step 1, residual 4.3648e-14
Trying full Newton step
Current Residual: 6.73932e-14
Nonlinear solver converged, step 0, residual reduction 2.32568e-15 < 1e-06
Nonlinear solver relative step size 1 > 1e-06
L2-Error is: 0.12862
HCurl semi-norm error is: 0.802879
HCurl-Error is: 0.813116

***************************************************************
* Done Running Example vector_fe_ex3:
* ./example-opt element_type=QUAD9 grid_size=5 refine=2 -pc_type lu
***************************************************************
***************************************************************
* Running Example vector_fe_ex3:
* ./example-opt order=2 element_type=TRI6 -pc_type lu
***************************************************************

Expand Down
3 changes: 3 additions & 0 deletions examples/vector_fe/vector_fe_ex3/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ run_example_no_extra_options "$example_name" "$options"
options="element_type=QUAD9 -pc_type lu"
run_example_no_extra_options "$example_name" "$options"

options="element_type=QUAD9 grid_size=5 refine=2 -pc_type lu"
run_example_no_extra_options "$example_name" "$options"

options="order=2 element_type=TRI6 -pc_type lu"
run_example_no_extra_options "$example_name" "$options"

Expand Down
5 changes: 5 additions & 0 deletions examples/vector_fe/vector_fe_ex3/vector_fe_ex3.C
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "libmesh/mesh.h"
#include "libmesh/mesh_generation.h"
#include "libmesh/mesh_modification.h"
#include "libmesh/mesh_refinement.h"
#include "libmesh/exact_solution.h"
#include "libmesh/string_to_enum.h"
#include "libmesh/enum_solver_package.h"
Expand Down Expand Up @@ -93,6 +94,10 @@ int main (int argc, char ** argv)
// Make sure the code is robust against nodal reorderings.
MeshTools::Modification::permute_elements(mesh);

// Make sure the code is robust against mesh refinements.
MeshRefinement mesh_refinement(mesh);
mesh_refinement.uniformly_refine(infile("refine", 0));

// Make sure the code is robust against solves on 2d meshes rotated out of
// the xy plane. By default, all Euler angles are zero, the rotation matrix
// is the identity, and the mesh stays in place.
Expand Down
17 changes: 15 additions & 2 deletions include/fe/fe.h
Original file line number Diff line number Diff line change
Expand Up @@ -790,10 +790,23 @@ class FE : public FEGenericBase<typename FEOutputType<T>::type>
const unsigned vdim = 1);

/**
* An array of the node locations on the last
* element we computed on
* Vectors holding the node locations, edge and
* face orientations of the last element we cached.
*/
std::vector<Point> cached_nodes;
std::vector<bool> cached_edges, cached_faces;

/**
* Repopulate the element cache with the node locations,
* edge and face orientations of the element \p elem.
*/
void cache(const Elem * elem);

/**
* Check if the node locations, edge and face orientations
* held in the element cache match those of element \p elem.
*/
bool matches_cache(const Elem * elem);

/**
* The last side and last edge we did a reinit on
Expand Down
5 changes: 5 additions & 0 deletions include/fe/fe_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,11 @@ class FEInterface
*/
static FEFieldType field_type (const FEFamily & fe_family);

/**
* \returns Whether the element's shape functions are orientation-dependent.
*/
static bool orientation_dependent (const FEFamily & fe_family);

/**
* \returns The number of components of a vector-valued element.
* Scalar-valued elements return 1.
Expand Down
15 changes: 15 additions & 0 deletions include/geom/elem.h
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,12 @@ class Elem : public ReferenceCountedObject<Elem>,
*/
virtual unsigned int n_faces () const = 0;

/**
* \returns An integer range from 0 up to (but not including)
* the number of faces this element has.
*/
IntRange<unsigned short> face_index_range () const;

/**
* \returns The number of children the element that has been derived
* from this class may have.
Expand Down Expand Up @@ -2665,6 +2671,15 @@ Elem::edge_index_range() const



inline
IntRange<unsigned short>
Elem::face_index_range() const
{
return {0, cast_int<unsigned short>(this->n_faces())};
}



inline
IntRange<unsigned short>
Elem::side_index_range() const
Expand Down
98 changes: 70 additions & 28 deletions src/fe/fe.C
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "libmesh/tensor_value.h"
#include "libmesh/enum_elem_type.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/libmesh_singleton.h"

namespace {
// Put this outside a templated class, so we only get 1 warning
Expand All @@ -40,6 +41,19 @@ namespace {

namespace libMesh
{
// ------------------------------------------------------------
// Whether we cache the node locations, edge and face orientations of the last
// element we computed on as needed to avoid calling init_shape_functions and
// compute_shape_functions
static const bool * caching = nullptr;

class CachingSetup: public Singleton::Setup
{
private:
void setup() { caching = new bool(!on_command_line("--disable-caching")); }
public:
~CachingSetup() { delete caching; caching = nullptr; }
} caching_setup;


// ------------------------------------------------------------
Expand Down Expand Up @@ -135,6 +149,50 @@ void FE<Dim,T>::dofs_on_edge(const Elem * const elem,



template <unsigned int Dim, FEFamily T>
void FE<Dim,T>::cache(const Elem * elem)
{
cached_nodes.resize(elem->n_nodes());
for (auto n : elem->node_index_range())
cached_nodes[n] = elem->point(n);

if (FEInterface::orientation_dependent(T))
{
cached_edges.resize(elem->n_edges());
for (auto n : elem->edge_index_range())
cached_edges[n] = elem->positive_edge_orientation(n);

cached_faces.resize(elem->n_faces());
for (auto n : elem->face_index_range())
cached_faces[n] = elem->positive_face_orientation(n);
}
}



template <unsigned int Dim, FEFamily T>
bool FE<Dim,T>::matches_cache(const Elem * elem)
{
bool m = cached_nodes.size() == elem->n_nodes();
for (unsigned n = 1; m && n < elem->n_nodes(); n++)
m = (elem->point(n) - elem->point(0)).relative_fuzzy_equals(cached_nodes[n] - cached_nodes[0]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For correctness: shouldn't this be &= rather than =? Likewise in the for loops below?

And if that's right, then, for performance: should we bother tracking m to return it? If only 99% of cached geometry and orientation matches then we can't reuse cached data, so we might as well return false as soon as we see any mismatch or return true at the end otherwise.

And either way, if I'm right and we don't want to merge this without changes: how about matches_cache() for the name?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For correctness: shouldn't this be &= rather than =? Likewise in the for loops below?

We know m is true in the loop bodies, so the assignment is equivalent.

And if that's right, then, for performance: should we bother tracking m to return it? If only 99% of cached geometry and orientation matches then we can't reuse cached data, so we might as well return false as soon as we see any mismatch or return true at the end otherwise.

Yeah, I thought about that, but I really liked the symmetry with the code in the sister method. Besides, the loops short-circuit if m is false. For orientation-independent elements this means the code should be optimal (right?), for orientation-dependent elements, I can check m in the if-statement if you think it's needed.

And either way, if I'm right and we don't want to merge this without changes: how about matches_cache() for the name?

I can change the name anyway. :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, man, I missed the short-circuiting in the for loop parentheses. This is a really weird idiom but yeah, looks correct, and it's basically optimal - if you break out of the first for loop you're still requesting 2 unnecessary tests of m, but I'll bet the optimizer figures that out and gives the same code regardless, and either way you're not doing any unnecessary floating-point.

Damn it, I knew that if I thought I caught you in a correctness error then I must have overlooked something.


if (FEInterface::orientation_dependent(T))
{
m &= cached_edges.size() == elem->n_edges();
for (unsigned n = 0; m && n < elem->n_edges(); n++)
m = elem->positive_edge_orientation(n) == cached_edges[n];

m &= cached_faces.size() == elem->n_faces();
for (unsigned n = 0; m && n < elem->n_faces(); n++)
m = elem->positive_face_orientation(n) == cached_faces[n];
}

return m;
}



template <unsigned int Dim, FEFamily T>
void FE<Dim,T>::reinit(const Elem * elem,
const std::vector<Point> * const pts,
Expand All @@ -149,7 +207,7 @@ void FE<Dim,T>::reinit(const Elem * elem,

// Try to avoid calling init_shape_functions
// even when shapes_need_reinit
bool cached_nodes_still_fit = false;
bool cached_elem_still_fits = false;

// Most of the hard work happens when we have an actual element
if (elem)
Expand Down Expand Up @@ -202,48 +260,32 @@ void FE<Dim,T>::reinit(const Elem * elem,
this->_elem_type = elem->type();
this->_elem_p_level = elem->p_level();
this->_p_level = this->_add_p_level_in_reinit * elem->p_level();

// Initialize the shape functions
this->_fe_map->template init_reference_to_physical_map<Dim>
(this->qrule->get_points(), elem);
this->init_shape_functions (this->qrule->get_points(), elem);

if (this->shapes_need_reinit())
{
cached_nodes.resize(elem->n_nodes());
for (auto n : elem->node_index_range())
cached_nodes[n] = elem->point(n);
}
}
else
{
// libmesh_assert_greater (elem->n_nodes(), 1);
this->_elem = elem;

cached_nodes_still_fit = true;
if (cached_nodes.size() != elem->n_nodes())
cached_nodes_still_fit = false;
else
for (auto n : make_range(1u, elem->n_nodes()))
{
if (!(elem->point(n) - elem->point(0)).relative_fuzzy_equals
((cached_nodes[n] - cached_nodes[0]), 1e-13))
{
cached_nodes_still_fit = false;
break;
}
}

if (this->shapes_need_reinit() && !cached_nodes_still_fit)
// Check if cached element's nodes, edge and face orientations still fit
cached_elem_still_fits = this->matches_cache(elem);

// Initialize the shape functions if needed
if (this->shapes_need_reinit() && !cached_elem_still_fits)
{
this->_fe_map->template init_reference_to_physical_map<Dim>
(this->qrule->get_points(), elem);
this->init_shape_functions (this->qrule->get_points(), elem);
cached_nodes.resize(elem->n_nodes());
for (auto n : elem->node_index_range())
cached_nodes[n] = elem->point(n);
}
}

// Replace cached nodes, edge and face orientations if no longer fitting
if (this->shapes_need_reinit() && !cached_elem_still_fits && *caching)
this->cache(elem);

// The shape functions correspond to the qrule
this->shapes_on_quadrature = true;
}
Expand Down Expand Up @@ -299,7 +341,7 @@ void FE<Dim,T>::reinit(const Elem * elem,

// Compute the shape functions and the derivatives at all of the
// quadrature points.
if (!cached_nodes_still_fit)
if (!cached_elem_still_fits)
{
if (pts != nullptr)
this->compute_shape_functions (elem,*pts);
Expand Down
19 changes: 19 additions & 0 deletions src/fe/fe_interface.C
Original file line number Diff line number Diff line change
Expand Up @@ -2655,6 +2655,25 @@ FEFieldType FEInterface::field_type (const FEFamily & fe_family)
}
}

bool FEInterface::orientation_dependent (const FEFamily & fe_family)
{
switch (fe_family)
{
case HIERARCHIC:
case L2_HIERARCHIC:
case HIERARCHIC_VEC:
case L2_HIERARCHIC_VEC:
case BERNSTEIN:
case RATIONAL_BERNSTEIN:
case SZABAB:
case NEDELEC_ONE:
case RAVIART_THOMAS:
return true;
default:
return false;
}
}

unsigned int FEInterface::n_vec_dim (const MeshBase & mesh,
const FEType & fe_type)
{
Expand Down