diff --git a/libpympb/pympb.cpp b/libpympb/pympb.cpp index c6033cb99..5bee3941d 100644 --- a/libpympb/pympb.cpp +++ b/libpympb/pympb.cpp @@ -369,12 +369,9 @@ int mode_solver::mean_epsilon(symmetric_matrix *meps, symmetric_matrix *meps_inv bool o1_is_var = o1 && meep_geom::is_variable(o1->material); bool o2_is_var = o2 && meep_geom::is_variable(o2->material); - bool default_is_var_or_file = - meep_geom::is_variable(default_material) || meep_geom::is_file(default_material); + bool default_is_var = meep_geom::is_variable(default_material); - if (o1_is_var || o2_is_var || - (default_is_var_or_file && - (!o1 || !o2 || meep_geom::is_file(o1->material) || meep_geom::is_file(o2->material)))) { + if (o1_is_var || o2_is_var || (default_is_var && (!o1 || !o2))) { return 0; /* arbitrary material functions are non-analyzable */ } diff --git a/python/adjoint/filters.py b/python/adjoint/filters.py index 262f37937..e77b66855 100644 --- a/python/adjoint/filters.py +++ b/python/adjoint/filters.py @@ -6,6 +6,10 @@ from scipy import signal, special import meep as mp +from scipy import special +from scipy import signal +import skfmm +from scipy import interpolate def _proper_pad(x, n): @@ -874,3 +878,20 @@ def gray_indicator(x): density-based topology optimization. Archive of Applied Mechanics, 86(1-2), 189-218. """ return npa.mean(4 * x.flatten() * (1 - x.flatten())) * 100 + + +def make_sdf(data): + """ + Assume the input, data, is the desired output shape + (i.e. 1d, 2d, or 3d) and that it's values are between + 0 and 1. + """ + # create signed distance function + sd = skfmm.distance(data - 0.5, dx=1) + + # interpolate zero-levelset onto 0.5-levelset + x = [np.min(sd.flatten()), 0, np.max(sd.flatten())] + y = [0, 0.5, 1] + f = interpolate.interp1d(x, y, kind="linear") + + return f(sd) diff --git a/python/adjoint/optimization_problem.py b/python/adjoint/optimization_problem.py index 707eb8beb..a7899580e 100644 --- a/python/adjoint/optimization_problem.py +++ b/python/adjoint/optimization_problem.py @@ -226,7 +226,7 @@ def adjoint_run(self): # flip the m number if utils._check_if_cylindrical(self.sim): - self.sim.m = -self.sim.m + self.sim.change_m_number(-self.sim.m) # flip the k point if self.sim.k_point: @@ -261,11 +261,11 @@ def adjoint_run(self): # reset the m number if utils._check_if_cylindrical(self.sim): - self.sim.m = -self.sim.m + self.sim.change_m_number(-self.sim.m) # reset the k point if self.sim.k_point: - self.sim.k_point *= -1 + self.sim.change_k_point(-1 * self.sim.k_point) # update optimizer's state self.current_state = "ADJ" diff --git a/python/adjoint/utils.py b/python/adjoint/utils.py index fb37d2b0b..9683348e1 100644 --- a/python/adjoint/utils.py +++ b/python/adjoint/utils.py @@ -20,7 +20,7 @@ _Z_AXIS = 1 # default finite difference step size when calculating Aᵤ -FD_DEFAULT = 1e-3 +FD_DEFAULT = 1e-6 class DesignRegion: diff --git a/python/meep.i b/python/meep.i index 66a4703cf..0cae141dd 100644 --- a/python/meep.i +++ b/python/meep.i @@ -1504,8 +1504,6 @@ void _get_gradient(PyObject *grad, double scalegrad, %ignore material_type_equal; %ignore is_variable; %ignore is_variable; -%ignore is_file; -%ignore is_file; %ignore is_medium; %ignore is_medium; %ignore is_metal; @@ -1986,7 +1984,7 @@ meep_geom::geom_epsilon* _set_materials(meep::structure * s, geps = existing_geps; } else { geps = meep_geom::make_geom_epsilon(s, &gobj_list, center, _ensure_periodicity, _default_material, - extra_materials); + extra_materials, use_anisotropic_averaging); } if (set_materials) { meep_geom::set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol, diff --git a/python/requirements.txt b/python/requirements.txt index 772aac158..c43d34869 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -7,3 +7,4 @@ numpy parameterized pytest scipy +scikit-fmm diff --git a/python/simulation.py b/python/simulation.py index a8afd9e3a..6c3e15d63 100644 --- a/python/simulation.py +++ b/python/simulation.py @@ -4279,6 +4279,23 @@ def change_k_point(self, k): py_v3_to_vec(self.dimensions, self.k_point, self.is_cylindrical) ) + def change_m_number(self, m): + """ + Change the `m` number (the angular ϕ dependence). + """ + self.m = m + + if self.fields: + needs_complex_fields = not (not self.m or self.m == 0) + + if needs_complex_fields and self.fields.is_real: + self.fields = None + self._is_initialized = False + self.init_sim() + else: + if self.m is not None: + self.fields.change_m_number(m) + def change_sources(self, new_sources): """ Change the list of sources in `Simulation.sources` to `new_sources`, and changes diff --git a/python/tests/test_material_grid.py b/python/tests/test_material_grid.py index 284b5c4ed..c40a973cf 100644 --- a/python/tests/test_material_grid.py +++ b/python/tests/test_material_grid.py @@ -1,4 +1,13 @@ +""" +Checks that a material grid works with symmetries, and that the +new subpixel smoothing feature is accurate. + +TODO: + Create a 3D test that works well with the new smoothing +""" + import meep as mp +from meep import mpb try: import meep.adjoint as mpa @@ -8,7 +17,12 @@ import unittest import numpy as np -from scipy.ndimage import gaussian_filter +import unittest + +rad = 0.301943 +k_point = mp.Vector3(0.3892, 0.1597, 0) +Si = mp.Medium(index=3.5) +cell_size = mp.Vector3(1, 1, 0) def compute_transmittance(matgrid_symmetry=False): @@ -29,14 +43,17 @@ def compute_transmittance(matgrid_symmetry=False): rng = np.random.RandomState(2069588) w = rng.rand(Nx, Ny) - weights = w if matgrid_symmetry else 0.5 * (w + np.fliplr(w)) + # for subpixel smoothing, the underlying design grid must be smoothly varying + w = mpa.tanh_projection(rng.rand(Nx, Ny), 1e5, 0.5) + w = mpa.conic_filter(w, 0.25, matgrid_size.x, matgrid_size.y, matgrid_resolution) + weights = 0.5 * (w + np.fliplr(w)) if not matgrid_symmetry else w matgrid = mp.MaterialGrid( mp.Vector3(Nx, Ny), mp.air, mp.Medium(index=3.5), weights=weights, - do_averaging=False, + beta=np.inf, grid_type="U_MEAN", ) @@ -95,16 +112,19 @@ def compute_transmittance(matgrid_symmetry=False): ][0] tran = np.power(np.abs(mode_coeff), 2) - print(f'tran:, {"sym" if matgrid_symmetry else "nosym"}, {tran}') + print("tran:, {}, {}".format("sym" if matgrid_symmetry else "nosym", tran)) return tran -def compute_resonant_mode_2d(res, default_mat=False): - cell_size = mp.Vector3(1, 1, 0) - - rad = 0.301943 - +def compute_resonant_mode_2d( + res, + radius=rad, + default_mat=False, + cylinder=False, + cylinder_matgrid=True, + do_averaging=True, +): fcen = 0.3 df = 0.2 * fcen sources = [ @@ -115,8 +135,6 @@ def compute_resonant_mode_2d(res, default_mat=False): ) ] - k_point = mp.Vector3(0.3892, 0.1597, 0) - matgrid_size = mp.Vector3(1, 1, 0) matgrid_resolution = 1200 @@ -129,38 +147,46 @@ def compute_resonant_mode_2d(res, default_mat=False): x = np.linspace(-0.5 * matgrid_size.x, 0.5 * matgrid_size.x, Nx) y = np.linspace(-0.5 * matgrid_size.y, 0.5 * matgrid_size.y, Ny) xv, yv = np.meshgrid(x, y) - weights = np.sqrt(np.square(xv) + np.square(yv)) < rad - filtered_weights = gaussian_filter(weights, sigma=3.0, output=np.double) + weights = mpa.make_sdf(np.sqrt(np.square(xv) + np.square(yv)) < radius) + if cylinder: + weights = 0.0 * weights + 1.0 matgrid = mp.MaterialGrid( - mp.Vector3(Nx, Ny), - mp.air, - mp.Medium(index=3.5), - weights=filtered_weights, - do_averaging=True, - beta=1000, - eta=0.5, + mp.Vector3(Nx, Ny), mp.air, Si, weights=weights, beta=np.inf, eta=0.5 ) - geometry = [ - mp.Block( - center=mp.Vector3(), - size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0), - material=matgrid, - ) - ] + # Use a cylinder object, not a block object + if cylinder: + # within the cylinder, use a (uniform) material grid + if cylinder_matgrid: + geometry = [ + mp.Cylinder(center=mp.Vector3(), radius=radius, material=matgrid) + ] + # within the cylinder, just use a normal medium + else: + geometry = [mp.Cylinder(center=mp.Vector3(), radius=radius, material=Si)] + # use a block object + else: + geometry = [ + mp.Block( + center=mp.Vector3(), + size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0), + material=matgrid, + ) + ] sim = mp.Simulation( resolution=res, + eps_averaging=do_averaging, cell_size=cell_size, default_material=matgrid if default_mat else mp.Medium(), - geometry=[] if default_mat else geometry, + geometry=geometry if not default_mat else [], sources=sources, k_point=k_point, ) h = mp.Harminv(mp.Hz, mp.Vector3(0.3718, -0.2076), fcen, df) - sim.run(mp.after_sources(h), until_after_sources=200) + sim.run(mp.after_sources(h), until_after_sources=300) try: for m in h.modes: @@ -172,94 +198,81 @@ def compute_resonant_mode_2d(res, default_mat=False): return freq -def compute_resonant_mode_3d(use_matgrid=True): - resolution = 25 - - wvl = 1.27 - fcen = 1 / wvl - df = 0.02 * fcen - - nSi = 3.45 - Si = mp.Medium(index=nSi) - nSiO2 = 1.45 - SiO2 = mp.Medium(index=nSiO2) - - s = 1.0 - cell_size = mp.Vector3(s, s, s) - - rad = 0.34 # radius of sphere - - if use_matgrid: - matgrid_resolution = 2 * resolution - N = int(s * matgrid_resolution) - coord = np.linspace(-0.5 * s, 0.5 * s, N) - xv, yv, zv = np.meshgrid(coord, coord, coord) - - weights = np.sqrt(np.square(xv) + np.square(yv) + np.square(zv)) < rad - filtered_weights = gaussian_filter( - weights, sigma=4 / resolution, output=np.double - ) - - matgrid = mp.MaterialGrid( - mp.Vector3(N, N, N), - SiO2, - Si, - weights=filtered_weights, - do_averaging=True, - beta=1000, - eta=0.5, - ) - - geometry = [mp.Block(center=mp.Vector3(), size=cell_size, material=matgrid)] - else: - geometry = [mp.Sphere(center=mp.Vector3(), radius=rad, material=Si)] - - sources = [ - mp.Source( - src=mp.GaussianSource(fcen, fwidth=df), - size=mp.Vector3(), - center=mp.Vector3(0.13, 0.25, 0.06), - component=mp.Ez, - ) - ] - - k_point = mp.Vector3(0.23, -0.17, 0.35) +def compute_resonant_mode_mpb(resolution=512): + geometry = [mp.Cylinder(rad, material=Si)] + geometry_lattice = mp.Lattice(cell_size) - sim = mp.Simulation( - resolution=resolution, - cell_size=cell_size, - sources=sources, - default_material=SiO2, - k_point=k_point, + ms = mpb.ModeSolver( + num_bands=1, + k_points=[mp.cartesian_to_reciprocal(k_point, geometry_lattice)], geometry=geometry, + geometry_lattice=geometry_lattice, + resolution=resolution, ) - h = mp.Harminv(mp.Ez, mp.Vector3(-0.2684, 0.1185, 0.0187), fcen, df) - - sim.run(mp.after_sources(h), until_after_sources=200) - - try: - for m in h.modes: - print(f"harminv:, {resolution}, {m.freq}, {m.Q}") - freq = h.modes[0].freq - except: - raise RuntimeError("No resonant modes found.") - - return freq + ms.run_te() + return ms.freqs[0] class TestMaterialGrid(unittest.TestCase): def test_subpixel_smoothing(self): - # "exact" frequency computed using MaterialGrid at resolution = 300 - freq_ref = 0.29826813873225283 - - res = [25, 50] + res = 25 + + def subpixel_test_matrix(radius, do_averaging): + freq_cylinder = compute_resonant_mode_2d( + res, + radius, + default_mat=False, + cylinder=True, + cylinder_matgrid=False, + do_averaging=do_averaging, + ) + freq_matgrid = compute_resonant_mode_2d( + res, + radius, + default_mat=False, + cylinder=False, + do_averaging=do_averaging, + ) + freq_matgrid_cylinder = compute_resonant_mode_2d( + res, + radius, + default_mat=False, + cylinder=True, + cylinder_matgrid=True, + do_averaging=do_averaging, + ) + return [freq_cylinder, freq_matgrid, freq_matgrid_cylinder] + + # when smoothing is off, all three tests should be identical to machine precision + no_smoothing = subpixel_test_matrix(rad, False) + self.assertEqual(no_smoothing[0], no_smoothing[1]) + self.assertEqual(no_smoothing[1], no_smoothing[2]) + + # when we slightly perturb the radius, the results should be the same as before. + no_smoothing_perturbed = subpixel_test_matrix(rad + 0.01 / res, False) + self.assertEqual(no_smoothing, no_smoothing_perturbed) + + # when smoothing is on, the simple material results should be different from the matgrid results + smoothing = subpixel_test_matrix(rad, True) + self.assertNotEqual(smoothing[0], smoothing[1]) + self.assertAlmostEqual(smoothing[1], smoothing[2], 3) + + # when we slighty perturb the radius, the results should all be different from before. + smoothing_perturbed = subpixel_test_matrix(rad + 0.01 / res, True) + self.assertNotEqual(smoothing, smoothing_perturbed) + + # "exact" frequency computed using MPB + freq_ref = compute_resonant_mode_mpb() + print(freq_ref) + res = [50, 100] freq_matgrid = [] for r in res: - freq_matgrid.append(compute_resonant_mode_2d(r)) + freq_matgrid.append(compute_resonant_mode_2d(r, cylinder=False)) # verify that the frequency of the resonant mode is # approximately equal to the reference value self.assertAlmostEqual(freq_ref, freq_matgrid[-1], 2) + print("results: ", freq_ref, freq_matgrid) # verify that the relative error is decreasing with increasing resolution # and is better than linear convergence because of subpixel smoothing @@ -268,14 +281,10 @@ def test_subpixel_smoothing(self): abs(freq_matgrid[0] - freq_ref), ) - freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], True) + # ensure that a material grid as a default material works + freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], rad, True) self.assertAlmostEqual(freq_matgrid[0], freq_matgrid_default_mat) - def test_matgrid_3d(self): - freq_matgrid = compute_resonant_mode_3d(True) - freq_geomobj = compute_resonant_mode_3d(False) - self.assertAlmostEqual(freq_matgrid, freq_geomobj, places=2) - def test_symmetry(self): tran_nosym = compute_transmittance(False) tran_sym = compute_transmittance(True) diff --git a/python/typemap_utils.cpp b/python/typemap_utils.cpp index ab8c8a294..0ad65c6bf 100644 --- a/python/typemap_utils.cpp +++ b/python/typemap_utils.cpp @@ -522,8 +522,16 @@ static int pymaterial_grid_to_material_grid(PyObject *po, material_data *md) { PyArrayObject *pao = (PyArrayObject *)po_dp; if (!PyArray_Check(pao)) { meep::abort("MaterialGrid weights failed to init."); } if (!PyArray_ISCARRAY(pao)) { meep::abort("Numpy array weights must be C-style contiguous."); } - md->weights = new double[PyArray_SIZE(pao)]; - memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double)); + md->weights.clear(); + for (size_t i = 0; i < PyArray_SIZE(pao); i++) { + md->weights.push_back(((double *)PyArray_DATA(pao))[i]); + } + md->weights.clear(); + for (size_t i = 0; i < PyArray_SIZE(pao); i++) { + md->weights.push_back(((double *)PyArray_DATA(pao))[i]); + } + // md->weights = new double[PyArray_SIZE(pao)]; + // memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double)); // if needed, combine sus structs to main object PyObject *py_e_sus_m1 = PyObject_GetAttrString(po_medium1, "E_susceptibilities"); diff --git a/src/fields.cpp b/src/fields.cpp index b9f43f338..6d16eb748 100644 --- a/src/fields.cpp +++ b/src/fields.cpp @@ -793,4 +793,13 @@ bool operator==(const comms_key &lhs, const comms_key &rhs) { return (lhs.ft == rhs.ft) && (lhs.phase == rhs.phase) && (lhs.pair == rhs.pair); } +void fields::change_m_number(double new_m) { + m = new_m; + for (int i = 0; i < num_chunks; i++) { + chunks[i]->change_m_number(new_m); + } +} + +void fields_chunk::change_m_number(double new_m) { m = new_m; } + } // namespace meep diff --git a/src/material_data.cpp b/src/material_data.cpp index a24d87cee..7848aae83 100644 --- a/src/material_data.cpp +++ b/src/material_data.cpp @@ -66,10 +66,10 @@ void material_data::copy_from(const material_data &from) { } grid_size = from.grid_size; - if (from.weights) { + if (from.weights.size() != 0) { size_t N = from.grid_size.x * from.grid_size.y * from.grid_size.z; - weights = new double[N]; - std::copy_n(from.weights, N, weights); + weights.clear(); + weights.insert(weights.end(), from.weights.begin(), from.weights.end()); } medium_1 = from.medium_1; diff --git a/src/material_data.hpp b/src/material_data.hpp index 4e1df20d5..ac6d57e24 100644 --- a/src/material_data.hpp +++ b/src/material_data.hpp @@ -25,6 +25,7 @@ #include #include +#include "support/dual.hpp" #include "meep.hpp" @@ -122,6 +123,9 @@ struct material_data { // these fields used only if which_subclass==MATERIAL_USER user_material_func user_func; void *user_data; + + // used for any variable material (USER, FILE, or GRID): + // indicates whether we should use the fallback subpixel averaging via quadrature bool do_averaging; // these fields used only if which_subclass==MATERIAL_FILE @@ -130,7 +134,7 @@ struct material_data { // these fields used only if which_subclass==MATERIAL_GRID vector3 grid_size; - double *weights; + std::vector weights; medium_struct medium_1; medium_struct medium_2; double beta; @@ -184,7 +188,7 @@ material_type make_user_material(user_material_func user_func, void *user_data); material_type make_file_material(char *epsilon_input_file); material_type make_material_grid(bool do_averaging, double beta, double eta, double damping); void read_epsilon_file(const char *eps_input_file); -void update_weights(material_type matgrid, double *weights); +void update_weights(material_type matgrid, duals::duald *weights); }; // namespace meep_geom diff --git a/src/meep.hpp b/src/meep.hpp index f5a721e81..9b82072a2 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -1546,6 +1546,7 @@ class fields_chunk { void remove_sources(); void remove_susceptibilities(bool shared_chunks); void zero_fields(); + void change_m_number(double new_m); // update_eh.cpp bool needs_W_prev(component c) const; @@ -1751,6 +1752,7 @@ class fields { void remove_fluxes(); void reset(); void log(const char *prefix = ""); + void change_m_number(double new_m); // time.cpp std::vector time_spent_on(time_sink sink); diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp index 6b83d109b..c254a9b96 100644 --- a/src/meepgeom.cpp +++ b/src/meepgeom.cpp @@ -14,11 +14,15 @@ % Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include #include #include #include "meepgeom.hpp" #include "meep_internals.hpp" +namespace dlit = duals::literals; +using namespace duals::literals; + namespace meep_geom { #define master_printf meep::master_printf @@ -121,9 +125,6 @@ void material_free(material_type m) { // object so will assume that the caller keeps track of its lifetime. delete[] m->epsilon_data; m->epsilon_data = NULL; - - delete[] m->weights; - m->weights = NULL; delete m; } @@ -148,9 +149,9 @@ bool material_type_equal(const material_type m1, const material_type m2) { /* rotate A by a unitary (real) rotation matrix R: RAR = transpose(R) * A * R */ -void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const double R[3][3]) { +void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const duals::duald R[3][3]) { int i, j; - double A[3][3], AR[3][3]; + duals::duald A[3][3], AR[3][3]; A[0][0] = A_->m00; A[1][1] = A_->m11; A[2][2] = A_->m22; @@ -173,8 +174,8 @@ void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const double R[3 /* Set Vinv = inverse of V, where both V and Vinv are real-symmetric matrices.*/ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) { - double m00 = V->m00, m11 = V->m11, m22 = V->m22; - double m01 = V->m01, m02 = V->m02, m12 = V->m12; + duals::duald m00 = V->m00, m11 = V->m11, m22 = V->m22; + duals::duald m01 = V->m01, m02 = V->m02, m12 = V->m12; if (m01 == 0.0 && m02 == 0.0 && m12 == 0.0) { /* optimize common case of a diagonal matrix: */ @@ -184,13 +185,13 @@ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) { Vinv->m01 = Vinv->m02 = Vinv->m12 = 0.0; } else { - double detinv; + duals::duald detinv; /* compute the determinant: */ detinv = m00 * m11 * m22 - m02 * m11 * m02 + 2.0 * m01 * m12 * m02 - m01 * m01 * m22 - m12 * m12 * m00; - if (detinv == 0.0) meep::abort("singular 3x3 matrix"); + if (detinv.rpart() == 0.0) meep::abort("singular 3x3 matrix"); detinv = 1.0 / detinv; @@ -206,18 +207,19 @@ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) { /* Returns whether or not V is positive-definite. */ int sym_matrix_positive_definite(symm_matrix *V) { - double det2, det3; - double m00 = V->m00, m11 = V->m11, m22 = V->m22; + duals::duald det2, det3; + duals::duald m00 = V->m00, m11 = V->m11, m22 = V->m22; #if defined(WITH_HERMITIAN_EPSILON) - scalar_complex m01 = V->m01, m02 = V->m02, m12 = V->m12; + // FIXME + scalar_complex m01 = V->m01.rpart(), m02 = V->m02.rpart(), m12 = V->m12.rpart(); det2 = m00 * m11 - CSCALAR_NORMSQR(m01); det3 = det2 * m22 - m11 * CSCALAR_NORMSQR(m02) - CSCALAR_NORMSQR(m12) * m00 + 2.0 * ((m01.re * m12.re - m01.im * m12.im) * m02.re + (m01.re * m12.im + m01.im * m12.re) * m02.im); #else /* real matrix */ - double m01 = V->m01, m02 = V->m02, m12 = V->m12; + duals::duald m01 = V->m01, m02 = V->m02, m12 = V->m12; det2 = m00 * m11 - m01 * m01; det3 = det2 * m22 - m02 * m11 * m02 + 2.0 * m01 * m12 * m02 - m12 * m12 * m00; @@ -285,15 +287,14 @@ bool is_material_grid(material_type mt) { } bool is_material_grid(void *md) { return is_material_grid((material_type)md); } +// return whether mt is spatially varying bool is_variable(material_type mt) { - return (mt->which_subclass == material_data::MATERIAL_USER) || - (mt->which_subclass == material_data::MATERIAL_GRID); + return (mt && ((mt->which_subclass == material_data::MATERIAL_USER) || + (mt->which_subclass == material_data::MATERIAL_GRID) || + (mt->which_subclass == material_data::MATERIAL_FILE))); } bool is_variable(void *md) { return is_variable((material_type)md); } -bool is_file(material_type md) { return (md->which_subclass == material_data::MATERIAL_FILE); } -bool is_file(void *md) { return is_file((material_type)md); } - bool is_medium(material_type md, medium_struct **m) { if (md->which_subclass == material_data::MEDIUM) { *m = &(md->medium); @@ -304,24 +305,29 @@ bool is_medium(material_type md, medium_struct **m) { bool is_medium(void *md, medium_struct **m) { return is_medium((material_type)md, m); } +// note: is_metal assumes that eval_material_pt has already been called for variable materials bool is_metal(meep::field_type ft, const material_type *material) { material_data *md = *material; if (ft == meep::E_stuff) switch (md->which_subclass) { case material_data::MEDIUM: + case material_data::MATERIAL_USER: + case material_data::MATERIAL_FILE: case material_data::MATERIAL_GRID: return (md->medium.epsilon_diag.x < 0 || md->medium.epsilon_diag.y < 0 || md->medium.epsilon_diag.z < 0); case material_data::PERFECT_METAL: return true; - default: meep::abort("unknown material type"); return false; + default: meep::abort("unknown material type in is_metal"); return false; } else switch (md->which_subclass) { case material_data::MEDIUM: + case material_data::MATERIAL_USER: + case material_data::MATERIAL_FILE: case material_data::MATERIAL_GRID: return (md->medium.mu_diag.x < 0 || md->medium.mu_diag.y < 0 || md->medium.mu_diag.z < 0); case material_data::PERFECT_METAL: return false; // is an electric conductor, but not a magnetic conductor - default: meep::abort("unknown material type"); return false; + default: meep::abort("unknown material type in is_metal"); return false; } } @@ -337,37 +343,60 @@ bool has_offdiag(const medium_struct *material) { // computes the vector-Jacobian product of the gradient of the matgrid_val function v // with the Jacobian of the to_geom_box_coords function for geometric_object o -vector3 to_geom_object_coords_VJP(vector3 v, const geometric_object *o) { +cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) { if (!o) { meep::abort("must pass a geometric_object to to_geom_object_coords_VJP.\n"); } switch (o->which_subclass) { default: { - vector3 po = {0, 0, 0}; + cvector3 po = cvector_zero(); return po; } case geometric_object::SPHERE: { number radius = o->subclass.sphere_data->radius; - return vector3_scale(0.5 / radius, v); + return cvector3_scale(0.5 / radius, v); } /* case geometric_object::CYLINDER: NOT YET IMPLEMENTED */ case geometric_object::BLOCK: { + /* In order to leverage the underlying libctl infrastructure + *and* the dual library that makes computing derivatives so easy, + me perform a bit of a trick: we use the *complex* libctl vector, + storing the real and dual parts of the dual library and *manually* + propagate the dual through existing libraries as needed. + */ + vector3 v_real = cvector3_re(v); // real part + vector3 v_imag = cvector3_im(v); // "dual" part + vector3 size = o->subclass.block_data->size; - if (size.x != 0.0) v.x /= size.x; - if (size.y != 0.0) v.y /= size.y; - if (size.z != 0.0) v.z /= size.z; - return matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v); + + if (size.x != 0.0) { + v_real.x /= size.x; + v_imag.x /= size.x; + } + if (size.y != 0.0) { + v_real.y /= size.y; + v_imag.y /= size.y; + } + if (size.z != 0.0) { + v_real.z /= size.z; + v_imag.z /= size.z; + } + v_real = matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_real); + v_imag = matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_imag); + return make_cvector3(v_real, v_imag); } /* case geometric_object::PRISM: NOT YET IMPLEMENTED */ } } -meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_object *o) { +cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object *o) { + /* computes the actual spatial gradient at point `p` + for the specified material grid `md`. */ + if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); } - meep::vec gradient(zero_vec(dim)); - double *data = md->weights; + cvector3 gradient = cvector_zero(); int nx = md->grid_size.x; int ny = md->grid_size.y; int nz = md->grid_size.z; @@ -397,25 +426,26 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec /* define a macro to give us data(x,y,z) on the grid, in row-major order: */ -#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride]) +#define D(x, y, z) ((md->weights)[(((x)*ny + (y)) * nz + (z)) * stride]) - double du_dx = + duals::duald du_dx = (signflip_dx ? -1.0 : 1.0) * (((-D(x1, y1, z1) + D(x2, y1, z1)) * (1.0 - dy) + (-D(x1, y2, z1) + D(x2, y2, z1)) * dy) * (1.0 - dz) + ((-D(x1, y1, z2) + D(x2, y1, z2)) * (1.0 - dy) + (-D(x1, y2, z2) + D(x2, y2, z2)) * dy) * dz); - double du_dy = (signflip_dy ? -1.0 : 1.0) * ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) + - (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) * - (1.0 - dz) + - (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) + - (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) * - dz); - double du_dz = (signflip_dz ? -1.0 : 1.0) * - (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) + - (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) + - ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) + - (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy)); + duals::duald du_dy = + (signflip_dy ? -1.0 : 1.0) * ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) + + (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) * + (1.0 - dz) + + (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) + + (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) * + dz); + duals::duald du_dz = (signflip_dz ? -1.0 : 1.0) * + (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) + + (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) + + ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) + + (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy)); #undef D @@ -424,25 +454,22 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec // respect to r2 where g(r2) is the to_geom_object_coords function (in libctl/utils/geom.c). // computing this quantity involves using the chain rule and thus the vector-Jacobian product // ∇u J where J is the Jacobian matrix of g. - vector3 grad_u; - grad_u.x = du_dx * nx; - grad_u.y = du_dy * ny; - grad_u.z = du_dz * nz; - if (o != NULL) { - vector3 grad_u_J = to_geom_object_coords_VJP(grad_u, o); - gradient.set_direction(meep::X, grad_u_J.x); - gradient.set_direction(meep::Y, grad_u_J.y); - gradient.set_direction(meep::Z, grad_u_J.z); - } + vector3 g_real = make_vector3(nx * du_dx.rpart(), ny * du_dy.rpart(), nz * du_dz.rpart()); + vector3 g_imag = make_vector3(nx * du_dx.dpart(), ny * du_dy.dpart(), nz * du_dz.dpart()); + cvector3 grad_u = make_cvector3(g_real, g_imag); + + if (o != NULL) { gradient = to_geom_object_coords_VJP(grad_u, o); } else { - gradient.set_direction(meep::X, - geometry_lattice.size.x == 0 ? 0 : grad_u.x / geometry_lattice.size.x); - gradient.set_direction(meep::Y, - geometry_lattice.size.y == 0 ? 0 : grad_u.y / geometry_lattice.size.y); - gradient.set_direction(meep::Z, - geometry_lattice.size.z == 0 ? 0 : grad_u.z / geometry_lattice.size.z); + g_real = make_vector3( + geometry_lattice.size.x == 0 ? 0 : nx * du_dx.rpart() / geometry_lattice.size.x, + geometry_lattice.size.y == 0 ? 0 : ny * du_dy.rpart() / geometry_lattice.size.y, + geometry_lattice.size.z == 0 ? 0 : nz * du_dz.rpart() / geometry_lattice.size.z); + g_imag = make_vector3( + geometry_lattice.size.x == 0 ? 0 : nx * du_dx.dpart() / geometry_lattice.size.x, + geometry_lattice.size.y == 0 ? 0 : ny * du_dy.dpart() / geometry_lattice.size.y, + geometry_lattice.size.z == 0 ? 0 : nz * du_dz.dpart() / geometry_lattice.size.z); + gradient = make_cvector3(g_real, g_imag); } - return gradient; } @@ -452,21 +479,30 @@ void map_lattice_coordinates(double &px, double &py, double &pz) { pz = geometry_lattice.size.z == 0 ? 0 : 0.5 + (pz - geometry_center.z) / geometry_lattice.size.z; } -meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) { +cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) { + /* loops through all the material grids at a current point + and computes the final *spatial* gradient (w.r.t. x,y,z) + after all appropriate transformations (e.g. due to + overlapping grids). Calls the helper function, `material_grid_grad`, + which is what actually computes the spatial gradient. + */ + + // check for proper overlapping grids if (md->material_grid_kinds == material_data::U_MIN || md->material_grid_kinds == material_data::U_PROD) meep::abort("%s:%i:matgrid_grad does not support overlapping grids with U_MIN or U_PROD\n", __FILE__, __LINE__); - meep::vec gradient(zero_vec(dim)); + cvector3 gradient = cvector_zero(); int matgrid_val_count = 0; // iterate through object tree at current point if (tp) { do { - gradient += - material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]), - (material_data *)tp->objects[oi].o->material, tp->objects[oi].o); + gradient = + cvector_add(gradient, material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]), + (material_data *)tp->objects[oi].o->material, + tp->objects[oi].o)); if (md->material_grid_kinds == material_data::U_DEFAULT) break; ++matgrid_val_count; tp = geom_tree_search_next(p, tp, &oi); @@ -480,29 +516,51 @@ meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) { ++matgrid_val_count; } + // compensate for overlapping grids if (md->material_grid_kinds == material_data::U_MEAN) - gradient = gradient * 1.0 / matgrid_val_count; + gradient = cvector3_scale(1.0 / matgrid_val_count, gradient); return gradient; } +duals::duald dual_linear_interpolate(double rx, double ry, double rz, + std::vector &data, int nx, int ny, int nz, + int stride) { + + int x1, y1, z1, x2, y2, z2; + double dx, dy, dz; + + meep::map_coordinates(rx, ry, rz, nx, ny, nz, x1, y1, z1, x2, y2, z2, dx, dy, dz); + + /* define a macro to give us data(x,y,z) on the grid, + in row-major order (the order used by HDF5): */ +#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride]) + + return (((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) + + (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) * + (1.0 - dz) + + ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) + + (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy) * + dz); -double material_grid_val(vector3 p, material_data *md) { +#undef D +} +duals::duald material_grid_val(vector3 p, material_data *md) { // given the relative location, p, interpolate the material grid point. - if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); } - return meep::linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x, md->grid_size.y, - md->grid_size.z, 1); + if (!is_material_grid(md)) { abort(); } + return dual_linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x, md->grid_size.y, + md->grid_size.z, 1); } -static double tanh_projection(double u, double beta, double eta) { +static duals::duald tanh_projection(duals::duald u, double beta, double eta) { if (beta == 0) return u; if (u == eta) return 0.5; // avoid NaN when beta is Inf - double tanh_beta_eta = tanh(beta * eta); + duals::duald tanh_beta_eta = tanh(beta * eta); return (tanh_beta_eta + tanh(beta * (u - eta))) / (tanh_beta_eta + tanh(beta * (1 - eta))); } -double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) { - double uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u; +duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) { + duals::duald uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u; int matgrid_val_count = 0; // iterate through object tree at current point @@ -569,7 +627,7 @@ static void interp_tensors(vector3 diag_in_1, vector3 offdiag_in_1, vector3 diag void epsilon_material_grid(material_data *md, double u) { // NOTE: assume p lies on normalized grid within (0,1) - if (!(md->weights)) meep::abort("material params were not initialized!"); + if (md->weights.size() == 0) meep::abort("material params were not initialized!"); medium_struct *mm = &(md->medium); medium_struct *m1 = &(md->medium_1); @@ -807,7 +865,7 @@ static void material_epsmu(meep::field_type ft, material_type material, symm_mat epsmu_inv->m01 = epsmu_inv->m02 = epsmu_inv->m12 = 0.0; break; - default: meep::abort("unknown material type"); + default: meep::abort("unknown material type in epsmu"); } else switch (md->which_subclass) { @@ -835,7 +893,7 @@ static void material_epsmu(meep::field_type ft, material_type material, symm_mat epsmu_inv->m01 = epsmu_inv->m02 = epsmu_inv->m12 = 0.0; break; - default: meep::abort("unknown material type"); + default: meep::abort("unknown material type in epsmu"); } } @@ -847,26 +905,31 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r) boolean inobject; material = (material_type)material_of_unshifted_point_in_tree_inobject(p, restricted_tree, &inobject); - material_data *md = material; + eval_material_pt(material, p); +} - switch (md->which_subclass) { +// evaluate the material at the given point p if necessary — this is needed if +// the material is variable (a grid, function, or file); otherwise it is a no-op. +void geom_epsilon::eval_material_pt(material_type &material, vector3 p) { + switch (material->which_subclass) { // material grid: interpolate onto user specified material grid to get properties at r - case material_data::MATERIAL_GRID: - double u; + case material_data::MATERIAL_GRID: { + duals::duald u; int oi; geom_box_tree tp; tp = geom_tree_search(p, restricted_tree, &oi); // interpolate and project onto material grid - u = tanh_projection(matgrid_val(p, tp, oi, md) + this->u_p, md->beta, md->eta); + u = tanh_projection(matgrid_val(p, tp, oi, material), material->beta, material->eta); // interpolate material from material grid point - epsilon_material_grid(md, u); + epsilon_material_grid(material, u.rpart()); return; + } // material read from file: interpolate to get properties at r case material_data::MATERIAL_FILE: - if (md->epsilon_data) - epsilon_file_material(md, p); + if (material->epsilon_data) + epsilon_file_material(material, p); else material = (material_type)default_material; return; @@ -877,16 +940,16 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r) // the user's function only needs to fill in whatever is // different from vacuum. case material_data::MATERIAL_USER: - md->medium = medium_struct(); - md->user_func(p, md->user_data, &(md->medium)); - md->medium.check_offdiag_im_zero_or_abort(); + material->medium = medium_struct(); + material->user_func(p, material->user_data, &(material->medium)); + material->medium.check_offdiag_im_zero_or_abort(); return; // position-independent material or metal: there is nothing to do case material_data::MEDIUM: case material_data::PERFECT_METAL: return; - default: meep::abort("unknown material type"); + default: meep::abort("unknown material type in eval_material_pt"); }; } @@ -907,20 +970,21 @@ double geom_epsilon::chi1p1(meep::field_type ft, const meep::vec &r) { material_epsmu(ft, material, &chi1p1, &chi1p1_inv); material_gc(material); - return (chi1p1.m00 + chi1p1.m11 + chi1p1.m22) / 3; + return (chi1p1.m00.rpart() + chi1p1.m11.rpart() + chi1p1.m22.rpart()) / 3; } /* Find frontmost object in v, along with the constant material behind it. - Returns false if material behind the object is not constant. + Returns false if more than two objects & materials intersect the pixel. Requires moderately horrifying logic to figure things out properly, stolen from MPB. */ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, vector3 &pcenter, const geometric_object **o_front, vector3 &shiftby_front, - material_type &mat_front, material_type &mat_behind) { + material_type &mat_front, material_type &mat_behind, vector3 &p_front, + vector3 &p_behind) { vector3 p; const geometric_object *o1 = 0, *o2 = 0; - vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}; + vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}, p1 = {0, 0, 0}, p2 = {0, 0, 0}; geom_box pixel; material_type mat1 = vacuum, mat2 = vacuum; int id1 = -1, id2 = -1; @@ -982,6 +1046,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, shiftby1 = shiftby; id1 = id; mat1 = mat; + p1 = q; } else if (id2 == -1 || ((id >= id1 && id >= id2) && (id1 == id2 || material_type_equal(mat1, mat2)))) { @@ -989,10 +1054,12 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, shiftby2 = shiftby; id2 = id; mat2 = mat; + p2 = q; } else if (!(id1 < id2 && (id1 == id || material_type_equal(mat1, mat))) && - !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat)))) + !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat)))) { return false; + } } // CHECK(id1 > -1, "bug in object_of_point_in_tree?"); @@ -1000,130 +1067,128 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, id2 = id1; o2 = o1; mat2 = mat1; + p2 = p1; shiftby2 = shiftby1; } - if ((o1 && is_variable(o1->material)) || (o2 && is_variable(o2->material)) || - ((is_variable(default_material) || is_file(default_material)) && - (!o1 || is_file(o1->material) || !o2 || is_file(o2->material)))) - return false; - if (id1 >= id2) { *o_front = o1; shiftby_front = shiftby1; mat_front = mat1; - if (id1 == id2) + p_front = p1; + if (id1 == id2) { mat_behind = mat1; - else + p_behind = p1; + } + else { mat_behind = mat2; + p_behind = p2; + } } if (id2 > id1) { *o_front = o2; shiftby_front = shiftby2; mat_front = mat2; + p_front = p2; mat_behind = mat1; + p_behind = p1; } return true; } void geom_epsilon::eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v, double tol, int maxeval) { + /* for speed reasons, we need to "wrap" the + actual eff_chi1inv_row_grad function below, since + the original eff_chi1inv_row is a virtual function + and doesn't play well with default arguments */ + eff_chi1inv_row_grad(c, chi1inv_row, v, tol, maxeval, false); +} +void geom_epsilon::eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], + const meep::volume &v, double tol, int maxeval, + bool needs_grad) { symm_matrix meps_inv; bool fallback; eff_chi1inv_matrix(c, &meps_inv, v, tol, maxeval, fallback); if (fallback) { fallback_chi1inv_row(c, chi1inv_row, v, tol, maxeval); } else { - switch (component_direction(c)) { - case meep::X: - case meep::R: - chi1inv_row[0] = meps_inv.m00; - chi1inv_row[1] = meps_inv.m01; - chi1inv_row[2] = meps_inv.m02; - break; - case meep::Y: - case meep::P: - chi1inv_row[0] = meps_inv.m01; - chi1inv_row[1] = meps_inv.m11; - chi1inv_row[2] = meps_inv.m12; - break; - case meep::Z: - chi1inv_row[0] = meps_inv.m02; - chi1inv_row[1] = meps_inv.m12; - chi1inv_row[2] = meps_inv.m22; - break; - case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break; + /* for gradient calculations, we need to return + the dual part of the calcuation, which corresponds + to the first derivative w.r.t. u_i */ + if (needs_grad) { + switch (component_direction(c)) { + case meep::X: + case meep::R: + chi1inv_row[0] = meps_inv.m00.dpart(); + chi1inv_row[1] = meps_inv.m01.dpart(); + chi1inv_row[2] = meps_inv.m02.dpart(); + break; + case meep::Y: + case meep::P: + chi1inv_row[0] = meps_inv.m01.dpart(); + chi1inv_row[1] = meps_inv.m11.dpart(); + chi1inv_row[2] = meps_inv.m12.dpart(); + break; + case meep::Z: + chi1inv_row[0] = meps_inv.m02.dpart(); + chi1inv_row[1] = meps_inv.m12.dpart(); + chi1inv_row[2] = meps_inv.m22.dpart(); + break; + case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break; + } } - } -} - -void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, - const meep::volume &v, double tol, int maxeval, - bool &fallback) { - const geometric_object *o; - material_type mat, mat_behind; - symm_matrix meps; - vector3 p, shiftby, normal; - fallback = false; - - if (maxeval == 0) { - noavg: - get_material_pt(mat, v.center()); - trivial: - material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix); - material_gc(mat); - return; - } - - if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind)) { - get_material_pt(mat, v.center()); - if (mat && - (mat->which_subclass == material_data::MATERIAL_USER || - mat->which_subclass == material_data::MATERIAL_GRID) && - mat->do_averaging) { - fallback = true; - return; + else { // no gradient needed; standard epsilon evaluations + switch (component_direction(c)) { + case meep::X: + case meep::R: + chi1inv_row[0] = meps_inv.m00.rpart(); + chi1inv_row[1] = meps_inv.m01.rpart(); + chi1inv_row[2] = meps_inv.m02.rpart(); + break; + case meep::Y: + case meep::P: + chi1inv_row[0] = meps_inv.m01.rpart(); + chi1inv_row[1] = meps_inv.m11.rpart(); + chi1inv_row[2] = meps_inv.m12.rpart(); + break; + case meep::Z: + chi1inv_row[0] = meps_inv.m02.rpart(); + chi1inv_row[1] = meps_inv.m12.rpart(); + chi1inv_row[2] = meps_inv.m22.rpart(); + break; + case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break; + } } - else { goto trivial; } } +} - /* check for trivial case of only one object/material */ - if (material_type_equal(mat, mat_behind)) goto trivial; - - // it doesn't make sense to average metals (electric or magnetic) - if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg; - - normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o)); - if (normal.x == 0 && normal.y == 0 && normal.z == 0) - goto noavg; // couldn't get normal vector for this point, punt - geom_box pixel = gv2box(v); - pixel.low = vector3_minus(pixel.low, shiftby); - pixel.high = vector3_minus(pixel.high, shiftby); - - double fill = box_overlap_with_object(pixel, *o, tol, maxeval); +void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix &meps, + symm_matrix &eps2, cvector3 &cnormal, duals::duald fill) { + /* Ref: " Perturbation theory for anisotropic dielectric interfaces, + and application to subpixel smoothing of discretized numerical methods", + https://math.mit.edu/~stevenj/papers/KottkeFa08.pdf + */ - material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix); - symm_matrix eps2, epsinv2; symm_matrix eps1, delta; - double Rot[3][3]; - material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2); + duals::duald Rot[3][3]; eps1 = meps; - Rot[0][0] = normal.x; - Rot[1][0] = normal.y; - Rot[2][0] = normal.z; - if (fabs(normal.x) > 1e-2 || fabs(normal.y) > 1e-2) { - Rot[0][2] = normal.y; - Rot[1][2] = -normal.x; + Rot[0][0] = cnormal.x.re + 1_e * cnormal.x.im; + Rot[1][0] = cnormal.y.re + 1_e * cnormal.y.im; + Rot[2][0] = cnormal.z.re + 1_e * cnormal.z.im; + if (fabs(cnormal.x.re) > 1e-2 || fabs(cnormal.y.re) > 1e-2) { + Rot[0][2] = cnormal.y.re + 1_e * cnormal.y.im; + Rot[1][2] = -(cnormal.x.re + 1_e * cnormal.x.im); Rot[2][2] = 0; } else { /* n is not parallel to z direction, use (x x n) instead */ Rot[0][2] = 0; - Rot[1][2] = -normal.z; - Rot[2][2] = normal.y; + Rot[1][2] = -(cnormal.z.re + 1_e * cnormal.z.im); + Rot[2][2] = cnormal.y.re + 1_e * cnormal.y.im; } { /* normalize second column */ - double s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2]; + duals::duald s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2]; s = 1.0 / sqrt(s); Rot[0][2] *= s; Rot[1][2] *= s; @@ -1172,7 +1237,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma #define SWAP(a, b) \ { \ - double xxx = a; \ + duals::duald xxx = a; \ a = b; \ b = xxx; \ } @@ -1191,59 +1256,264 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma sym_matrix_invert(chi1inv_matrix, &meps); } -static int eps_ever_negative = 0; -static meep::field_type func_ft = meep::E_stuff; +duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, duals::duald u, + double eta) { + /* The fill fraction should describe the amount of u=1 material in the current pixel. + Occasionally, the distance to the nearest interface is outside the current + sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again, + we don't know if that means we are in void or solid, however, until we look + at u. To make things easy, we'll assume the cap is solid until the end, when we + can verify. + */ + duals::duald rel_fill; + if (abs(d) >= abs(r)) { + return -1.0; // garbage fill + } + else { + if (dim == meep::D1) + return (r - d) / (2 * r); + else if (dim == meep::D2 || dim == meep::Dcyl) + return (1 / (r * r * meep::pi)) * (r * r * acos(d / r) - d * sqrt(r * r - d * d)); + else if (dim == meep::D3) + return (((r - d) * (r - d)) / (4 * meep::pi * r * r * r)) * (2 * r + d); + } -struct matgrid_volavg { - meep::ndim dim; // dimensionality of voxel - double rad; // (spherical) voxel radius - double uval; // bilinearly-interpolated weight at voxel center - double ugrad_abs; // magnitude of gradient of uval - double beta; // thresholding bias - double eta; // thresholding erosion/dilation - double eps1; // trace of epsilon tensor from medium 1 - double eps2; // trace of epsilon tensor from medium 2 -}; - -static void get_uproj_w(const matgrid_volavg *mgva, double x0, double &u_proj, double &w) { - // use a linear approximation for the material grid weights around the Yee grid point - u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs * x0, mgva->beta, mgva->eta); - if (mgva->dim == meep::D1) - w = 1 / (2 * mgva->rad); - else if (mgva->dim == meep::D2 || mgva->dim == meep::Dcyl) - w = 2 * sqrt(mgva->rad * mgva->rad - x0 * x0) / (meep::pi * mgva->rad * mgva->rad); - else if (mgva->dim == meep::D3) - w = meep::pi * (mgva->rad * mgva->rad - x0 * x0) / - (4 / 3 * meep::pi * mgva->rad * mgva->rad * mgva->rad); + return rel_fill; } -#ifdef CTL_HAS_COMPLEX_INTEGRATION -static cnumber matgrid_ceps_func(int n, number *x, void *mgva_) { - (void)n; // unused - double u_proj = 0, w = 0; - matgrid_volavg *mgva = (matgrid_volavg *)mgva_; - get_uproj_w(mgva, x[0], u_proj, w); - cnumber ret; - ret.re = (1 - u_proj) * mgva->eps1 + u_proj * mgva->eps2; - ret.im = (1 - u_proj) / mgva->eps1 + u_proj / mgva->eps2; - return ret * w; +duals::duald normalize_dual(cvector3 &cv) { + duals::duald d[3], norm; + // compute the norm, u_0' + d[0] = cv.x.re + 1_e * cv.x.im; + d[1] = cv.y.re + 1_e * cv.y.im; + d[2] = cv.z.re + 1_e * cv.z.im; + + norm = sqrt(abs(d[0]) * abs(d[0]) + abs(d[1]) * abs(d[1]) + abs(d[2]) * abs(d[2])); + + // normalize the normal vector + d[0] = d[0] / norm; + d[1] = d[1] / norm; + d[2] = d[2] / norm; + + cv.x.re = d[0].rpart(); + cv.x.im = d[0].dpart(); + cv.y.re = d[1].rpart(); + cv.y.im = d[1].dpart(); + cv.z.re = d[2].rpart(); + cv.z.im = d[2].dpart(); + + return norm; } -#else -static number matgrid_eps_func(int n, number *x, void *mgva_) { - (void)n; // unused - double u_proj = 0, w = 0; - matgrid_volavg *mgva = (matgrid_volavg *)mgva_; - get_uproj_w(mgva, x[0], u_proj, w); - return w * ((1 - u_proj) * mgva->eps1 + u_proj * mgva->eps2); -} -static number matgrid_inveps_func(int n, number *x, void *mgva_) { - (void)n; // unused - double u_proj = 0, w = 0; - matgrid_volavg *mgva = (matgrid_volavg *)mgva_; - get_uproj_w(mgva, x[0], u_proj, w); - return w * ((1 - u_proj) / mgva->eps1 + u_proj / mgva->eps2); + +duals::duald get_distance(cvector3 &cv, duals::duald uval, double eta) { + duals::duald d[3], norm; + d[0] = cv.x.re + 1_e * cv.x.im; + d[1] = cv.y.re + 1_e * cv.y.im; + d[2] = cv.z.re + 1_e * cv.z.im; + + norm = sqrt(abs(d[0]) * abs(d[0]) + abs(d[1]) * abs(d[1]) + abs(d[2]) * abs(d[2])); + return (eta - uval) / norm; } -#endif + +void dual_interpolate_mat(material_type mat, symm_matrix &eps, duals::duald u) { + duals::duald u_bar = tanh_projection(u, mat->beta, mat->eta); // projection + if (std::isnan(u_bar.dpart())) + u_bar = u_bar.rpart(); // when beta=inf, we need to correct when gradient breaks down + eps.m00 = mat->medium_1.epsilon_diag.x + + u_bar * (mat->medium_2.epsilon_diag.x - mat->medium_1.epsilon_diag.x); + eps.m11 = mat->medium_1.epsilon_diag.y + + u_bar * (mat->medium_2.epsilon_diag.y - mat->medium_1.epsilon_diag.y); + eps.m22 = mat->medium_1.epsilon_diag.z + + u_bar * (mat->medium_2.epsilon_diag.z - mat->medium_1.epsilon_diag.z); + eps.m01 = mat->medium_1.epsilon_offdiag.x.re + + u_bar * (mat->medium_2.epsilon_offdiag.x.re - mat->medium_1.epsilon_offdiag.x.re); + eps.m02 = mat->medium_1.epsilon_offdiag.y.re + + u_bar * (mat->medium_2.epsilon_offdiag.y.re - mat->medium_1.epsilon_offdiag.y.re); + eps.m12 = mat->medium_1.epsilon_offdiag.z.re + + u_bar * (mat->medium_2.epsilon_offdiag.z.re - mat->medium_1.epsilon_offdiag.z.re); +} + +template +static bool AreEqual(T f1, T f2) { + return (std::fabs(f1 - f2) <= 2*std::numeric_limits::epsilon() * std::fmax(std::fabs(f1), std::fabs(f2))); +} + +void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, + const meep::volume &v, double tol, int maxeval, + bool &fallback) { + const geometric_object *o; + material_type mat, mat_behind; + vector3 p_mat, p_mat_behind, p, shiftby; + symm_matrix meps, eps2, epsinv2; + cvector3 normal; + duals::duald fill, uval; + int oi; + geom_box_tree tp; + fallback = false; + + /* first let's check if our pixel has more than a single + interface (i.e. a corner) in which case we abandon any + smoothing efforts*/ + if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)) { + noavg: + get_material_pt(mat, v.center()); + trivial: + material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix); + material_gc(mat); + return; + } + + /* we may have entered this routine (e.g. because of a material grid + backprop routine) but don't want to actually do any averaging */ + if (maxeval == 0) { + get_material_pt(mat, v.center()); + if (is_material_grid(mat)) { + tp = geom_tree_search(vec_to_vector3(v.center()), restricted_tree, &oi); + uval = matgrid_val(vec_to_vector3(v.center()), tp, oi, mat); + mgavg: + dual_interpolate_mat(mat, meps, uval); + sym_matrix_invert(chi1inv_matrix, &meps); + // get_material_pt(mat, v.center()); + // material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix); + material_gc(mat); + return; + } + else { goto trivial; } + } + + // For variable materials with do_averaging == true, switch over to slow fallback integration + // method. For material grids, however, we have to do some more logic first... + if ((is_variable(mat) && mat->do_averaging) || + (is_variable(mat_behind) && mat_behind->do_averaging)) { + if ((!is_material_grid(mat)) && (!is_material_grid(mat_behind))) { + fallback = true; + return; + } + } + + /* ------------------------------------------- */ + // One material + /* ------------------------------------------- */ + /* check for case of only one object/material + in the case of the material grid, make sure we don't need + to do any interface averaging within. + */ + if (material_type_equal(mat, mat_behind)) { + /* if we have a material grid, we need to calculate the fill fraction, + normal vector, etc. we also need to determine if we really need + to do any averaging, or if the current pixel is smooth enough + to just evaluate. + */ + if (is_material_grid(mat)) { + // if we have a damping term and β≠∞, we can't smooth + if ((mat->damping != 0) && (mat->eta != std::numeric_limits::infinity())) goto noavg; + + tp = geom_tree_search(p_mat, restricted_tree, &oi); + uval = matgrid_val(p_mat, tp, oi, mat); + normal = matgrid_grad(p_mat, tp, oi, mat); + double TOL = 4*std::numeric_limits::epsilon(); + + if (AreEqual(cvector3_re(normal).x,0.0) && + AreEqual(cvector3_re(normal).y,0.0) && + AreEqual(cvector3_re(normal).z,0.0) + ) + /* couldn't get normal vector for this point; no averaging is needed, + but we still need to interpolate onto our grid using duals */ + goto mgavg; + duals::duald u_prime = normalize_dual(normal); + duals::duald d; + /* we need to be careful when mat->eta = uval and u_prime is very small...*/ + d = (mat->eta - uval) / u_prime; + double r = v.diameter() / 2; + fill = get_material_grid_fill(v.dim, d, r, uval, mat->eta); + if (fill < 0) { + /* we are far enough away from an interface that + we don't need any averaging -- but we still need + to interpolate from the material grid */ + goto mgavg; + } + else { + /* we have a material grid interface within our pixel. + we therefore need to set the two materials used for + averaging to our corresponding solid and void materials. + */ + symm_matrix eps_0, eps_plus, eps_minus; + dual_interpolate_mat(mat, eps_0, uval); + dual_interpolate_mat(mat, eps_plus, uval + r * u_prime); + dual_interpolate_mat(mat, eps_minus, uval - r * u_prime); + + if (d > 0) { + // Case 1: d > 0. Then let ε̃₊ = ε₊ and let ε̃₋ = [ε₀d + ε₋(Δx/2 – d)] / (Δx/2). + meps = eps_plus; + eps2 = (eps_0 * d + eps_minus * (r - d)) / (r); + } + else { + // Case 2: d < 0. Then let ε̃₋ = ε₋ and let ε̃₊ = [-ε₀d + ε₊(Δx/2 + d)] / (Δx/2). + eps2 = eps_minus; + meps = (-eps_0 * d + eps_plus * (r + d)) / (r); + } + } + } + else if (is_variable(mat)) { + // no averaging is needed (not a material grid) + eval_material_pt(mat, vec_to_vector3(v.center())); + goto trivial; + // materials are non variable and uniform -- no need to average + } + else { goto trivial; } + /* ------------------------------------------- */ + // Two different materials (perhaps a m.g. and a geom object) + /* ------------------------------------------- */ + } + else { + vector3 normal_re = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o)); + if (normal_re.x == 0 && normal_re.y == 0 && normal_re.z == 0) + goto noavg; // couldn't get normal vector for this point, punt + normal = make_cvector3(normal_re, make_vector3()); + geom_box pixel = gv2box(v); + pixel.low = vector3_minus(pixel.low, shiftby); + pixel.high = vector3_minus(pixel.high, shiftby); + fill = duals::duald(box_overlap_with_object(pixel, *o, tol, maxeval), 0); + /* Evaluate materials in case they are variable. This allows us to do fast subpixel averaging + at the boundary of an object with a variable material, while remaining accurate enough if the + material is continuous over the pixel. (We make a first-order error by averaging as if the + material were constant, but only in a boundary layer of thickness 1/resolution, so the net + effect should still be second-order.) */ + if (is_variable(mat)) { + eval_material_pt(mat, p_mat); + eval_material_pt(mat_behind, p_mat_behind); + if (material_type_equal(mat, mat_behind)) goto trivial; + } + + /* to properly determine the tensors of a material + that has a material grid, we need to be careful. + Specifically, we need to include the effects of u_i + in the dual part of the material tensor */ + if (is_material_grid(mat)) { + tp = geom_tree_search(p_mat, restricted_tree, &oi); + duals::duald uval = matgrid_val(p_mat, tp, oi, mat); + dual_interpolate_mat(mat, meps, uval); + sym_matrix_invert(chi1inv_matrix, &meps); + } + else { material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix); } + if (is_material_grid(mat_behind)) { + tp = geom_tree_search(p_mat_behind, restricted_tree, &oi); + duals::duald uval = matgrid_val(p_mat_behind, tp, oi, mat); + dual_interpolate_mat(mat_behind, eps2, uval); + sym_matrix_invert(&epsinv2, &eps2); // not really needed + } + else { material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2); } + } + + // it doesn't make sense to average metals (electric or magnetic) + if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg; + + kottke_algorithm(c, chi1inv_matrix, meps, eps2, normal, fill); + material_gc(mat); +} + +static int eps_ever_negative = 0; +static meep::field_type func_ft = meep::E_stuff; #ifdef CTL_HAS_COMPLEX_INTEGRATION static cnumber ceps_func(int n, number *x, void *geomeps_) { @@ -1314,18 +1584,7 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3] material = (material_type)material_of_unshifted_point_in_tree_inobject(p, restricted_tree, &inobject); material_data *md = material; - meep::vec gradient(zero_vec(v.dim)); - double uval = 0; - - if (md->which_subclass == material_data::MATERIAL_GRID) { - geom_box_tree tp; - int oi; - tp = geom_tree_search(p, restricted_tree, &oi); - gradient = matgrid_grad(p, tp, oi, md); - uval = matgrid_val(p, tp, oi, md) + this->u_p; - } - else { gradient = normal_vector(meep::type(c), v); } - + meep::vec gradient = normal_vector(meep::type(c), v); get_material_pt(material, v.center()); material_epsmu(meep::type(c), material, &chi1p1, &chi1p1_inv); material_gc(material); @@ -1333,19 +1592,19 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3] chi1p1.m11 != chi1p1.m22 || chi1p1.m00 != chi1p1.m22 || meep::abs(gradient) < 1e-8) { int rownum = meep::component_direction(c) % 3; if (rownum == 0) { - chi1inv_row[0] = chi1p1_inv.m00; - chi1inv_row[1] = chi1p1_inv.m01; - chi1inv_row[2] = chi1p1_inv.m02; + chi1inv_row[0] = chi1p1_inv.m00.rpart(); + chi1inv_row[1] = chi1p1_inv.m01.rpart(); + chi1inv_row[2] = chi1p1_inv.m02.rpart(); } else if (rownum == 1) { - chi1inv_row[0] = chi1p1_inv.m01; - chi1inv_row[1] = chi1p1_inv.m11; - chi1inv_row[2] = chi1p1_inv.m12; + chi1inv_row[0] = chi1p1_inv.m01.rpart(); + chi1inv_row[1] = chi1p1_inv.m11.rpart(); + chi1inv_row[2] = chi1p1_inv.m12.rpart(); } else { - chi1inv_row[0] = chi1p1_inv.m02; - chi1inv_row[1] = chi1p1_inv.m12; - chi1inv_row[2] = chi1p1_inv.m22; + chi1inv_row[0] = chi1p1_inv.m02.rpart(); + chi1inv_row[1] = chi1p1_inv.m12.rpart(); + chi1inv_row[2] = chi1p1_inv.m22.rpart(); } return; } @@ -1353,79 +1612,49 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3] integer errflag; double meps, minveps; - if (md->which_subclass == material_data::MATERIAL_GRID) { - number xmin[1], xmax[1]; - matgrid_volavg mgva; - mgva.dim = v.dim; - mgva.ugrad_abs = meep::abs(gradient); - mgva.uval = uval; - mgva.rad = v.diameter() / 2; - mgva.beta = md->beta; - mgva.eta = md->eta; - mgva.eps1 = - (md->medium_1.epsilon_diag.x + md->medium_1.epsilon_diag.y + md->medium_1.epsilon_diag.z) / - 3; - mgva.eps2 = - (md->medium_2.epsilon_diag.x + md->medium_2.epsilon_diag.y + md->medium_2.epsilon_diag.z) / - 3; - xmin[0] = -v.diameter() / 2; - xmax[0] = v.diameter() / 2; -#ifdef CTL_HAS_COMPLEX_INTEGRATION - cnumber ret = cadaptive_integration(matgrid_ceps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, - maxeval, &esterr, &errflag); - meps = ret.re; - minveps = ret.im; -#else - meps = adaptive_integration(matgrid_eps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, maxeval, - &esterr, &errflag); - minveps = adaptive_integration(matgrid_inveps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, - maxeval, &esterr, &errflag); -#endif + integer n; + number xmin[3], xmax[3]; + vector3 gvmin, gvmax; + gvmin = vec_to_vector3(v.get_min_corner()); + gvmax = vec_to_vector3(v.get_max_corner()); + xmin[0] = gvmin.x; + xmax[0] = gvmax.x; + if (dim == meep::Dcyl) { + xmin[1] = gvmin.z; + xmin[2] = gvmin.y; + xmax[1] = gvmax.z; + xmax[2] = gvmax.y; } else { - integer n; - number xmin[3], xmax[3]; - vector3 gvmin, gvmax; - gvmin = vec_to_vector3(v.get_min_corner()); - gvmax = vec_to_vector3(v.get_max_corner()); - xmin[0] = gvmin.x; - xmax[0] = gvmax.x; - if (dim == meep::Dcyl) { - xmin[1] = gvmin.z; - xmin[2] = gvmin.y; - xmax[1] = gvmax.z; - xmax[2] = gvmax.y; - } - else { - xmin[1] = gvmin.y; - xmin[2] = gvmin.z; - xmax[1] = gvmax.y; - xmax[2] = gvmax.z; - } - if (xmin[2] == xmax[2]) - n = xmin[1] == xmax[1] ? 1 : 2; - else - n = 3; - double vol = 1; - for (int i = 0; i < n; ++i) - vol *= xmax[i] - xmin[i]; - if (dim == meep::Dcyl) vol *= (xmin[0] + xmax[0]) * 0.5; - eps_ever_negative = 0; - func_ft = meep::type(c); + xmin[1] = gvmin.y; + xmin[2] = gvmin.z; + xmax[1] = gvmax.y; + xmax[2] = gvmax.z; + } + if (xmin[2] == xmax[2]) + n = xmin[1] == xmax[1] ? 1 : 2; + else + n = 3; + double vol = 1; + for (int i = 0; i < n; ++i) + vol *= xmax[i] - xmin[i]; + if (dim == meep::Dcyl) vol *= (xmin[0] + xmax[0]) * 0.5; + eps_ever_negative = 0; + func_ft = meep::type(c); #ifdef CTL_HAS_COMPLEX_INTEGRATION - cnumber ret = cadaptive_integration(ceps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, - &esterr, &errflag); - meps = ret.re / vol; - minveps = ret.im / vol; + cnumber ret = cadaptive_integration(ceps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, + &esterr, &errflag); + meps = ret.re / vol; + minveps = ret.im / vol; #else - meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr, - &errflag) / - vol; - minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, - &esterr, &errflag) / - vol; + meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr, + &errflag) / + vol; + minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr, + &errflag) / + vol; #endif - } + if (eps_ever_negative) // averaging negative eps causes instability minveps = 1.0 / (meps = eps(v.center())); { @@ -1667,7 +1896,7 @@ void geom_epsilon::sigma_row(meep::component c, double sigrow[3], const meep::ve tp = geom_tree_search(p, restricted_tree, &oi); // interpolate and project onto material grid - u = tanh_projection(matgrid_val(p, tp, oi, mat) + this->u_p, mat->beta, mat->eta); + u = tanh_projection(matgrid_val(p, tp, oi, mat), mat->beta, mat->eta).rpart(); epsilon_material_grid(mat, u); // interpolate material from material grid point mat->medium.check_offdiag_im_zero_or_abort(); } @@ -1936,7 +2165,8 @@ void add_absorbing_layer(absorber_list alist, double thickness, int direction, i if needed */ geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g, vector3 center, bool _ensure_periodicity, material_type _default_material, - material_type_list extra_materials) { + material_type_list extra_materials, + bool _use_anisotropic_averaging) { // set global variables in libctlgeom based on data fields in s geom_initialize(); geometry_center = center; @@ -1987,6 +2217,7 @@ geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g, ve } geom_epsilon *geps = new geom_epsilon(*g, extra_materials, gv.pad().surroundings()); + geps->use_anisotropic_averaging = _use_anisotropic_averaging; return geps; } @@ -1996,8 +2227,9 @@ void set_materials_from_geometry(meep::structure *s, geometric_object_list g, ve bool use_anisotropic_averaging, double tol, int maxeval, bool _ensure_periodicity, material_type _default_material, absorber_list alist, material_type_list extra_materials) { - meep_geom::geom_epsilon *geps = meep_geom::make_geom_epsilon(s, &g, center, _ensure_periodicity, - _default_material, extra_materials); + meep_geom::geom_epsilon *geps = + meep_geom::make_geom_epsilon(s, &g, center, _ensure_periodicity, _default_material, + extra_materials, use_anisotropic_averaging); set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol, maxeval, alist); delete geps; } @@ -2060,6 +2292,7 @@ material_type make_user_material(user_material_func user_func, void *user_data, material_type make_file_material(const char *eps_input_file) { material_data *md = new material_data(); md->which_subclass = material_data::MATERIAL_FILE; + md->do_averaging = false; md->epsilon_dims[0] = md->epsilon_dims[1] = md->epsilon_dims[2] = 1; if (eps_input_file && eps_input_file[0]) { // file specified @@ -2095,7 +2328,9 @@ material_type make_material_grid(bool do_averaging, double beta, double eta, dou void update_weights(material_type matgrid, double *weights) { size_t N = matgrid->grid_size.x * matgrid->grid_size.y * matgrid->grid_size.z; - memcpy(matgrid->weights, weights, N * sizeof(double)); + // memcpy(matgrid->weights, weights, N * sizeof(double)); + for (size_t i = 0; i < N; i++) + matgrid->weights.push_back(weights[i]); } /******************************************************************************/ @@ -2625,7 +2860,7 @@ get_material_gradient(const meep::vec &r, // current point geom_epsilon *geps, // material meep::grid_volume &gv, // simulation grid volume double du, // step size - double *u, // matgrid + std::vector &u, // matgrid int idx // matgrid index ) { /*Compute the Aᵤx product from the -λᵀAᵤx calculation. @@ -2649,18 +2884,21 @@ get_material_gradient(const meep::vec &r, // current point // locate the proper material material_type md; + if (!gv.contains(r)) meep::abort("Location (%f, %f, %f) is outside of grid volume\n",r.x(),r.y(),r.z()); geps->get_material_pt(md, r); + // get the tensor column index corresponding to the forward component int dir_idx = 0; - if (forward_c == meep::Dx || forward_c == meep::Dr) - dir_idx = 0; - else if (forward_c == meep::Dy || forward_c == meep::Dp) - dir_idx = 1; - else if (forward_c == meep::Dz) - dir_idx = 2; - else - meep::abort("Invalid adjoint field component"); + switch (meep::component_direction(forward_c)) { + case meep::X: + case meep::R: dir_idx = 0; break; + case meep::Y: + case meep::P: dir_idx = 1; break; + case meep::Z: dir_idx = 2; break; + case meep::NO_DIRECTION: meep::abort("Invalid forward component!\n"); + } + // materials are non-dispersive if (md->trivial) { const double sd = 1.0; // FIXME: make user-changable? meep::volume v(r); @@ -2668,30 +2906,29 @@ get_material_gradient(const meep::vec &r, // current point v.set_direction_min(d, r.in_direction(d) - 0.5 * gv.inva * sd); v.set_direction_max(d, r.in_direction(d) + 0.5 * gv.inva * sd); } - double row_1[3], row_2[3], dA_du[3]; - double orig = u[idx]; - u[idx] -= du; - geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, geps->maxeval); - u[idx] += 2 * du; - geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, geps->maxeval); - u[idx] = orig; - - for (int i = 0; i < 3; i++) - dA_du[i] = (row_1[i] - row_2[i]) / (2 * du); - return dA_du[dir_idx] * fields_f; - } + double dA_du[3]; + double maxevals = geps->use_anisotropic_averaging ? geps->maxeval : 0; // punt if no smoothing + /* we can calculate the gradient w.r.t. u_i using + a dual method. We just need to "flag" the index + within our weights array with a dual value + of "1", and the duals library will take care of the + rest. */ + u[idx] += 1_e; // add dual component + geps->eff_chi1inv_row_grad(adjoint_c, dA_du, v, geps->tol, maxevals, true); + u[idx] -= 1_e; // remove dual component + return -dA_du[dir_idx] * fields_f; // note the minus sign! (from Steven's adjoint notes) + } + // materials have some dispersion else { - double orig = u[idx]; std::complex row_1[3], row_2[3], dA_du[3]; u[idx] -= du; eff_chi1inv_row_disp(adjoint_c, row_1, r, freq, geps); u[idx] += 2 * du; eff_chi1inv_row_disp(adjoint_c, row_2, r, freq, geps); - u[idx] = orig; + u[idx] -= du; - for (int i = 0; i < 3; i++) - dA_du[i] = (row_1[i] - row_2[i]) / (2 * du); - return dA_du[dir_idx] * fields_f * cond_cmp(forward_c, r, freq, geps); + return fields_f * (row_1[dir_idx] - row_2[dir_idx]) / (2 * du) * + cond_cmp(forward_c, r, freq, geps); } } @@ -2703,13 +2940,14 @@ much more complicated and it is easier to calculate the entire gradient using finite differences (at the cost of slightly less accurate gradients due to floating-point roundoff errors). */ void add_interpolate_weights(double rx, double ry, double rz, double *data, int nx, int ny, int nz, - int stride, double scaleby, double *udata, int ukind, double uval, - meep::vec r, geom_epsilon *geps, meep::component adjoint_c, - meep::component forward_c, std::complex fwd, - std::complex adj, double freq, meep::grid_volume &gv, - double du) { + int stride, double scaleby, std::vector &udata, + int ukind, duals::duald uval, meep::vec r, geom_epsilon *geps, + meep::component adjoint_c, meep::component forward_c, + std::complex fwd, std::complex adj, double freq, + meep::grid_volume &gv, double du) { int x1, y1, z1, x2, y2, z2; - double dx, dy, dz, u; + double dx, dy, dz; + duals::duald u; meep::map_coordinates(rx, ry, rz, nx, ny, nz, x1, y1, z1, x2, y2, z2, dx, dy, dz); int x_list[2] = {x1, x2}, y_list[2] = {y1, y2}, z_list[2] = {z1, z2}; @@ -2720,8 +2958,8 @@ void add_interpolate_weights(double rx, double ry, double rz, double *data, int /* define a macro to give us data(x,y,z) on the grid, in row-major order (the order used by HDF5): */ #define IDX(x, y, z) (((x)*ny + (y)) * nz + (z)) * stride -#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride]) -#define U(x, y, z) (udata[(((x)*ny + (y)) * nz + (z)) * stride]) +#define D(x, y, z) (data[IDX(x, y, z)]) +#define U(x, y, z) (udata[IDX(x, y, z)]) u = (((U(x1, y1, z1) * (1.0 - dx) + U(x2, y1, z1) * dx) * (1.0 - dy) + (U(x1, y2, z1) * (1.0 - dx) + U(x2, y2, z1) * dx) * dy) * @@ -2731,7 +2969,7 @@ in row-major order (the order used by HDF5): */ dz); if (ukind == material_data::U_MIN && u != uval) return; // TODO look into this - if (ukind == material_data::U_PROD) scaleby *= uval / u; + if (ukind == material_data::U_PROD) scaleby *= uval.rpart() / u.rpart(); for (int xi = 0; xi < lx; xi++) { for (int yi = 0; yi < ly; yi++) { @@ -2757,7 +2995,7 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge geom_box_tree tp; int oi, ois; material_data *mg, *mg_sum; - double uval; + duals::duald uval; int kind; tp = geom_tree_search(p, geps->geometry_tree, &oi); @@ -2804,13 +3042,12 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge it's up to the user to only have one unique design grid in this volume.*/ vector3 sz = mg->grid_size; double *vcur = v; - double *ucur = mg->weights; uval = tanh_projection(matgrid_val(p, tp, oi, mg), mg->beta, mg->eta); do { vector3 pb = to_geom_box_coords(p, &tp->objects[oi]); - add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, ucur, kind, - uval, vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, freq, - gv, tol); + add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, mg->weights, + kind, uval, vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, + freq, gv, tol); if (kind == material_data::U_DEFAULT) break; tp = geom_tree_search_next(p, tp, &oi); } while (tp && is_material_grid((material_data *)tp->objects[oi].o->material)); @@ -2820,10 +3057,10 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge map_lattice_coordinates(p.x, p.y, p.z); vector3 sz = mg->grid_size; double *vcur = v; - double *ucur = mg->weights; uval = tanh_projection(material_grid_val(p, mg), mg->beta, mg->eta); - add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, ucur, kind, uval, - vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, freq, gv, tol); + add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, mg->weights, kind, + uval, vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, freq, gv, + tol); } } diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp index ef77c74ac..d5f0f6ef1 100644 --- a/src/meepgeom.hpp +++ b/src/meepgeom.hpp @@ -153,9 +153,71 @@ inline vector3 make_vector3(double x = 0.0, double y = 0.0, double z = 0.0) { return v; } -typedef struct { - double m00, m01, m02, m11, m12, m22; -} symm_matrix; +inline cvector3 cvector3_scale(number s, cvector3 v) { + return make_cvector3(vector3_scale(s, cvector3_re(v)), vector3_scale(s, cvector3_im(v))); +} + +inline cvector3 cvector_zero() { return make_cvector3(make_vector3(), make_vector3()); } + +inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2) { + return make_cvector3(vector3_plus(cvector3_re(cv1), cvector3_re(cv2)), + vector3_plus(cvector3_im(cv1), cvector3_im(cv2))); +} + +struct symm_matrix { + duals::duald m00, m01, m02, m11, m12, m22; + struct symm_matrix &operator+=(const symm_matrix &rhs) { + m00 += rhs.m00; + m11 += rhs.m11; + m22 += rhs.m22; + m01 += rhs.m01; + m02 += rhs.m02; + m12 += rhs.m12; + return *this; + } + struct symm_matrix &operator+=(const duals::duald &k) { + m00 += k; + m11 += k; + m22 += k; + return *this; + } + struct symm_matrix &operator+(const symm_matrix &rhs) { + m00 += rhs.m00; + m11 += rhs.m11; + m22 += rhs.m22; + m01 += rhs.m01; + m02 += rhs.m02; + m12 += rhs.m12; + return *this; + } + struct symm_matrix &operator*(const duals::duald &k) { + m00 *= k; + m11 *= k; + m22 *= k; + m01 *= k; + m02 *= k; + m12 *= k; + return *this; + } + struct symm_matrix &operator/(const duals::duald &k) { + m00 /= k; + m11 /= k; + m22 /= k; + m01 /= k; + m02 /= k; + m12 /= k; + return *this; + } + struct symm_matrix &operator-() { + m00 = -m00; + m11 = -m11; + m22 = -m22; + m01 = -m01; + m02 = -m02; + m12 = -m12; + return *this; + } +}; struct pol { meep_geom::susceptibility user_s; @@ -173,6 +235,7 @@ class geom_epsilon : public meep::material_function { public: double u_p = 0; + bool use_anisotropic_averaging; geom_box_tree geometry_tree; geom_box_tree restricted_tree; geometric_object_list geometry; @@ -206,6 +269,8 @@ class geom_epsilon : public meep::material_function { virtual double chi1p1(meep::field_type ft, const meep::vec &r); virtual void eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v, double tol, int maxeval); + void eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v, + double tol, int maxeval, bool needs_grad); void eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, const meep::volume &v, double tol, int maxeval, bool &fallback); @@ -221,14 +286,18 @@ class geom_epsilon : public meep::material_function { private: material_type_list extra_materials; + void eval_material_pt(material_type &material, vector3 p); pol *current_pol; }; void set_dimensions(int dims); + geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g, vector3 center = make_vector3(), bool ensure_periodicity = false, material_type _default_material = vacuum, - material_type_list extra_materials = material_type_list()); + material_type_list extra_materials = material_type_list(), + bool use_anisotropic_averaging = false); + //, geometric_object_list g, material_type_list extra_materials void set_materials_from_geometry(meep::structure *s, geometric_object_list g, vector3 center = make_vector3(), @@ -266,8 +335,6 @@ bool is_material_grid(material_type mt); bool is_material_grid(void *md); bool is_variable(material_type mt); bool is_variable(void *md); -bool is_file(material_type md); -bool is_file(void *md); bool is_medium(material_type md, medium_struct **m); bool is_medium(void *md, medium_struct **m); bool is_metal(meep::field_type ft, const material_type *material); @@ -284,10 +351,8 @@ void init_libctl(material_type default_mat, bool ensure_per, meep::grid_volume * // material grid functions /***************************************************************/ void update_weights(material_type matgrid, double *weights); -meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md); -meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_object *o); -double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md); -double material_grid_val(vector3 p, material_data *md); +duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md); +duals::duald material_grid_val(vector3 p, material_data *md); geom_box_tree calculate_tree(const meep::volume &v, geometric_object_list g); void material_grids_addgradient(double *v, size_t ng, size_t nf, std::vector fields_a, diff --git a/src/support/Makefile.am b/src/support/Makefile.am index f65ba7ddb..603b0c461 100644 --- a/src/support/Makefile.am +++ b/src/support/Makefile.am @@ -1,3 +1,3 @@ noinst_LTLIBRARIES = libsupport.la -libsupport_la_SOURCES = mt19937ar.c meep_mt.h +libsupport_la_SOURCES = mt19937ar.c meep_mt.h dual.hpp diff --git a/src/support/dual.hpp b/src/support/dual.hpp new file mode 100644 index 000000000..8c18d8714 --- /dev/null +++ b/src/support/dual.hpp @@ -0,0 +1,1459 @@ +//===-- duals/dual - Dual number class --------------------------*- C++ -*-===// +// +// Part of the cppduals project. +// https://tesch1.gitlab.io/cppduals +// +// (c)2019 Michael Tesch. tesch1@gmail.com +// +// See https://gitlab.com/tesch1/cppduals/blob/master/LICENSE.txt for +// license information. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef CPPDUALS_DUAL +#define CPPDUALS_DUAL + +#ifndef PARSED_BY_DOXYGEN +#include +#include +#include +#include +#include +#include +#include +#endif + +#if !defined(CPPDUALS_IGNORE_COMPILER_VERSION) && !defined(_WIN32) +#if __cplusplus < 201103L +#error CPPDUALS needs at least a C++11 compliant compiler +#endif +#endif + +/// Configure whether system has POSIX extern int signgam; +#ifndef CPPDUALS_HAVE_SIGNGAM +#ifndef _WIN32 +#define CPPDUALS_HAVE_SIGNGAM 1 +#endif +#endif +namespace duals { + +/** +\file dual +\brief Dual number class. + +\mainpage cppduals + +\ref duals/dual is a single-header [Dual +number](https://en.wikipedia.org/wiki/Dual_number) C++ template +library that implements numbers of the type \f$(a + b \cdot +\epsilon)\f$ where \f$ \epsilon \ne 0\f$, and \f$\epsilon^2 = 0\f$. + +`duals::dual<>` can be used for "automatic" differentiation, it can +also recursively nest with itself for higher orders of differentiation +(ie for the second derivative use `duals::dual>`). It +can also be used to differentiate parts of complex functions as +`std::complex>`. This file can be used stand-alone for +dual number support, but for Eigen vectorization support rather the +file \ref duals/dual_eigen should be included. + +``` +#include + +using namespace duals::literals; + +template T f(T x) { return pow(x,pow(x,x)); } +template T df(T x) { return pow(x,-1. + x + pow(x,x)) * (1. + x*log(x) + +x*pow(log(x),2.)); } template T ddf(T x) { return (pow(x,pow(x,x)) * pow(pow(x,x - 1.) + +pow(x,x)*log(x)*(log(x) + 1.), 2.) + pow(x,pow(x,x)) * (pow(x,x - 1.) * log(x) + pow(x,x - 1.) * +(log(x) + 1.) + pow(x,x - 1.) * ((x - 1.)/x + log(x)) + pow(x,x) * log(x) * pow(log(x) + 1., 2.) )); +} + +int main() +{ + std::cout << " f(2.) = " << f(2.) << "\n"; + std::cout << " df(2.) = " << df(2.) << "\n"; + std::cout << "ddf(2.) = " << ddf(2.) << "\n"; + std::cout << " f(2+1_e) = " << f(2+1_e) << "\n"; + std::cout << " f(2+1_e).dpart() = " << f(2+1_e).dpart() << "\n"; + duals::hyperduald x(2+1_e,1+0_e); + std::cout << " f((2+1_e) + (1+0_e)_e).dpart().dpart() = " << f(x).dpart().dpart() << "\n"; +} +``` +Produces (notice the derivative in the dual-part): +``` + f(2.) = 16 + df(2.) = 107.11 +ddf(2.) = 958.755 + f(2+1_e) = (16+107.11_e) + f(2+1_e).dpart() = 107.11 + f((2+1_e) + (1+0_e)_e).dpart().dpart() = 958.755 +``` + +How this works can be seen by inspecting the infinite [Taylor +series](https://en.wikipedia.org/wiki/Taylor_series) expansion of a +function \f$ f(x) \f$ at \f$ a \f$ with \f$ x = a + b\epsilon \f$. +The series truncates itself due to the property \f$ \epsilon^2 = 0 +\f$, leaving the function's derivative at \f$ a \f$ in the "dual part" +of the result (when \f$ b = 1 \f$): + +\f[ +\begin{split} +f(a + b \epsilon) &= f(a) + f'(a)(b \epsilon) + \frac{f''(a)}{2!}(b \epsilon)^2 ++ \frac{f'''(a)}{3!}(b \epsilon)^3 + \ldots \\ \ +&= f(a) + f'(a)(b \epsilon) +\end{split} +\f] + +The class is contained in a single c++11 header `#include +`, for extended [Eigen](http://eigen.tuxfamily.org) +support, instead include the header \ref duals/dual_eigen "#include ". + +Type X in the templates below can be any value which can be +assigned to value_type. + +Type X also indicates a limitation to dual numbers of the same depth +but (possibly) different value_type as `duals::dual`. For example, +you can assign (or add/sub/mul/div) `duals::dual` and +`duals::dual`, but you can not assign +`duals::dual>` to `duals::dual`. + +Here is a synopsis of the class: + +``` +namespace duals { + +template class dual { + + typedef T value_type; + + dual(const & re = T(), const & du = T()); + dual(const dual &); + template dual(const dual &); + + T rpart() const; + T dpart() const; + + void rpart(T); + void dpart(T); + + dual operator-() const; + dual operator+() const; + + dual & operator= (const T &); + dual & operator+=(const T &); + dual & operator-=(const T &); + dual & operator*=(const T &); + dual & operator/=(const T &); + + dual & operator=(const dual &); + template dual & operator= (const dual &); + template dual & operator+=(const dual &); + template dual & operator-=(const dual &); + template dual & operator*=(const dual &); + template dual & operator/=(const dual &); + + // The comparison operators are not strictly well-defined, + // they are implemented as comparison of the real part. + + bool operator ==(const X &b) const; + bool operator !=(const X &b) const; + + bool operator <(const X &b) const; + bool operator >(const X &b) const; + bool operator <=(const X &b) const; + bool operator >=(const X &b) const; + bool operator <(const dual &b) const; + bool operator >(const dual &b) const; + bool operator <=(const dual &b) const; + bool operator >=(const dual &b) const; + +}; + +// Non-member functions: + +T rpart(dual) // Real part +T dpart(dual) // Dual part +dual dconj(dual) // Dual-conjugate + +dual random(dual low = {0,0}, dual high = {1,0}) + +dual exp(dual) +dual log(dual) +dual log10(dual) +dual pow(dual, U) +dual pow(U, dual) +dual pow(dual, dual) +dual sqrt(dual) +dual cbrt(dual) +dual sin(dual) +dual cos(dual) +dual tan(dual) +dual asin(dual) +dual acos(dual) +dual atan(dual) + +// TODO: +dual sinh(dual) +dual cosh(dual) +dual tanh(dual) +dual asinh(dual) +dual acosh(dual) +dual atanh(dual) + +// Non-differentiable operations on the real part. +T frexp(duals::dual arg, int* exp ); +duals::dual ldexp(duals::dual arg, int exp ); +T trunc(duals::dual d); +T floor(duals::dual d); +T ceil(duals::dual d); +T round(duals::dual d); +int fpclassify(duals::dual d); +bool isfinite(duals::dual d); +bool isnormal(duals::dual d); +bool isinf(duals::dual d); +bool isnan(duals::dual d); + +// Stream IO +template operator>>(basic_istream &, dual &); +template operator<<(basic_ostream &, const dual &); + +} +``` + +Some useful typedefs: + +``` +typedef dual dualf; +typedef dual duald; +typedef dual dualld; +typedef dual hyperdualf; +typedef dual hyperduald; +typedef dual hyperdualld; +typedef std::complex cdualf; +typedef std::complex cduald; +typedef std::complex cdualld; +``` + +There are also literals for dual parts defined in the inline +namespace duals::literals. They are available with `using +namespace duals;` or `using namespace duals::literals`. + +``` +using namespace duals::literals; +dualf x = 3 + 4_ef; +duald y = 3 + 4_e; +dualld z = 3 + 4_el; +``` + +And in case you dislike iostreams, there are some formatters for the +[`{fmt}`](https://github.com/fmtlib/fmt) formatting library. These +are disabled by default, but can be enabled by `#define +CPPDUALS_LIBFMT` for the `dual<>` formatter, and/or `#define +CPPDUALS_LIBFMT_COMPLEX` for the std::complex<> formatter. There are +three custom formatting flags that control how the dual numbers are +printed, these must come before the normal `{fmt}` formatting spec: +'$', '*', ','. For me these are about 3x faster than iostreams. + +``` +#define CPPDUALS_LIBFMT +#define CPPDUALS_LIBFMT_COMPLEX +#inlcude +using namespace duals::literals; + ... + string s = fmt::format("{:}", 1 + 2_e); // s = "(1.0+2.0_e)" + string s = fmt::format("{:g}", 1 + 2_e); // s = "(1+2_e)" + string s = fmt::format("{:*}", 1 + 2_e); // s = "(1.0+2.0*e)" + string s = fmt::format("{:,}", 1 + 2_e); // s = "(1.0,2.0)" + string s = fmt::format("{:*,g}", complexd(1,2_e)); // s = "((1)+(0,2)*i)" +``` + +The "archaic Greek epsilon" logo is from [Wikimedia +commons](https://commons.wikimedia.org/wiki/File:Greek_Epsilon_archaic.svg) + +Some casual reading material: + +- [ON QUATERNIONS, William Rowan Hamilton, Proceedings of the Royal Irish Academy, 3 (1847), pp. +1–16.](https://www.maths.tcd.ie/pub/HistMath/People/Hamilton/Quatern2/) +- [Basic Space-Time Transformations Expressed by Means of Two-Component Number +Systems](https://doi.org/10.12693%2Faphyspola.86.291) + + */ + +#ifndef PARSED_BY_DOXYGEN +template class dual; +#endif + +/// Check if T is dual<>, match non-duals. +template struct is_dual : std::false_type {}; + +#ifndef PARSED_BY_DOXYGEN + +/// Check if T is dual<>, match dual<>. +template struct is_dual > : std::true_type {}; + +#endif + +/// Check if T is std::complex<>, match non- std::complex<>. +template struct is_complex : std::false_type {}; + +#ifndef PARSED_BY_DOXYGEN + +/// Check if T is std::complex<>, match std::complex<>. +template struct is_complex > : std::true_type {}; + +#endif + +/// Dual_traits helper class. +template struct dual_traits { + /// Depth of T - for T=scalar this is 0. for dual_traits it + /// is 1. + enum { depth = 0 }; // -Wenum-compare + + /// The real storage type. + typedef T real_type; +}; + +#ifndef PARSED_BY_DOXYGEN + +/// dual_traits for dual<> types +template struct dual_traits > { + /// Depth to which this dual<> type is nested. One (1) is a + /// first-level dual, whereas non-duals have a depth of 0. + enum { depth = dual_traits::depth + 1 }; + + /// The real storage type. + typedef typename dual_traits::real_type real_type; +}; + +template struct dual_traits > > { + /// complex> have the same 'depth' as their dual. + enum { depth = dual_traits::depth }; + + /// The real storage type. + typedef typename dual_traits::real_type real_type; +}; + +namespace detail { + +template struct Void { typedef void type; }; +template struct has_member_type : std::false_type {}; +template +struct has_member_type::type> : std::true_type { + struct wrap { + typedef typename T::type type; + typedef typename T::type ReturnType; + }; +}; + +} // namespace detail + +/// Promote two types - default according to common_type +template struct promote : std::common_type {}; + +/// Can types A and B be promoted to a common type? +template using can_promote = detail::has_member_type >; + +template +struct promote< + dual, dual, + typename std::enable_if<(can_promote::value && + (int)dual_traits::depth == (int)dual_traits::depth)>::type> { + typedef dual::type> type; +}; +template +struct promote, U, + typename std::enable_if<(can_promote::value && + (int)dual_traits::depth >= (int)dual_traits::depth && + !is_complex::value)>::type> { + typedef dual::type> type; +}; +template +struct promote, + typename std::enable_if<(can_promote::value && + (int)dual_traits::depth >= (int)dual_traits::depth && + !is_complex::value)>::type> { + typedef dual::type> type; +}; +// ///////////////////////////////////////////////// +template +struct promote, std::complex, + typename std::enable_if<(can_promote::value && + (is_dual::value || is_dual::value))>::type> { + typedef std::complex::type> type; +}; +template +struct promote< + std::complex, U, + typename std::enable_if<(can_promote::value && (is_dual::value || is_dual::value) && + !is_complex::value)>::type> { + typedef std::complex::type> type; +}; +template +struct promote< + U, std::complex, + typename std::enable_if<(can_promote::value && (is_dual::value || is_dual::value) && + !is_complex::value)>::type> { + typedef std::complex::type> type; +}; +// ///////////////////////////////////////////////// + +#endif // PARSED_BY_DOXYGEN + +} // namespace duals + +#ifndef PARSED_BY_DOXYGEN + +#define NOMACRO // thwart macroification +#ifdef EIGEN_PI +#define MY_PI EIGEN_PI +#else +#define MY_PI M_PI +#endif + +#endif // PARSED_BY_DOXYGEN + +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_NAMESPACE_STD +#else +namespace std { +#endif + +#ifdef CPPDUALS_ENABLE_STD_IS_ARITHMETIC + +/// Duals are as arithmetic as their value_type is arithmetic. +template struct is_arithmetic > : is_arithmetic {}; + +#endif // CPPDUALS_ENABLE_IS_ARITHMETIC + +/// Duals are compound types. +template struct is_compound > : true_type {}; + +// Modification of std::numeric_limits<> per +// C++03 17.4.3.1/1, and C++11 18.3.2.3/1. +template struct numeric_limits > : numeric_limits { + static constexpr bool is_specialized = true; + static constexpr duals::dual min NOMACRO() { return numeric_limits::min NOMACRO(); } + static constexpr duals::dual lowest() { return numeric_limits::lowest(); } + static constexpr duals::dual max NOMACRO() { return numeric_limits::max NOMACRO(); } + static constexpr duals::dual epsilon() { return numeric_limits::epsilon(); } + static constexpr duals::dual round_error() { return numeric_limits::round_error(); } + static constexpr duals::dual infinity() { return numeric_limits::infinity(); } + static constexpr duals::dual quiet_NaN() { return numeric_limits::quiet_NaN(); } + static constexpr duals::dual signaling_NaN() { return numeric_limits::signaling_NaN(); } + static constexpr duals::dual denorm_min() { return numeric_limits::denorm_min(); } +}; + +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD +#else +} // namespace std +#endif + +namespace duals { + +#ifndef PARSED_BY_DOXYGEN + +// T and X are wrapped in a dual<> +#define CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X) \ + typename std::enable_if<(int)duals::dual_traits::depth == (int)duals::dual_traits::depth, \ + int>::type = 0, \ + typename std::enable_if::value, int>::type = 0 + +// Both T and U are wrapped in a dual<> +#define CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T, U) \ + typename std::enable_if<(int)duals::dual_traits::depth == (int)duals::dual_traits::depth, \ + int>::type = 0, \ + typename std::enable_if::value, int>::type = 0, \ + typename common_t = dual::type> + +// T is wrapped in a dual<>, U may or may not be. +#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U) \ + typename std::enable_if<((int)duals::dual_traits::depth >= \ + (int)duals::dual_traits::depth), \ + int>::type = 0, \ + typename std::enable_if, U>::value, int>::type = 0, \ + typename common_t = typename duals::promote, U>::type + +// T is wrapped in complex> +#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T, U) \ + typename std::enable_if< \ + ((int)duals::dual_traits::depth >= (int)duals::dual_traits::depth), \ + int>::type = 0, \ + typename std::enable_if >, U>::value, int>::type = 0, \ + typename common_t = typename duals::promote >, U>::type + +#define CPPDUALS_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0 + +#endif + +/*! \page user Background + TODO: Add text here... +*/ + +/// Abstract dual number class. Can nest with other dual numbers and +/// complex numbers. +template class dual { +public: + /// Class type of rpart() and dpart(). This type can be nested + /// dual<> or std::complex<>. + typedef T value_type; + +private: + /// The real part. + value_type _real; + /// The dual part. + value_type _dual; + +public: + /// Construct dual from optional real and dual parts. + constexpr dual(const value_type re = value_type(), const value_type du = value_type()) + : _real(re), _dual(du) {} + + /// Copy construct from a dual of equal depth. + template ::value)> + dual(const dual &x) : _real(x.rpart()), _dual(x.dpart()) {} + + /// Cast to a complex> with real part equal to *this. + operator std::complex >() { return std::complex >(*this); } + + /// Explicit cast to an arithmetic type retains the rpart() + template ::value && !is_dual::value)> + explicit operator X() const { + return X(_real); + } + + /// Get the real part. + T rpart() const { return _real; } + + /// Get the dual part. + T dpart() const { return _dual; } + + /// Set the real part. + void rpart(value_type re) { _real = re; } + + /// Get the dual part. + void dpart(value_type du) { _dual = du; } + + /// Unary negation + dual operator-() const { return dual(-_real, -_dual); } + + /// Unary nothing + dual operator+() const { return *this; } + + /// Assignment of `value_type` assigns the real part and zeros the dual part. + dual &operator=(const T &x) { + _real = x; + _dual = value_type(); + return *this; + } + + /// Add a relatively-scalar to this dual. + dual &operator+=(const T &x) { + _real += x; + return *this; + } + + /// Subtract a relatively-scalar from this dual. + dual &operator-=(const T &x) { + _real -= x; + return *this; + } + + /// Multiply a relatively-scalar with this dual. + dual &operator*=(const T &x) { + _real *= x; + _dual *= x; + return *this; + } + + /// Divide this dual by relatively-scalar. + dual &operator/=(const T &x) { + _real /= x; + _dual /= x; + return *this; + } + + // dua & operator=(const dual & x) { _real = x.rpart(); _dual = x.dpart(); } + template dual &operator=(const dual &x) { + _real = x.rpart(); + _dual = x.dpart(); + return *this; + } + + /// Add a dual of the same depth to this dual. + template dual &operator+=(const dual &x) { + _real += x.rpart(); + _dual += x.dpart(); + return *this; + } + + /// Subtract a dual of the same depth from this dual. + template dual &operator-=(const dual &x) { + _real -= x.rpart(); + _dual -= x.dpart(); + return *this; + } + + /// Multiply this dual with a dual of same depth. + template dual &operator*=(const dual &x) { + _dual = _real * x.dpart() + _dual * x.rpart(); + _real = _real * x.rpart(); + return *this; + } + + /// Divide this dual by another dual of the same or lower depth. + template dual &operator/=(const dual &x) { + _dual = (_dual * x.rpart() - _real * x.dpart()) / (x.rpart() * x.rpart()); + _real = _real / x.rpart(); + return *this; + } + + // The following comparison operators are not strictly well-defined, + // they are implemented as comparison of the real parts. +#if 0 + /// Compare this dual with another dual, comparing real parts for equality. + template + bool operator ==(const dual &b) const { return _real == b.rpart(); } + + /// Compare this dual with another dual, comparing real parts for inequality. + template + bool operator !=(const dual &b) const { return _real != b.rpart(); } + + /// Compare real part against real part of b + template + bool operator <(const dual &b) const { return _real < b.rpart(); } + + /// Compare real part against real part of b + template + bool operator >(const dual &b) const { return _real > b.rpart(); } + + /// Compare real part against real part of b + template + bool operator <=(const dual &b) const { return _real <= b.rpart(); } + + /// Compare real part against real part of b + template + bool operator >=(const dual &b) const { return _real >= b.rpart(); } +#endif +}; + +/// Get the dual's real part. +template T rpart(const dual &x) { return x.rpart(); } + +/// Get the dual's dual part. +template T dpart(const dual &x) { return x.dpart(); } + +/// R-part of complex> is non-dual complex<> (not to be confused with real()) +template std::complex rpart(const std::complex > &x) { + return std::complex(x.real().rpart(), x.imag().rpart()); +} + +/// Dual part of complex> is complex<> +template std::complex dpart(const std::complex > &x) { + return std::complex(x.real().dpart(), x.imag().dpart()); +} + +/// Get a non-dual's real part. +template ::value && !std::is_compound::value) || + is_complex::value)> +T rpart(const T &x) { + return x; +} + +/// Get a non-dual's dual part := zero. +template ::value && !std::is_compound::value) || + is_complex::value)> +T dpart(const T &) { + return T(0); +} + +#ifndef PARSED_BY_DOXYGEN + +/// Dual +-*/ ops with another entity +#define CPPDUALS_BINARY_OP(op) \ + template \ + common_t operator op(const dual &z, const dual &w) { \ + common_t x(z); \ + return x op## = w; \ + } \ + template > >::value)> \ + common_t operator op(const dual &z, const U &w) { \ + common_t x(z); \ + return x op## = w; \ + } \ + template > >::value)> \ + common_t operator op(const U &z, const dual &w) { \ + common_t x(z); \ + return x op## = w; \ + } \ + template > >::value)> \ + common_t operator op(const std::complex > &z, const U &w) { \ + common_t x(z); \ + return x op## = w; \ + } \ + template > >::value)> \ + common_t operator op(const U &z, const std::complex > &w) { \ + common_t x(z); \ + return x op## = w; \ + } + +CPPDUALS_BINARY_OP(+) +CPPDUALS_BINARY_OP(-) +CPPDUALS_BINARY_OP(*) +CPPDUALS_BINARY_OP(/) + +/// Dual compared to a non-complex lower rank thing +#define CPPDUALS_LHS_COMPARISON(op) \ + template \ + bool operator op(const dual &a, const dual &b) { \ + return a.rpart() op b.rpart(); \ + } \ + template ::value)> \ + bool operator op(const U &a, const dual &b) { \ + return a op b.rpart(); \ + } \ + template ::value)> \ + bool operator op(const dual &a, const U &b) { \ + return a.rpart() op b; \ + } + +CPPDUALS_LHS_COMPARISON(<) +CPPDUALS_LHS_COMPARISON(>) +CPPDUALS_LHS_COMPARISON(<=) +CPPDUALS_LHS_COMPARISON(>=) +CPPDUALS_LHS_COMPARISON(==) +CPPDUALS_LHS_COMPARISON(!=) + +#endif // PARSED_BY_DOXYGEN + +} // namespace duals + +// some useful functions of solely the real part +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_NAMESPACE_STD +#else +namespace std { +#endif + +#ifndef PARSED_BY_DOXYGEN + +#define make_math(T) \ + inline T(frexp)(const duals::dual &arg, int *exp) { return (frexp)(arg.rpart(), exp); } \ + inline duals::dual(ldexp)(const duals::dual &arg, int exp) { \ + return arg * std::pow((T)2, exp); \ + } \ + inline T(trunc)(const duals::dual &d) { return (trunc)(d.rpart()); } \ + inline T(floor)(const duals::dual &d) { return (floor)(d.rpart()); } \ + inline T(ceil)(const duals::dual &d) { return (ceil)(d.rpart()); } \ + inline T(round)(const duals::dual &d) { return (round)(d.rpart()); } \ + inline int(fpclassify)(const duals::dual &d) { return (fpclassify)(d.rpart()); } \ + inline bool(isfinite)(const duals::dual &d) { return (isfinite)(d.rpart()); } \ + inline bool(isnormal)(const duals::dual &d) { return (isnormal)(d.rpart()); } \ + inline bool(isinf)(const duals::dual &d) { return (isinf)(d.rpart()); } \ + inline bool(isnan)(const duals::dual &d) { return (isnan)(d.rpart()); } + +make_math(float) make_math(double) make_math(long double) + +#undef make_math + +#endif // PARSED_BY_DOXYGEN + +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD + _LIBCPP_END_NAMESPACE_STD +#else +} // namespace std +#endif + namespace duals { + + namespace randos { + + // Random real value between a and b. + template , + CPPDUALS_ENABLE_IF(!is_complex::value && !is_dual::value)> + T random(T a = T(0), T b = T(1)) { + static std::default_random_engine generator; + dist distribution(a, b); + return distribution(generator); + } + + template , + CPPDUALS_ENABLE_IF(!is_complex::value && !is_dual::value)> + T random2(T a = T(0), T b = T(1)) { + return random(a, b); + } + + // Helper class for testing - also random value in dual part. + template ::value)> + DT random2(DT a = DT(0, 0), DT b = DT(1, 1)) { + using randos::random; + return DT(a.rpart() + random2() * (b.rpart() - a.rpart()), + a.dpart() + random2() * (b.dpart() - a.dpart())); + } + + // Helper class for testing - also random value in dual part of the complex. + template ::value)> + CT random2(CT a = CT(0, 0), CT b = CT(1, 1)) { + return CT(a.real() + random2() * (b.real() - a.real()), + a.imag() + random2() * (b.imag() - a.imag())); + } + + } // namespace randos + + /// Random real and dual parts, used by Eigen's Random(), by default + // the returned value has zero dual part and is in the range [0+0_e, + // 1+0_e]. + template , + CPPDUALS_ENABLE_IF(is_dual
::value)> + DT random(DT a = DT(0, 0), DT b = DT(1, 0)) { + using randos::random; + return DT(random(a.rpart(), b.rpart()), + random(a.dpart(), b.dpart())); + } + + /// Complex Conjugate of a dual is just the dual. + template dual conj(const dual &x) { return x; } + + /// Conjugate a thing that's not dual and not complex -- it has no + /// complex value so just return it. This is different from + /// std::conj() which promotes the T to a std::complex. + template ::value && !is_complex::value && + std::is_arithmetic::value)> + T conj(const T &x) { + return x; + } + + /// Dual Conjugate, such that x*dconj(x) = rpart(x)^2. Different + /// function name from complex conjugate conj(). + template dual dconj(const dual &x) { return dual(x.rpart(), -x.dpart()); } + + /// Dual Conjugate of a complex, such that x*dconj(x) = rpart(x)^2. + /// Different function name from complex conjugate conj(). + template std::complex dconj(const std::complex &x) { + return std::complex(dconj(x.real()), dconj(x.imag())); + } + + /// Conjugate a thing that's not dual and not complex. + template ::value && !is_complex::value && + std::is_arithmetic::value)> + T dconj(const T &x) { + return x; + } + + /// Exponential e^x + template dual exp(const dual &x) { + using std::exp; + T v = exp(x.rpart()); + return dual(v, v * x.dpart()); + } + + /// Natural log ln(x) + template dual log(const dual &x) { + using std::log; + T v = log(x.rpart()); + if (x.dpart() == T(0)) + return v; + else + return dual(v, x.dpart() / x.rpart()); + } + + template dual log10(const dual &x) { + using std::log; + return log(x) / log(static_cast(10)); + } + + template dual log2(const dual &x) { + using std::log; + return log(x) / log(static_cast(2)); + } + + template + common_t pow(const dual &x, const U &y) { + using std::pow; + typedef typename common_t::value_type V; + + return common_t(pow(x.rpart(), y), + x.dpart() == V(0) ? V(0) : x.dpart() * y * pow(x.rpart(), y - U(1))); + } + + template + common_t pow(const U &x, const dual &y) { + return pow(common_t(x), y); + } + + template + common_t pow(const dual &f, const dual &g) { + using std::log; + using std::pow; +#if 1 + using std::floor; + typedef typename common_t::value_type V; + common_t result; + + if (f.rpart() == T(0) && g.rpart() >= U(1)) { + if (g.rpart() > U(1)) { result = common_t(0); } + else { result = f; } + } + else { + if (f.rpart() < T(0) && g.rpart() == floor(g.rpart())) { + V const tmp = g.rpart() * pow(f.rpart(), g.rpart() - U(1.0)); + result = common_t(pow(f.rpart(), g.rpart()), f.dpart() == T(0) ? T(0) : f.dpart() * tmp); + if (g.dpart() != U(0.0)) { + // Return a NaN when g.dpart() != 0. + result.dpart(std::numeric_limits::quiet_NaN()); + } + } + else { + // Handle the remaining cases. For cases 4,5,6,9 we allow the log() + // function to generate -HUGE_VAL or NaN, since those cases result in a + // nonfinite derivative. + V const tmp1 = pow(f.rpart(), g.rpart()); + V const tmp2 = g.rpart() * pow(f.rpart(), g.rpart() - T(1.0)); + V const tmp3 = tmp1 * log(f.rpart()); + result = common_t(tmp1, f.dpart() == T(0) && g.dpart() == U(0) + ? V(0) + : tmp2 * f.dpart() + tmp3 * g.dpart()); + } + } + + return result; +#else + T v = pow(f.rpart(), g.rpart()); + return common_t(v, pow(f.rpart(), g.rpart() - T(1)) * + (g.rpart() * f.dpart() + f.rpart() * log(f.rpart()) * g.dpart())); +#endif + } + + namespace utils { + template int sgn(T val) { return (T(0) < val) - (val < T(0)); } + } // namespace utils + + template dual abs(const dual &x) { + using std::abs; + return dual(abs(x.rpart()), x.dpart() * utils::sgn(x.rpart())); + } + + template dual fabs(const dual &x) { + using std::fabs; + return dual(fabs(x.rpart()), x.dpart() * utils::sgn(x.rpart())); + } + +#if 0 +// TODO? +template dual abs2(const dual & x) { + using std::abs; + return dual(x.rpart() * x.rpart(), + xxx x.dpart() * utils::sgn(x.rpart())); +} +#endif + + template duals::dual copysign(const duals::dual &x, const duals::dual &y) { + using std::copysign; + T r = copysign(x.rpart(), y.rpart()); + return duals::dual(r, r == x.rpart() ? x.dpart() : -x.dpart()); + } + + template duals::dual hypot(const duals::dual &x, const duals::dual &y) { + return sqrt(x * x + y * y); + } + + template duals::dual scalbn(const duals::dual &arg, int ex) { + return arg * std::pow((T)2, ex); + } + + template duals::dual(fmax)(const duals::dual &x, const duals::dual &y) { + return x.rpart() > y.rpart() ? x : y; + } + + template duals::dual(fmin)(const duals::dual &x, const duals::dual &y) { + return x.rpart() <= y.rpart() ? x : y; + } + + template duals::dual logb(const duals::dual &x) { return duals::log2(x); } + + template int(fpclassify)(const duals::dual &d) { + using std::fpclassify; + return (fpclassify)(d.rpart()); + } + template bool(isfinite)(const duals::dual &d) { + using std::isfinite; + return (isfinite)(d.rpart()); + } + template bool(isnormal)(const duals::dual &d) { + using std::isnormal; + return (isnormal)(d.rpart()); + } + template bool(isinf)(const duals::dual &d) { + using std::isinf; + return (isinf)(d.rpart()); + } + template bool(isnan)(const duals::dual &d) { + using std::isnan; + return (isnan)(d.rpart()); + } + template bool(signbit)(const duals::dual &d) { + using std::signbit; + return (signbit)(d.rpart()); + } + + template dual sqrt(const dual &x) { + using std::sqrt; + T v = sqrt(x.rpart()); + if (x.dpart() == T(0)) + return v; + else + return dual(v, x.dpart() / (T(2) * v)); + } + + template dual cbrt(const dual &x) { + using std::cbrt; + T v = cbrt(x.rpart()); + if (x.dpart() == T(0)) + return v; + else + return dual(v, x.dpart() / (T(3) * v * v)); + } + + template dual sin(const dual &x) { + using std::cos; + using std::sin; + return dual(sin(x.rpart()), x.dpart() * cos(x.rpart())); + } + + template dual cos(const dual &x) { + using std::cos; + using std::sin; + return dual(cos(x.rpart()), -sin(x.rpart()) * x.dpart()); + } + + template dual tan(const dual &x) { + using std::tan; + T v = tan(x.rpart()); + return dual(v, x.dpart() * (v * v + 1)); + } + + template dual asin(const dual &x) { + using std::asin; + using std::sqrt; + T v = asin(x.rpart()); + if (x.dpart() == T(0)) + return v; + else + return dual(v, x.dpart() / sqrt(1 - x.rpart() * x.rpart())); + } + + template dual acos(const dual &x) { + using std::acos; + using std::sqrt; + T v = acos(x.rpart()); + if (x.dpart() == T(0)) + return v; + else + return dual(v, -x.dpart() / sqrt(1 - x.rpart() * x.rpart())); + } + + template dual atan(const dual &x) { + using std::atan; + T v = atan(x.rpart()); + if (x.dpart() == T(0)) + return v; + else + return dual(v, x.dpart() / (1 + x.rpart() * x.rpart())); + } + + template dual atan2(const dual &y, const dual &x) { + using std::atan2; + T v = atan2(y.rpart(), x.rpart()); + return dual(v, (x.rpart() * y.dpart() - y.rpart() * x.dpart()) / + (y.rpart() * y.rpart() + x.rpart() * x.rpart())); + } + + // TODO + template + common_t atan2(const dual &y, const U &x) { + using std::atan2; + T v = atan2(y.rpart(), x); + return dual(v, (x * y.dpart()) / (y.rpart() * y.rpart() + x * x)); + } + + // TODO + template + common_t atan2(const U &y, const dual &x) { + using std::atan2; + T v = atan2(y, x.rpart()); + return dual(v, (-y * x.dpart()) / (y * y + x.rpart() * x.rpart())); + } + + // TODO + template dual sinh(const dual &x); + template dual cosh(const dual &x); + template dual tanh(const dual &x); + template dual asinh(const dual &x); + template dual acosh(const dual &x); + template dual atanh(const dual &x); + template dual log1p(const dual &x); + template dual expm1(const dual &x); + + //////////////////////////////////////////////////////////////// + // added for meep forward-diff operations + //////////////////////////////////////////////////////////////// + + /// The tanh function. Make sure to `#include ` before + /// `#include ` to use this function. + template dual tanh(const dual &x) { + using std::tanh; + T sech = 1.0 / std::cosh(x.rpart()); + return dual(tanh(x.rpart()), x.dpart() * sech * sech); + } + + //////////////////////////////////////////////////////////////// + // + //////////////////////////////////////////////////////////////// + + /// The error function. Make sure to `#include ` before + /// `#include ` to use this function. + template dual erf(const dual &x) { + using std::erf; + using std::exp; + using std::pow; + using std::sqrt; + return dual(erf(x.rpart()), x.dpart() * T(2) / sqrt(T(MY_PI)) * exp(-pow(x.rpart(), T(2)))); + } + + /// Error function complement (1 - erf()). + template dual erfc(const dual &x) { + using std::erfc; + using std::exp; + using std::pow; + using std::sqrt; + return dual(erfc(x.rpart()), + x.dpart() * -T(2) / sqrt(T(MY_PI)) * exp(-pow(x.rpart(), T(2)))); + } + + /// Gamma function. Approximation of the dual part. + // TODO specialize for integers + template dual tgamma(const dual &x) { + using std::tgamma; + T v = tgamma(x.rpart()); + if (x.dpart() == T(0)) + return v; + else { + int errno_saved = errno; + T h(T(1) / (1ull << (std::numeric_limits::digits / 3))); + T w((tgamma(x.rpart() + h) - tgamma(x.rpart() - h)) / (2 * h)); + errno = errno_saved; + return dual(v, x.dpart() * w); + } + } + + /// Log of absolute value of gamma function. Approximation of the dual part. + template dual lgamma(const dual &x) { + using std::lgamma; + T v = lgamma(x.rpart()); + if (x.dpart() == T(0)) + return v; + else { +#if CPPDUALS_HAVE_SIGNGAM + int signgam_saved = signgam; +#endif + int errno_saved = errno; + T h(T(1) / (1ull << (std::numeric_limits::digits / 3))); + T w((lgamma(x.rpart() + h) - lgamma(x.rpart() - h)) / (2 * h)); +#if CPPDUALS_HAVE_SIGNGAM + signgam = signgam_saved; +#endif + errno = errno_saved; + return dual(v, x.dpart() * w); + } + } + + /// Putto operator + template + std::basic_ostream<_CharT, _Traits> &operator<<(std::basic_ostream<_CharT, _Traits> &os, + const dual &x) { + std::basic_ostringstream<_CharT, _Traits> s; + s.flags(os.flags()); + s.imbue(os.getloc()); + s.precision(os.precision()); + s << '(' << x.rpart() << (x.dpart() < 0 ? "" : "+") << x.dpart() << "_e" + << (std::is_same::type, float>::value ? "f" + : std::is_same::type, double>::value ? "" + : std::is_same::type, long double>::value ? "l" + : "") + << ")"; + return os << s.str(); + } + + /// Stream reader + template + std::basic_istream &operator>>(std::basic_istream &is, dual &x) { + if (is.good()) { + ws(is); + if (is.peek() == CharT('(')) { + is.get(); + T r; + is >> r; + if (!is.fail()) { + CharT c = is.peek(); + if (c != CharT('_')) { + ws(is); + c = is.peek(); + } + if (c == CharT('+') || c == CharT('-') || c == CharT('_')) { + if (c == CharT('+')) is.get(); + T d; + if (c == CharT('_')) { + d = r; + r = 0; + } + else + is >> d; + if (!is.fail()) { + ws(is); + c = is.peek(); + if (c == CharT('_')) { + is.get(); + c = is.peek(); + if (c == CharT('e')) { + is.get(); + c = is.peek(); + if ((c == 'f' && !std::is_same::type, float>::value) || + (c == 'l' && !std::is_same::type, long double>::value)) + is.setstate(std::ios_base::failbit); + else { + if (c == 'f' || c == 'l') is.get(); + ws(is); + c = is.peek(); + if (c == ')') { + is.get(); + x = dual(r, d); + } + else + is.setstate(std::ios_base::failbit); + } + } + else + is.setstate(std::ios_base::failbit); + } + else + is.setstate(std::ios_base::failbit); + } + else + is.setstate(std::ios_base::failbit); + } + else if (c == CharT(')')) { + is.get(); + x = dual(r, T(0)); + } + else + is.setstate(std::ios_base::failbit); + } + else + is.setstate(std::ios_base::failbit); + } + else { + T r; + is >> r; + if (!is.fail()) + x = dual(r, T(0)); + else + is.setstate(std::ios_base::failbit); + } + } + else + is.setstate(std::ios_base::failbit); + return is; + } + +#if __cpp_user_defined_literals >= 200809 + /// Dual number literals in namespace duals::literals + inline namespace literals { + using duals::dual; + constexpr dual operator"" _ef(long double du) { return {0.0f, static_cast(du)}; } + constexpr dual operator"" _ef(unsigned long long du) { + return {0.0f, static_cast(du)}; + } + constexpr dual operator"" _e(long double du) { return {0.0, static_cast(du)}; } + constexpr dual operator"" _e(unsigned long long du) { + return {0.0, static_cast(du)}; + } + constexpr dual operator"" _el(long double du) { return {0.0l, du}; } + constexpr dual operator"" _el(unsigned long long du) { + return {0.0l, static_cast(du)}; + } + } // namespace literals +#endif + + typedef dual dualf; + typedef dual duald; + typedef dual dualld; + typedef dual hyperdualf; + typedef dual hyperduald; + typedef dual hyperdualld; + typedef std::complex cdualf; + typedef std::complex cduald; + typedef std::complex cdualld; + +} // namespace duals + +#ifdef CPPDUALS_LIBFMT +#include +/// duals::dual<> Formatter for libfmt https://github.com/fmtlib/fmt +/// +/// Formats a dual number (r,d) as (r+d_e), offering the same +/// formatting options as the underlying type - with the addition of +/// three optional format options, only one of which may appear +/// directly after the ':' in the format spec: '$', '*', and ','. The +/// '*' flag changes the separating _ to a *, producing (r+d*e), where +/// r and d are the formatted value_type values. The ',' flag simply +/// prints the real and dual parts separated by a comma. As a +/// concrete exmple, this formatter can produce either (3+5.4_e) or +/// (3+5.4*e) or (3,5.4) for a dual using the specs {:g}, +/// {:*g}, or {:,g}, respectively. When the '*' is NOT specified, the +/// output should be compatible with the input operator>> and the dual +/// literals below. (this implementation is a bit hacky - glad for +/// cleanups). +template +struct fmt::formatter, Char> : public fmt::formatter { + typedef fmt::formatter base; + enum style { expr, star, pair } style_ = expr; + fmt::detail::dynamic_format_specs specs_; + FMT_CONSTEXPR auto parse(format_parse_context &ctx) -> decltype(ctx.begin()) { + using handler_type = fmt::detail::dynamic_specs_handler; + auto type = fmt::detail::type_constant::value; + fmt::detail::specs_checker handler(handler_type(specs_, ctx), type); + auto it = ctx.begin(); + if (it != ctx.end()) switch (*it) { + case '$': + style_ = style::expr; + ctx.advance_to(++it); + break; + case '*': + style_ = style::star; + ctx.advance_to(++it); + break; + case ',': + style_ = style::pair; + ctx.advance_to(++it); + break; + default: break; + } + parse_format_specs(ctx.begin(), ctx.end(), handler); + return base::parse(ctx); + } + template + auto format(const duals::dual &x, FormatCtx &ctx) -> decltype(ctx.out()) { + format_to(ctx.out(), "("); + if (style_ == style::pair) { + base::format(x.rpart(), ctx); + format_to(ctx.out(), ","); + base::format(x.dpart(), ctx); + return format_to(ctx.out(), ")"); + } + if (x.rpart() || !x.dpart()) base::format(x.rpart(), ctx); + if (x.dpart()) { + if (x.rpart() && x.dpart() >= 0 && specs_.sign != sign::plus) format_to(ctx.out(), "+"); + base::format(x.dpart(), ctx); + if (style_ == style::star) + format_to(ctx.out(), "*e"); + else + format_to(ctx.out(), "_e"); + if (std::is_same::type, float>::value) format_to(ctx.out(), "f"); + if (std::is_same::type, long double>::value) format_to(ctx.out(), "l"); + } + return format_to(ctx.out(), ")"); + } +}; +#endif + +#ifdef CPPDUALS_LIBFMT_COMPLEX +#ifndef CPPDUALS_LIBFMT +#include +#endif +/// std::complex<> Formatter for libfmt https://github.com/fmtlib/fmt +/// +/// libfmt does not provide a formatter for std::complex<>, although +/// one is proposed for c++20. Anyway, at the expense of a k or two, +/// you can define CPPDUALS_LIBFMT_COMPLEX and get this one. +/// +/// The standard iostreams formatting of complex numbers is (a,b), +/// where a and b are the real and imaginary parts. This formats a +/// complex number (a+bi) as (a+bi), offering the same formatting +/// options as the underlying type - with the addition of three +/// optional format options, only one of which may appear directly +/// after the ':' in the format spec (before any fill or align): '$' +/// (the default if no flag is specified), '*', and ','. The '*' flag +/// adds a * before the 'i', producing (a+b*i), where a and b are the +/// formatted value_type values. The ',' flag simply prints the real +/// and complex parts separated by a comma (same as iostreams' format). +/// As a concrete exmple, this formatter can produce either (3+5.4i) +/// or (3+5.4*i) or (3,5.4) for a complex using the specs {:g} +/// | {:$g}, {:*g}, or {:,g}, respectively. (this implementation is a +/// bit hacky - glad for cleanups). +/// +template +struct fmt::formatter, Char> : public fmt::formatter { + typedef fmt::formatter base; + enum style { expr, star, pair } style_ = expr; + fmt::detail::dynamic_format_specs specs_; + FMT_CONSTEXPR auto parse(format_parse_context &ctx) -> decltype(ctx.begin()) { + using handler_type = fmt::detail::dynamic_specs_handler; + auto type = fmt::detail::type_constant::value; + fmt::detail::specs_checker handler(handler_type(specs_, ctx), type); + auto it = ctx.begin(); + if (it != ctx.end()) { + switch (*it) { + case '$': + style_ = style::expr; + ctx.advance_to(++it); + break; + case '*': + style_ = style::star; + ctx.advance_to(++it); + break; + case ',': + style_ = style::pair; + ctx.advance_to(++it); + break; + default: break; + } + } + parse_format_specs(ctx.begin(), ctx.end(), handler); + // todo: fixup alignment + return base::parse(ctx); + } + template + auto format(const std::complex &x, FormatCtx &ctx) -> decltype(ctx.out()) { + format_to(ctx.out(), "("); + if (style_ == style::pair) { + base::format(x.real(), ctx); + format_to(ctx.out(), ","); + base::format(x.imag(), ctx); + return format_to(ctx.out(), ")"); + } + if (x.real() || !x.imag()) base::format(x.real(), ctx); + if (x.imag()) { + if (x.real() && x.imag() >= 0 && specs_.sign != sign::plus) format_to(ctx.out(), "+"); + base::format(x.imag(), ctx); + if (style_ == style::star) + format_to(ctx.out(), "*i"); + else + format_to(ctx.out(), "i"); + if (std::is_same::type, float>::value) format_to(ctx.out(), "f"); + if (std::is_same::type, long double>::value) format_to(ctx.out(), "l"); + } + return format_to(ctx.out(), ")"); + } +}; +#endif + +#endif // CPPDUALS_DUAL