24 #include "getfem/getfem_generic_assembly_compile_and_exec.h"
25 #include "getfem/getfem_generic_assembly_functions_and_operators.h"
27 #if defined(GMM_USES_BLAS)
32 #define GA_DEBUG_INFO(a)
// Stores v1[i] * a into v2 element by element (plain assignment), walking
// both containers with iterators obtained from begin().
// NOTE(review): the loop headers are not visible in this excerpt; the four
// consecutive stores followed by a single store look like a 4-way unrolled
// body plus a remainder loop bounded by it2e -- confirm against the full file.
39 template <
class VEC1,
class VEC2>
40 inline void copy_scaled_4(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
41 auto it1 = v1.begin();
42 auto it2 = v2.begin(), it2e = v2.end();
45 *it2++ = (*it1++) * a;
46 *it2++ = (*it1++) * a;
47 *it2++ = (*it1++) * a;
48 *it2++ = (*it1++) * a;
51 *it2++ = (*it1++) * a;
// Accumulates v1[i] * a into v2 element by element (+=), walking both
// containers with iterators obtained from begin().
// NOTE(review): the loop headers are not visible in this excerpt; the four
// consecutive updates followed by a single update look like a 4-way unrolled
// body plus a remainder loop bounded by it2e -- confirm against the full file.
54 template <
class VEC1,
class VEC2>
55 inline void add_scaled_4(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
56 auto it1 = v1.begin();
57 auto it2 = v2.begin(), it2e = v2.end();
60 *it2++ += (*it1++) * a;
61 *it2++ += (*it1++) * a;
62 *it2++ += (*it1++) * a;
63 *it2++ += (*it1++) * a;
66 *it2++ += (*it1++) * a;
// Stores v1[i] * a into v2 element by element (plain assignment).
// NOTE(review): the loop headers are not visible in this excerpt; the eight
// consecutive stores followed by a single store look like an 8-way unrolled
// body plus a remainder loop bounded by it2e -- confirm against the full file.
69 template <
class VEC1,
class VEC2>
70 inline void copy_scaled_8(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
71 auto it1 = v1.begin();
72 auto it2 = v2.begin(), it2e = v2.end();
75 *it2++ = (*it1++) * a;
76 *it2++ = (*it1++) * a;
77 *it2++ = (*it1++) * a;
78 *it2++ = (*it1++) * a;
79 *it2++ = (*it1++) * a;
80 *it2++ = (*it1++) * a;
81 *it2++ = (*it1++) * a;
82 *it2++ = (*it1++) * a;
85 *it2++ = (*it1++) * a;
// Accumulates v1[i] * a into v2 element by element (+=).
// NOTE(review): the loop headers are not visible in this excerpt; the eight
// consecutive updates followed by a single update look like an 8-way unrolled
// body plus a remainder loop bounded by it2e -- confirm against the full file.
88 template <
class VEC1,
class VEC2>
89 inline void add_scaled_8(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
90 auto it1 = v1.begin();
91 auto it2 = v2.begin(), it2e = v2.end();
94 *it2++ += (*it1++) * a;
95 *it2++ += (*it1++) * a;
96 *it2++ += (*it1++) * a;
97 *it2++ += (*it1++) * a;
98 *it2++ += (*it1++) * a;
99 *it2++ += (*it1++) * a;
100 *it2++ += (*it1++) * a;
101 *it2++ += (*it1++) * a;
104 *it2++ += (*it1++) * a;
// Ordering for gauss_pt_corresp. Fields are compared in this order, and the
// first one that differs decides the result: pai, nodes.size(), the entries
// of nodes (index by index), pgt1, then pgt2.
// NOTE(review): the statement reached when every field is equal is not
// visible in this excerpt.
107 bool operator <(
const gauss_pt_corresp &gpc1,
108 const gauss_pt_corresp &gpc2) {
109 if (gpc1.pai != gpc2.pai)
110 return (gpc1.pai < gpc2.pai );
111 if (gpc1.nodes.size() != gpc2.nodes.size())
112 return (gpc1.nodes.size() < gpc2.nodes.size());
113 for (
size_type i = 0; i < gpc1.nodes.size(); ++i)
114 if (gpc1.nodes[i] != gpc2.nodes[i])
115 return (gpc1.nodes[i] < gpc2.nodes[i]);
116 if (gpc1.pgt1 != gpc2.pgt1)
117 return (gpc1.pgt1 < gpc2.pgt1);
118 if (gpc1.pgt2 != gpc2.pgt2)
119 return (gpc1.pgt2 < gpc2.pgt2);
// Ordering for ga_instruction_set::region_mim: compares mim() first, then
// region(), and finally psd() when the first two are equal.
123 bool operator <(
const ga_instruction_set::region_mim &rm1,
124 const ga_instruction_set::region_mim &rm2) {
125 if (rm1.mim() != rm2.mim())
return (rm1.mim() < rm2.mim());
126 if (rm1.region() != rm2.region())
return (rm1.region() < rm2.region());
127 return (rm1.psd() < rm2.psd());
// Instruction copying the im_data values of the current integration point
// from the vector U into the tensor t.
// Visible steps: assert that the approximate integration method of element
// cv (as reported by imd's linked mesh_im) equals pai; look up the filtered
// point index ipt for (cv, ctx.ii()); copy qdim consecutive entries of U
// starting at offset ipt*qdim into t.
// NOTE(review): the definition of cv and some member declarations are not
// visible in this excerpt.
134 struct ga_instruction_extract_local_im_data :
public ga_instruction {
137 papprox_integration &pai;
138 const base_vector &U;
139 const fem_interpolation_context &ctx;
142 GA_DEBUG_INFO(
"Instruction: extract local im data");
146 GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
147 ->approx_method() == pai,
"Im data have to be used only "
148 "on their original integration method.");
150 size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
152 "Im data with no data on the current integration point.");
153 auto it = U.begin()+ipt*qdim;
154 std::copy(it, it+qdim, t.begin());
157 ga_instruction_extract_local_im_data
158 (base_tensor &t_,
const im_data &imd_,
const base_vector &U_,
159 papprox_integration &pai_,
const fem_interpolation_context &ctx_,
161 : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
// Instruction holding a mesh_fem (mf), a global vector U, an interpolation
// context and an output coeff together with two multipliers qmult1/qmult2.
// The visible part of the body asserts that both multipliers are non-zero
// and passes coeff, qmult1 and qmult2 to a call whose callee is not visible
// in this excerpt.
166 struct ga_instruction_slice_local_dofs :
public ga_instruction {
168 const base_vector &U;
169 const fem_interpolation_context &ctx;
173 GA_DEBUG_INFO(
"Instruction: Slice local dofs");
174 GMM_ASSERT1(qmult1 != 0 && qmult2 != 0,
"Internal error");
176 coeff, qmult1, qmult2);
179 ga_instruction_slice_local_dofs(
const mesh_fem &mf_,
const base_vector &U_,
180 const fem_interpolation_context &ctx_,
183 : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
184 qmult1(qmult1_), qmult2(qmult2_) {}
// Instruction refreshing the fem precomputation pfp for the current element.
// When ctx.have_pgp() holds, the fem pf of element cv is fetched from mf and
// pfp is re-obtained from fp_pool if pfp is null, refers to a different fem,
// or refers to a different point table than ctx.pgp().
// NOTE(review): cv is either ctx.convex_num() or the first index of
// mf.convex_index(); the selecting condition and the branch taken when
// have_pgp() is false are not visible in this excerpt.
187 struct ga_instruction_update_pfp :
public ga_instruction {
189 const fem_interpolation_context &ctx;
190 fem_precomp_pool &fp_pool;
194 GA_DEBUG_INFO(
"Instruction: Pfp update");
195 if (ctx.have_pgp()) {
197 ? ctx.convex_num() : mf.convex_index().first_true();
198 pfem pf = mf.fem_of_element(cv);
199 if (!pfp || pf != pfp->get_pfem() ||
200 ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
201 pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
209 ga_instruction_update_pfp(
const mesh_fem &mf_, pfem_precomp &pfp_,
210 const fem_interpolation_context &ctx_,
211 fem_precomp_pool &fp_pool_)
212 : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
// Instruction resizing the first dimension of tensor t for the current
// element. The mesh_fem is *mfg when mfg is non-null, otherwise mfn. The
// element index is ctx.convex_num() when valid, otherwise the first index of
// mf.convex_index(). Qmult is qdim divided by the fem's target dimension.
// t is resized only when its first size differs from s.
// NOTE(review): the computation of s is not visible in this excerpt.
215 struct ga_instruction_first_ind_tensor :
public ga_instruction {
217 const fem_interpolation_context &ctx;
219 const mesh_fem *mfn, **mfg;
222 GA_DEBUG_INFO(
"Instruction: adapt first index of tensor");
223 const mesh_fem &mf = *(mfg ? *mfg : mfn);
224 GA_DEBUG_ASSERT(mfg ? *mfg : mfn,
"Internal error");
225 size_type cv_1 = ctx.is_convex_num_valid()
226 ? ctx.convex_num() : mf.convex_index().first_true();
227 pfem pf = mf.fem_of_element(cv_1);
228 GMM_ASSERT1(pf,
"An element without finite element method defined");
229 size_type Qmult = qdim / pf->target_dim();
231 if (t.sizes()[0] != s)
232 { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
236 ga_instruction_first_ind_tensor(base_tensor &t_,
237 const fem_interpolation_context &ctx_,
239 const mesh_fem **mfg_)
240 : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
// Variant of ga_instruction_first_ind_tensor that adapts the second
// dimension (index 1) of t instead of the first. The mesh_fem and element
// index are selected the same way: *mfg if mfg is non-null else mfn, and
// ctx.convex_num() if valid else the first index of mf.convex_index().
// NOTE(review): the computation of s is not visible in this excerpt.
// NOTE(review): the assertion message below spells "methode"; this is a
// runtime string and is left untouched here.
// NOTE(review): the constructor takes a non-const ctx_ although it is only
// forwarded to the base-class constructor.
243 struct ga_instruction_second_ind_tensor
244 :
public ga_instruction_first_ind_tensor {
247 GA_DEBUG_INFO(
"Instruction: adapt second index of tensor");
248 const mesh_fem &mf = *(mfg ? *mfg : mfn);
249 size_type cv_1 = ctx.is_convex_num_valid()
250 ? ctx.convex_num() : mf.convex_index().first_true();
251 pfem pf = mf.fem_of_element(cv_1);
252 GMM_ASSERT1(pf,
"An element without finite element methode defined");
253 size_type Qmult = qdim / pf->target_dim();
255 if (t.sizes()[1] != s)
256 { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
260 ga_instruction_second_ind_tensor(base_tensor &t_,
261 fem_interpolation_context &ctx_,
263 const mesh_fem **mfg_)
264 : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
// Instruction adapting the first two dimensions of tensor t, each from its
// own (context, mesh_fem, qdim) triple. For side k in {1,2}: the mesh_fem is
// *mfgk if mfgk is non-null else mfnk; the element index is ctxk.convex_num()
// when valid, else the first index of the mesh_fem's convex_index(); the size
// sk is the fem's nb_dof for that element times qdimk / target_dim.
// Both sizes must be strictly positive. A new multi_index with mi[0] = s1 and
// mi[1] = s2 is prepared when either size of t differs.
// NOTE(review): the call applying mi to t is not visible in this excerpt.
268 struct ga_instruction_two_first_ind_tensor :
public ga_instruction {
270 const fem_interpolation_context &ctx1, &ctx2;
272 const mesh_fem *mfn1, **mfg1;
274 const mesh_fem *mfn2, **mfg2;
277 GA_DEBUG_INFO(
"Instruction: adapt two first indices of tensor");
278 const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
279 const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
280 size_type cv_1 = ctx1.is_convex_num_valid()
281 ? ctx1.convex_num() : mf1.convex_index().first_true();
282 size_type cv_2 = ctx2.is_convex_num_valid()
283 ? ctx2.convex_num() : mf2.convex_index().first_true();
284 pfem pf1 = mf1.fem_of_element(cv_1);
285 GMM_ASSERT1(pf1,
"An element without finite element method defined");
286 pfem pf2 = mf2.fem_of_element(cv_2);
287 GMM_ASSERT1(pf2,
"An element without finite element method defined");
288 size_type Qmult1 = qdim1 / pf1->target_dim();
289 size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
290 size_type Qmult2 = qdim2 / pf2->target_dim();
291 size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
292 GMM_ASSERT1(s1 > 0 && s2 >0,
"Element without degrees of freedom");
293 if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
294 bgeot::multi_index mi = t.sizes();
295 mi[0] = s1; mi[1] = s2;
301 ga_instruction_two_first_ind_tensor
302 (base_tensor &t_,
const fem_interpolation_context &ctx1_,
303 const fem_interpolation_context &ctx2_,
304 size_type qdim1_,
const mesh_fem *mfn1_,
const mesh_fem **mfg1_,
305 size_type qdim2_,
const mesh_fem *mfn2_,
const mesh_fem **mfg2_)
306 : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
307 mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
// Instruction bound to a scalar output t, an interpolation context ctx and a
// component index n (all set by the constructor).
// NOTE(review): the statement that writes t is not visible in this excerpt;
// from the debug label it presumably stores component n of the current point
// -- confirm against the full file.
311 struct ga_instruction_X_component :
public ga_instruction {
313 const fem_interpolation_context &ctx;
317 GA_DEBUG_INFO(
"Instruction: X component");
322 ga_instruction_X_component
323 (scalar_type &t_,
const fem_interpolation_context &ctx_,
size_type n_)
324 : t(t_), ctx(ctx_), n(n_) {}
// Instruction bound to a tensor t and an interpolation context ctx. In debug
// builds it checks that t has as many entries as ctx.xreal().
// NOTE(review): the statement that fills t is not visible in this excerpt.
327 struct ga_instruction_X :
public ga_instruction {
329 const fem_interpolation_context &ctx;
332 GA_DEBUG_INFO(
"Instruction: X");
333 GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(),
"dimensions mismatch");
338 ga_instruction_X(base_tensor &t_,
const fem_interpolation_context &ctx_)
339 : t(t_), ctx(ctx_) {}
// Instruction bound to a tensor t and a small vector vec. The visible part of
// the body asserts that t and vec have the same number of entries.
// NOTE(review): the copy statement itself is not visible in this excerpt.
342 struct ga_instruction_copy_small_vect :
public ga_instruction {
344 const base_small_vector &vec;
347 GA_DEBUG_INFO(
"Instruction: copy small vector");
348 GMM_ASSERT1(t.size() == vec.size(),
"Invalid vector size.");
352 ga_instruction_copy_small_vect(base_tensor &t_,
353 const base_small_vector &vec_)
354 : t(t_), vec(vec_) {}
// Specialisation of ga_instruction_copy_small_vect for the unit normal
// vector: same size check between t and vec, but with an error message that
// points at likely causes (not on a boundary, or a failed transformation).
// NOTE(review): the copy statement itself is not visible in this excerpt.
357 struct ga_instruction_copy_Normal :
public ga_instruction_copy_small_vect {
360 GA_DEBUG_INFO(
"Instruction: unit normal vector");
361 GMM_ASSERT1(t.size() == vec.size(),
"Invalid outward unit normal "
362 "vector. Possible reasons: not on boundary or "
363 "transformation failed.");
367 ga_instruction_copy_Normal(base_tensor &t_,
368 const base_small_vector &Normal_)
369 : ga_instruction_copy_small_vect(t_, Normal_) {}
// Instruction computing a normal vector to a level-set. It asks mimls to
// compute the normal for ctx into the member vec (owned by this instruction
// and sized to t.size() at construction) and asserts that the result has the
// same number of entries as t.
// NOTE(review): the statement transferring vec into t is not visible in this
// excerpt.
372 struct ga_instruction_level_set_normal_vector :
public ga_instruction {
374 const mesh_im_level_set *mimls;
375 const fem_interpolation_context &ctx;
376 base_small_vector vec;
379 GA_DEBUG_INFO(
"Instruction: unit normal vector to a level-set");
380 mimls->compute_normal_vector(ctx, vec);
381 GMM_ASSERT1(t.size() == vec.size(),
"Invalid outward unit normal "
382 "vector. Possible reasons: not on boundary or "
383 "transformation failed.");
387 ga_instruction_level_set_normal_vector
388 (base_tensor &t_,
const mesh_im_level_set *mimls_,
389 const fem_interpolation_context &ctx_)
390 : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
// Instruction constructed from a tensor t_ and a scalar reference es_. The
// visible part of the body asserts that t holds exactly one entry.
// NOTE(review): the member declarations, the store into t and the
// constructor's initialiser list are not visible in this excerpt.
393 struct ga_instruction_element_size :
public ga_instruction {
398 GA_DEBUG_INFO(
"Instruction: element_size");
399 GMM_ASSERT1(t.size() == 1,
"Invalid element size.");
403 ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
// Instruction copying ctx.K() (viewed as a flat vector) into the tensor t,
// after asserting that both have the same number of entries.
// NOTE(review): the constructor's initialiser list is not visible in this
// excerpt.
407 struct ga_instruction_element_K :
public ga_instruction {
409 const fem_interpolation_context &ctx;
412 GA_DEBUG_INFO(
"Instruction: element_K");
413 GMM_ASSERT1(t.size() == (ctx.K()).size(),
"Invalid tensor size.");
414 gmm::copy(ctx.K().as_vector(), t.as_vector());
417 ga_instruction_element_K(base_tensor &t_,
418 const fem_interpolation_context &ct)
// Instruction copying ctx.B() (viewed as a flat vector) into the tensor t,
// after asserting that both have the same number of entries.
// NOTE(review): the constructor's initialiser list is not visible in this
// excerpt.
422 struct ga_instruction_element_B :
public ga_instruction {
424 const fem_interpolation_context &ctx;
427 GA_DEBUG_INFO(
"Instruction: element_B");
428 GMM_ASSERT1(t.size() == (ctx.B()).size(),
"Invalid tensor size.");
429 gmm::copy(ctx.B().as_vector(), t.as_vector());
432 ga_instruction_element_B(base_tensor &t_,
433 const fem_interpolation_context &ct)
// Instruction computing the values of the base functions into t.
// When ctx.have_pgp() holds, the values come from ctx.pfp_base_value(t, pfp).
// The other visible statements set ctx's fem from mf for the current element
// and assert that it is defined.
// NOTE(review): the `else` keyword and the call that fills t on that path
// are not visible in this excerpt.
437 struct ga_instruction_val_base :
public ga_instruction {
439 fem_interpolation_context &ctx;
441 const pfem_precomp &pfp;
444 GA_DEBUG_INFO(
"Instruction: compute value of base functions");
449 if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
451 ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
452 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
458 ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
459 const mesh_fem &mf_,
const pfem_precomp &pfp_)
460 : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
// Instruction evaluating base function values with ctx's xfem side forced to
// 1. ctx is given pfp when have_pgp() holds, otherwise the fem of the current
// element from mf; the fem must be defined. The previous xfem side is saved
// before the switch and restored afterwards.
// NOTE(review): the evaluation call between set_xfem_side(1) and the restore
// is not visible in this excerpt.
463 struct ga_instruction_xfem_plus_val_base :
public ga_instruction {
465 fem_interpolation_context &ctx;
470 GA_DEBUG_INFO(
"Instruction: compute value of base functions");
471 if (ctx.have_pgp()) ctx.set_pfp(pfp);
472 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
473 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
474 int old_xfem_side = ctx.xfem_side();
475 ctx.set_xfem_side(1);
477 ctx.set_xfem_side(old_xfem_side);
481 ga_instruction_xfem_plus_val_base(base_tensor &tt,
482 fem_interpolation_context &ct,
483 const mesh_fem &mf_, pfem_precomp &pfp_)
484 : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
// Instruction evaluating base function values with ctx's xfem side forced to
// -1. ctx is given pfp when have_pgp() holds, otherwise the fem of the
// current element from mf; the fem must be defined. The previous xfem side is
// saved before the switch and restored afterwards.
// NOTE(review): the evaluation call between set_xfem_side(-1) and the
// restore is not visible in this excerpt.
487 struct ga_instruction_xfem_minus_val_base :
public ga_instruction {
489 fem_interpolation_context &ctx;
494 GA_DEBUG_INFO(
"Instruction: compute value of base functions");
495 if (ctx.have_pgp()) ctx.set_pfp(pfp);
496 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
497 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
498 int old_xfem_side = ctx.xfem_side();
499 ctx.set_xfem_side(-1);
501 ctx.set_xfem_side(old_xfem_side);
505 ga_instruction_xfem_minus_val_base
506 (base_tensor &tt, fem_interpolation_context &ct,
507 const mesh_fem &mf_, pfem_precomp &pfp_)
508 : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
// Instruction computing the gradients of the base functions into t; reuses
// the members of ga_instruction_val_base.
// When ctx.have_pgp() holds, the gradients come from
// ctx.pfp_grad_base_value(t, pfp). The other visible statements set ctx's fem
// from mf for the current element, assert it is defined, and call
// ctx.grad_base_value(t).
// NOTE(review): the `else` keyword separating the two paths is not visible
// in this excerpt.
511 struct ga_instruction_grad_base :
public ga_instruction_val_base {
514 GA_DEBUG_INFO(
"Instruction: compute gradient of base functions");
519 if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
521 ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
522 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
523 ctx.grad_base_value(t);
528 ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
529 const mesh_fem &mf_, pfem_precomp &pfp_)
530 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Instruction computing base function gradients into t with ctx's xfem side
// forced to 1. ctx is given pfp when have_pgp() holds, otherwise the fem of
// the current element from mf; the fem must be defined. The previous xfem
// side is saved, ctx.grad_base_value(t) is evaluated on side 1, and the
// saved side is restored.
534 struct ga_instruction_xfem_plus_grad_base :
public ga_instruction_val_base {
537 GA_DEBUG_INFO(
"Instruction: compute gradient of base functions");
538 if (ctx.have_pgp()) ctx.set_pfp(pfp);
539 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
540 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
541 int old_xfem_side = ctx.xfem_side();
542 ctx.set_xfem_side(1);
543 ctx.grad_base_value(t);
544 ctx.set_xfem_side(old_xfem_side);
548 ga_instruction_xfem_plus_grad_base
549 (base_tensor &tt, fem_interpolation_context &ct,
550 const mesh_fem &mf_, pfem_precomp &pfp_)
551 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Instruction computing base function gradients into t with ctx's xfem side
// forced to -1. ctx is given pfp when have_pgp() holds, otherwise the fem of
// the current element from mf; the fem must be defined. The previous xfem
// side is saved, ctx.grad_base_value(t) is evaluated on side -1, and the
// saved side is restored.
555 struct ga_instruction_xfem_minus_grad_base :
public ga_instruction_val_base {
558 GA_DEBUG_INFO(
"Instruction: compute gradient of base functions");
559 if (ctx.have_pgp()) ctx.set_pfp(pfp);
560 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
561 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
562 int old_xfem_side = ctx.xfem_side();
563 ctx.set_xfem_side(-1);
564 ctx.grad_base_value(t);
565 ctx.set_xfem_side(old_xfem_side);
569 ga_instruction_xfem_minus_grad_base
570 (base_tensor &tt, fem_interpolation_context &ct,
571 const mesh_fem &mf_, pfem_precomp &pfp_)
572 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Instruction computing the Hessians of the base functions into t. ctx is
// given pfp when have_pgp() holds, otherwise the fem of the current element
// from mf; the fem must be defined before ctx.hess_base_value(t) is called.
577 struct ga_instruction_hess_base :
public ga_instruction_val_base {
580 GA_DEBUG_INFO(
"Instruction: compute Hessian of base functions");
581 if (ctx.have_pgp()) ctx.set_pfp(pfp);
582 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
583 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
584 ctx.hess_base_value(t);
588 ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
589 const mesh_fem &mf_, pfem_precomp &pfp_)
590 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Instruction computing base function Hessians into t with ctx's xfem side
// forced to 1. ctx is given pfp when have_pgp() holds, otherwise the fem of
// the current element from mf; the fem must be defined. The previous xfem
// side is saved, ctx.hess_base_value(t) is evaluated on side 1, and the
// saved side is restored.
594 struct ga_instruction_xfem_plus_hess_base :
public ga_instruction_val_base {
597 GA_DEBUG_INFO(
"Instruction: compute Hessian of base functions");
598 if (ctx.have_pgp()) ctx.set_pfp(pfp);
599 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
600 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
601 int old_xfem_side = ctx.xfem_side();
602 ctx.set_xfem_side(1);
603 ctx.hess_base_value(t);
604 ctx.set_xfem_side(old_xfem_side);
608 ga_instruction_xfem_plus_hess_base
609 (base_tensor &tt, fem_interpolation_context &ct,
610 const mesh_fem &mf_, pfem_precomp &pfp_)
611 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Instruction computing base function Hessians into t with ctx's xfem side
// forced to -1. ctx is given pfp when have_pgp() holds, otherwise the fem of
// the current element from mf; the fem must be defined. The previous xfem
// side is saved, ctx.hess_base_value(t) is evaluated on side -1, and the
// saved side is restored.
615 struct ga_instruction_xfem_minus_hess_base :
public ga_instruction_val_base {
618 GA_DEBUG_INFO(
"Instruction: compute Hessian of base functions");
619 if (ctx.have_pgp()) ctx.set_pfp(pfp);
620 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
621 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
622 int old_xfem_side = ctx.xfem_side();
623 ctx.set_xfem_side(-1);
624 ctx.hess_base_value(t);
625 ctx.set_xfem_side(old_xfem_side);
629 ga_instruction_xfem_minus_hess_base
630 (base_tensor &tt, fem_interpolation_context &ct,
631 const mesh_fem &mf_, pfem_precomp &pfp_)
632 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Instruction evaluating a variable into t by combining the local
// coefficients coeff with the base function values Z.
// Visible paths:
//  - ndof == 0: t is cleared and exec returns 0.
//  - a path accumulating sum(coeff[i] * Z[i]) into the scalar member a
//    (initialised from tt[0] by the constructor), asserting
//    coeff has ndof entries;
//  - target_dim == 1: coeff has ndof*qdim entries; each base value is
//    multiplied by consecutive coefficients and accumulated over t;
//  - a general path with ndof*Qmult coefficients, reading Z[j + r*ndof].
// NOTE(review): the definitions of ndof, target_dim and Qmult, the condition
// selecting the scalar path, and several loop headers are not visible in
// this excerpt.
636 struct ga_instruction_val :
public ga_instruction {
639 const base_tensor &Z;
640 const base_vector &coeff;
644 GA_DEBUG_INFO(
"Instruction: variable value");
646 if (!ndof) {
gmm::clear(t.as_vector());
return 0; }
647 GA_DEBUG_ASSERT(t.size() == qdim,
"dimensions mismatch");
650 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
651 "Wrong size for coeff vector");
652 auto itc = coeff.begin();
auto itZ = Z.begin();
653 a = (*itc++) * (*itZ++);
654 while (itc != coeff.end()) a += (*itc++) * (*itZ++);
657 if (target_dim == 1) {
658 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
659 "Wrong size for coeff vector");
660 auto itc = coeff.begin();
auto itZ = Z.begin();
661 for (
auto it = t.begin(); it != t.end(); ++it)
662 *it = (*itc++) * (*itZ);
664 for (
size_type j = 1; j < ndof; ++j, ++itZ) {
665 for (
auto it = t.begin(); it != t.end(); ++it)
666 *it += (*itc++) * (*itZ);
670 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
671 "Wrong size for coeff vector");
674 auto itc = coeff.begin();
677 for (
size_type q = 0; q < Qmult; ++q, ++itc) {
678 for (
size_type r = 0; r < target_dim; ++r)
679 *it++ += (*itc) * Z[j + r*ndof];
687 ga_instruction_val(base_tensor &tt,
const base_tensor &Z_,
689 : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
// Instruction evaluating the gradient of a variable into t from coeff and
// the base function gradients Z; reuses the members of ga_instruction_val.
// Visible paths:
//  - ndof == 0: t is cleared and exec returns 0.
//  - a path where t has N entries and coeff has ndof entries: each entry of
//    t is the sum of coeff[i] times consecutive entries of Z;
//  - target_dim == 1: t has N*qdim entries, coeff has ndof*qdim entries, and
//    coeff is traversed with stride qdim starting at offset q;
//  - a general path: t[r + q*target_dim + k*qdim] accumulates
//    coeff[j*Qmult+q] times consecutive entries of Z.
// NOTE(review): the definitions of ndof, N, target_dim and Qmult and most
// loop headers are not visible in this excerpt.
692 struct ga_instruction_grad :
public ga_instruction_val {
695 GA_DEBUG_INFO(
"Instruction: gradient");
697 if (!ndof) {
gmm::clear(t.as_vector());
return 0; }
700 GA_DEBUG_ASSERT(t.size() == N,
"dimensions mismatch");
701 GA_DEBUG_ASSERT(coeff.size() == ndof,
"Wrong size for coeff vector");
702 auto itZ = Z.begin();
703 for (
auto it = t.begin(); it != t.end(); ++it) {
704 auto itc = coeff.begin();
705 *it = (*itc++) * (*itZ++);
706 while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
710 if (target_dim == 1) {
711 GA_DEBUG_ASSERT(t.size() == N*qdim,
"dimensions mismatch");
712 GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
713 "Wrong size for coeff vector");
715 auto itZ = Z.begin();
auto it = t.begin() + q;
718 auto itc = coeff.begin() + q;
719 *it = (*itc) * (*itZ++);
721 { itc += qdim; *it += (*itc) * (*itZ++); }
726 GA_DEBUG_ASSERT(t.size() == N*qdim,
"dimensions mismatch");
727 GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
728 "Wrong size for coeff vector");
731 auto itZ = Z.begin();
733 for (
size_type r = 0; r < target_dim; ++r)
735 t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
742 ga_instruction_grad(base_tensor &tt,
const base_tensor &Z_,
744 : ga_instruction_val(tt, Z_, co, q)
// Instruction evaluating the Hessian of a variable into t from coeff and the
// base function Hessians Z; reuses the members of ga_instruction_val.
// NN is the square of the last size of t and must equal Z.sizes()[2].
// Visible paths:
//  - ndof == 0: t is cleared and exec returns 0.
//  - a path with ndof coefficients: for each of the NN entries, t is zeroed
//    and then accumulates coeff[i] times consecutive entries of Z; afterwards
//    the output iterator must have reached t.end();
//  - target_dim == 1: t[q + kl*qdim] accumulates coeff[j*qdim+q] * Z entry;
//  - a general path: t[r + q*target_dim + kl*qdim] accumulates
//    coeff[j*Qmult+q] * Z entry.
// NOTE(review): the definitions of ndof, target_dim and Qmult and several
// loop headers are not visible in this excerpt.
749 struct ga_instruction_hess :
public ga_instruction_val {
752 GA_DEBUG_INFO(
"Instruction: Hessian");
754 if (!ndof) {
gmm::clear(t.as_vector());
return 0; }
755 size_type NN = gmm::sqr(t.sizes().back());
756 GA_DEBUG_ASSERT(NN == Z.sizes()[2],
"Internal error");
758 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
759 "Wrong size for coeff vector");
760 auto it = Z.begin();
auto itt = t.begin();
761 for (
size_type kl = 0; kl < NN; ++kl, ++itt) {
762 *itt = scalar_type(0);
763 for (
auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
764 *itt += (*itc) * (*it);
766 GMM_ASSERT1(itt == t.end(),
"dimensions mismatch");
769 if (target_dim == 1) {
770 GA_DEBUG_ASSERT(t.size() == NN*qdim,
"dimensions mismatch");
771 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
772 "Wrong size for coeff vector");
775 base_tensor::const_iterator it = Z.begin();
777 for (
size_type j = 0; j < ndof; ++j, ++it)
778 t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
782 GA_DEBUG_ASSERT(t.size() == NN*qdim,
"dimensions mismatch");
783 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
784 "Wrong size for coeff vector");
787 base_tensor::const_iterator it = Z.begin();
789 for (
size_type r = 0; r < target_dim; ++r)
790 for (
size_type j = 0; j < ndof; ++j, ++it)
791 t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
798 ga_instruction_hess(base_tensor &tt,
const base_tensor &Z_,
800 : ga_instruction_val(tt, Z_, co, q)
// Instruction evaluating the divergence of a variable into t[0] from coeff
// and the base function gradients Z; reuses the members of
// ga_instruction_val.
// When ndof == 0, t is cleared and exec returns 0. In debug builds the
// operator requires Qmult*target_dim == N with either Qmult == 1 or
// target_dim == 1, and coeff must have ndof*Qmult entries. t[0] is zeroed and
// then accumulates coeff[j] * Z entry on one path and coeff[j*N+k] * Z entry
// on the other.
// NOTE(review): the loop headers, the condition separating the two paths and
// the definitions of ndof, N, target_dim and Qmult are not visible in this
// excerpt.
804 struct ga_instruction_diverg :
public ga_instruction_val {
807 GA_DEBUG_INFO(
"Instruction: divergence");
809 if (!ndof) {
gmm::clear(t.as_vector());
return 0; }
813 GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
814 "Dimensions mismatch for divergence operator");
815 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
816 "Wrong size for coeff vector");
818 t[0] = scalar_type(0);
819 base_tensor::const_iterator it = Z.begin();
822 if (k) it += (N*ndof + 1);
825 t[0] += coeff[j] * (*it);
833 t[0] += coeff[j*N+k] * (*it);
839 ga_instruction_diverg(base_tensor &tt,
const base_tensor &Z_,
841 : ga_instruction_val(tt, Z_, co, q)
// Instruction filling t with the values of test functions taken from Z.
// Visible paths:
//  - two paths that copy Z into t unchanged (the first one checks in debug
//    builds that both have the same size);
//  - target_dim == 1: t has Z.size()*Qmult*Qmult entries, is zero-filled, and
//    each base value *itZ is written at positions separated by sss;
//  - a general path: same size check and zero-fill, iterating k over
//    target_dim with the block for k starting at offset ss*k, where
//    s = t.sizes()[0], ss = s*Qmult and sss = s+1.
// NOTE(review): the conditions selecting the copy paths, the definitions of
// ndof, target_dim and Qmult, and the constructor's remaining parameters are
// not visible in this excerpt.
845 struct ga_instruction_copy_val_base :
public ga_instruction {
847 const base_tensor &Z;
851 GA_DEBUG_INFO(
"Instruction: value of test functions");
853 GA_DEBUG_ASSERT(t.size() == Z.size(),
"Wrong size for base vector");
854 std::copy(Z.begin(), Z.end(), t.begin());
859 std::copy(Z.begin(), Z.end(), t.begin());
861 if (target_dim == 1) {
863 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
864 "Wrong size for base vector");
865 std::fill(t.begin(), t.end(), scalar_type(0));
866 auto itZ = Z.begin();
871 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
875 for (
size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
879 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
880 "Wrong size for base vector");
881 std::fill(t.begin(), t.end(), scalar_type(0));
882 auto itZ = Z.begin();
883 size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
886 for (
size_type k = 0; k < target_dim; ++k) {
887 auto it = t.begin() + (ss * k);
888 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
893 { it2 += sss; *it2 = *itZ; }
902 ga_instruction_copy_val_base(base_tensor &tt,
const base_tensor &Z_,
// Instruction filling t with the gradients of test functions taken from Z;
// reuses the members of ga_instruction_copy_val_base.
// Visible paths:
//  - two paths that copy Z into t unchanged;
//  - target_dim == 1: t has Z.size()*Qmult*Qmult entries, is zero-filled, and
//    the block for index l starts at offset ssss*l with
//    ssss = s*target_dim*Qmult; each *itZ is written at positions separated
//    by sss = s+1;
//  - a general path: same size check and zero-fill, with the block for (k, l)
//    starting at offset ss*k + ssss*l.
// NOTE(review): the conditions selecting the copy paths, the loops over l,
// and the definition of ssss on the general path are not visible in this
// excerpt.
906 struct ga_instruction_copy_grad_base :
public ga_instruction_copy_val_base {
909 GA_DEBUG_INFO(
"Instruction: gradient of test functions");
911 std::copy(Z.begin(), Z.end(), t.begin());
916 std::copy(Z.begin(), Z.end(), t.begin());
918 if (target_dim == 1) {
921 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
922 "Wrong size for gradient vector");
923 std::fill(t.begin(), t.end(), scalar_type(0));
924 base_tensor::const_iterator itZ = Z.begin();
925 size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;
929 base_tensor::iterator it = t.begin() + (ssss*l);
930 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
932 base_tensor::iterator it2 = it;
934 for (
size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
940 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
941 "Wrong size for gradient vector");
942 std::fill(t.begin(), t.end(), scalar_type(0));
943 base_tensor::const_iterator itZ = Z.begin();
944 size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
949 for (
size_type k = 0; k < target_dim; ++k) {
950 base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
951 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
953 base_tensor::iterator it2 = it;
955 for (
size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
964 ga_instruction_copy_grad_base(base_tensor &tt,
const base_tensor &Z_,
966 : ga_instruction_copy_val_base(tt,Z_,q) {}
// Instruction filling t with vectorized values of test functions taken from
// Z. In debug builds t must have Z.size()*qdim*qdim entries. For each of the
// ndof base values, *itZ is written at positions of t separated by sss.
// NOTE(review): the definitions of ndof and sss, the first store of each
// base value, and the constructor's remaining parameters are not visible in
// this excerpt.
969 struct ga_instruction_copy_vect_val_base :
public ga_instruction {
971 const base_tensor &Z;
975 GA_DEBUG_INFO(
"Instruction: vectorized value of test functions");
978 GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
979 "Wrong size for base vector");
981 auto itZ = Z.begin();
986 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
990 for (
size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
995 ga_instruction_copy_vect_val_base(base_tensor &tt,
const base_tensor &Z_,
// Instruction filling t with vectorized gradients of test functions taken
// from Z; reuses the members of ga_instruction_copy_vect_val_base.
// In debug builds t must have Z.size()*qdim*qdim entries. With
// s = t.sizes()[0], the block for index l starts at offset ssss*l
// (ssss = s*qdim) and each *itZ is written at positions separated by
// sss = s+1.
// NOTE(review): the loop over l, the definition of ndof and the first store
// of each value are not visible in this excerpt.
999 struct ga_instruction_copy_vect_grad_base
1000 :
public ga_instruction_copy_vect_val_base {
1002 virtual int exec() {
1003 GA_DEBUG_INFO(
"Instruction: vectorized gradient of test functions");
1006 GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
1007 "Wrong size for gradient vector");
1009 base_tensor::const_iterator itZ = Z.begin();
1010 size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;
1014 base_tensor::iterator it = t.begin() + (ssss*l);
1015 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
1017 base_tensor::iterator it2 = it;
1019 for (
size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
1025 ga_instruction_copy_vect_grad_base(base_tensor &tt,
const base_tensor &Z_,
1027 : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
// Instruction filling t with the Hessians of test functions taken from Z;
// reuses the members of ga_instruction_copy_val_base.
// One visible path copies Z into t unchanged. The other requires (debug
// builds) t to have Z.size()*Qmult*Qmult entries and iterates klm over
// NNdim = Z.sizes()[2]*target_dim blocks, each starting at offset ss*klm
// (ss = s*Qmult, s = t.sizes()[0]); each *itZ is written at positions
// separated by sss = s+1.
// NOTE(review): the condition selecting the plain-copy path, the zero-fill
// (if any) and the first store of each value are not visible in this
// excerpt.
1030 struct ga_instruction_copy_hess_base :
public ga_instruction_copy_val_base {
1032 virtual int exec() {
1033 GA_DEBUG_INFO(
"Instruction: Hessian of test functions");
1037 gmm::copy(Z.as_vector(), t.as_vector());
1040 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
1041 "Wrong size for Hessian vector");
1043 base_tensor::const_iterator itZ = Z.begin();
1044 size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
1047 size_type NNdim = Z.sizes()[2]*target_dim;
1048 for (
size_type klm = 0; klm < NNdim; ++klm) {
1049 base_tensor::iterator it = t.begin() + (ss * klm);
1050 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
1052 base_tensor::iterator it2 = it;
1054 for (
size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
1061 ga_instruction_copy_hess_base(base_tensor &tt,
const base_tensor &Z_,
1063 : ga_instruction_copy_val_base(tt, Z_, q) {}
// Instruction filling t with the divergence of test functions computed from
// Z; reuses the members of ga_instruction_copy_val_base.
// In debug builds the operator requires Qmult*target_dim == N with either
// Qmult == 1 or target_dim == 1, and t must have ndof*Qmult entries.
// The visible iterator updates advance itZ by target_dim*ndof+1 for l != 0
// on one path, and step the output iterator by 1 or by Qmult on the two
// paths respectively.
// NOTE(review): all loop headers, the stores into t and the definitions of
// ndof, N, target_dim and Qmult are not visible in this excerpt.
1066 struct ga_instruction_copy_diverg_base :
public ga_instruction_copy_val_base {
1068 virtual int exec() {
1069 GA_DEBUG_INFO(
"Instruction: divergence of test functions");
1074 GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
1075 "Dimensions mismatch for divergence operator");
1076 GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
1077 "Wrong size for divergence vector");
1079 base_tensor::const_iterator itZ = Z.begin();
1083 base_tensor::iterator it = t.begin();
1084 if (l) itZ += target_dim*ndof+1;
1086 if (i) { ++it; ++itZ; }
1093 base_tensor::iterator it = t.begin() + j;
1096 if (i) { it += Qmult; ++itZ; }
1104 ga_instruction_copy_diverg_base(base_tensor &tt,
const base_tensor &Z_,
1106 : ga_instruction_copy_val_base(tt, Z_, q) {}
// Helper base holding the state of an elementary transformation: the input
// coefficients (coeff_in, by reference), the transformed coefficients
// (coeff_out, owned), the transformation object, the two mesh_fems and the
// interpolation context.
// The visible body recomputes the matrix M (resized to m x n, then filled by
// elemtrans->give_transformation for the current element) only when the
// cached element index icv differs from ctx.convex_num() or M is empty, and
// resizes coeff_out to the number of rows of M.
// NOTE(review): the enclosing member function's header, the product writing
// coeff_out, and the declarations of M and icv are not visible in this
// excerpt.
1109 struct ga_instruction_elementary_trans {
1110 const base_vector &coeff_in;
1111 base_vector coeff_out;
1112 pelementary_transformation elemtrans;
1113 const mesh_fem &mf1, &mf2;
1114 const fem_interpolation_context &ctx;
1119 if (icv != ctx.convex_num() || M.size() == 0) {
1120 M.base_resize(m, n);
1121 icv = ctx.convex_num();
1122 elemtrans->give_transformation(mf1, mf2, icv, M);
1124 coeff_out.resize(gmm::mat_nrows(M));
1128 ga_instruction_elementary_trans
1129 (
const base_vector &co, pelementary_transformation e,
1130 const mesh_fem &mf1_,
const mesh_fem &mf2_,
1131 const fem_interpolation_context &ctx_, base_matrix &M_,
1133 : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1135 ~ga_instruction_elementary_trans() {};
// Variable value with an elementary transformation applied first: exec calls
// do_transformation(coeff_in.size(), ndof*Qmult) and then delegates to
// ga_instruction_val::exec().
// The ga_instruction_val base is constructed with a reference to coeff_out,
// a member of the second base ga_instruction_elementary_trans, so it reads
// the transformed coefficients.
// NOTE(review): the statements defining ndof and Qmult before the call are
// not visible in this excerpt.
1138 struct ga_instruction_elementary_trans_val
1139 :
public ga_instruction_val, ga_instruction_elementary_trans {
1141 virtual int exec() {
1142 GA_DEBUG_INFO(
"Instruction: variable value with elementary "
1146 do_transformation(coeff_in.size(), ndof*Qmult);
1147 return ga_instruction_val::exec();
1150 ga_instruction_elementary_trans_val
1151 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1152 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1153 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1154 : ga_instruction_val(tt, Z_, coeff_out, q),
1155 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Gradient with an elementary transformation applied first: exec calls
// do_transformation(coeff_in.size(), ndof*Qmult) and then delegates to
// ga_instruction_grad::exec().
// The ga_instruction_grad base is constructed with a reference to coeff_out,
// a member of the second base ga_instruction_elementary_trans, so it reads
// the transformed coefficients.
// NOTE(review): the statements defining ndof and Qmult before the call are
// not visible in this excerpt.
1158 struct ga_instruction_elementary_trans_grad
1159 :
public ga_instruction_grad, ga_instruction_elementary_trans {
1161 virtual int exec() {
1162 GA_DEBUG_INFO(
"Instruction: gradient with elementary transformation");
1165 do_transformation(coeff_in.size(), ndof*Qmult);
1166 return ga_instruction_grad::exec();
1169 ga_instruction_elementary_trans_grad
1170 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1171 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1172 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1173 : ga_instruction_grad(tt, Z_, coeff_out, q),
1174 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Hessian with an elementary transformation applied first: exec calls
// do_transformation(coeff_in.size(), ndof*Qmult) and then delegates to
// ga_instruction_hess::exec().
// The ga_instruction_hess base is constructed with a reference to coeff_out,
// a member of the second base ga_instruction_elementary_trans, so it reads
// the transformed coefficients.
// NOTE(review): the statements defining ndof and Qmult before the call are
// not visible in this excerpt.
1177 struct ga_instruction_elementary_trans_hess
1178 :
public ga_instruction_hess, ga_instruction_elementary_trans {
1180 virtual int exec() {
1181 GA_DEBUG_INFO(
"Instruction: Hessian with elementary transformation");
1184 do_transformation(coeff_in.size(), ndof*Qmult);
1185 return ga_instruction_hess::exec();
1188 ga_instruction_elementary_trans_hess
1189 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1190 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1191 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1192 : ga_instruction_hess(tt, Z_, coeff_out, q),
1193 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Divergence with an elementary transformation applied first: exec calls
// do_transformation(coeff_in.size(), ndof*Qmult) and then delegates to
// ga_instruction_diverg::exec().
// The ga_instruction_diverg base is constructed with a reference to
// coeff_out, a member of the second base ga_instruction_elementary_trans, so
// it reads the transformed coefficients.
// NOTE(review): the statements defining ndof and Qmult before the call are
// not visible in this excerpt.
1196 struct ga_instruction_elementary_trans_diverg
1197 :
public ga_instruction_diverg, ga_instruction_elementary_trans {
1199 virtual int exec() {
1200 GA_DEBUG_INFO(
"Instruction: divergence with elementary transformation");
1203 do_transformation(coeff_in.size(), ndof*Qmult);
1204 return ga_instruction_diverg::exec();
1207 ga_instruction_elementary_trans_diverg
1208 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1209 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1210 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1211 : ga_instruction_diverg(tt, Z_, coeff_out, q),
1212 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Instruction refreshing the variable_group_info vgi of the group gname for
// the mesh currently referenced by inin.m.
// The visible body tests whether vgi.cached_mesh is set and equal to inin.m
// (the statement guarded by that test is not visible in this excerpt), then
// records inin.m as the cached mesh. The variable name is the group's
// variable on *inin.m, or the group's first variable when inin.m is null.
// vgi then receives that variable's name, mesh_fem, reduced flag, value
// vector U and interval I (taken from gis.really_extended_vars and the
// temporary interval when the mesh_fem is reduced, from the workspace value
// and interval otherwise), and finally the variable's factor alpha.
// gname is stored by value.
1215 struct ga_instruction_update_group_info :
public ga_instruction {
1216 const ga_workspace &workspace;
1217 const ga_instruction_set &gis;
1218 const ga_instruction_set::interpolate_info &inin;
1219 const std::string gname;
1220 ga_instruction_set::variable_group_info &vgi;
1222 virtual int exec() {
1223 GA_DEBUG_INFO(
"Instruction: Update group info for "+gname);
1224 if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
1227 vgi.cached_mesh = inin.m;
1228 const std::string &varname
1229 = inin.m ? workspace.variable_in_group(gname, *(inin.m))
1230 : workspace.first_variable_of_group(gname);
1231 vgi.varname = &varname;
1232 vgi.mf = workspace.associated_mf(varname);
1233 GA_DEBUG_ASSERT(vgi.mf,
"Group variable should always have a mesh_fem");
1234 vgi.reduced_mf = vgi.mf->is_reduced();
1235 if (vgi.reduced_mf) {
1236 const auto it = gis.really_extended_vars.find(varname);
1237 GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
1238 "Variable " << varname <<
" not in extended variables");
1239 vgi.U = &(it->second);
1240 vgi.I = &(workspace.temporary_interval_of_variable(varname));
1242 vgi.U = &(workspace.value(varname));
1243 vgi.I = &(workspace.interval_of_variable(varname));
1245 vgi.alpha = workspace.factor_of_variable(varname);
1249 ga_instruction_update_group_info
1250 (
const ga_workspace &workspace_,
const ga_instruction_set &gis_,
1251 const ga_instruction_set::interpolate_info &inin_,
1252 const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
1253 : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
// Instruction filtering on the interpolation point type held in inin.
// The "pass" branch is taken when pt_type is size_type(-1) and inin.pt_type
// is non-zero, or when pt_type is not size_type(-1) and equals inin.pt_type;
// otherwise the "filtered" branch is taken.
// NOTE(review): what each branch does to t and what exec returns (the role
// of the member nb) are not visible in this excerpt.
1257 struct ga_instruction_interpolate_filter :
public ga_instruction {
1259 const ga_instruction_set::interpolate_info &inin;
1263 virtual int exec() {
1264 GA_DEBUG_INFO(
"Instruction: interpolated filter");
1265 if ((pt_type ==
size_type(-1) && inin.pt_type) ||
1266 (pt_type !=
size_type(-1) && inin.pt_type == pt_type)) {
1267 GA_DEBUG_INFO(
"Instruction: interpolated filter: pass");
1271 GA_DEBUG_INFO(
"Instruction: interpolated filter: filtered");
1278 ga_instruction_interpolate_filter
1279 (base_tensor &t_,
const ga_instruction_set::interpolate_info &inin_,
1281 : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
// Instruction bound to a tensor t, a small vector vec and an
// interpolate_info inin. Before using vec it asserts that, if inin has a
// context, that context's element number is valid, and that t and vec have
// the same number of entries (the failure message reports both sizes).
// NOTE(review): the copy statement itself is not visible in this excerpt.
1284 struct ga_instruction_copy_interpolated_small_vect :
public ga_instruction {
1286 const base_small_vector &vec;
1287 const ga_instruction_set::interpolate_info &inin;
1289 virtual int exec() {
1290 GA_DEBUG_INFO(
"Instruction: copy small vector");
1291 GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
1292 "Invalid element, probably transformation failed");
1293 GMM_ASSERT1(t.size() == vec.size(),
1294 "Invalid vector size: " << t.size() <<
"!=" << vec.size());
1298 ga_instruction_copy_interpolated_small_vect
1299 (base_tensor &t_,
const base_small_vector &vec_,
1300 const ga_instruction_set::interpolate_info &inin_)
1301 : t(t_), vec(vec_), inin(inin_) {}
// Common base of the "interpolated variable" instructions. It resolves the
// mesh_fem and the dof vector of the variable (direct pointers `mfn`/`Un`,
// or indirect group pointers `mfg`/`Ug`) and prepares the interpolation
// context `ctx` on the current element.
1304 struct ga_instruction_interpolate :
public ga_instruction {
// mfn/Un are used directly; mfg/Ug, when non-null, are dereferenced instead.
1307 const mesh_fem *mfn, **mfg;
1308 const base_vector *Un, **Ug;
1309 fem_interpolation_context &ctx;
1313 fem_precomp_pool &fp_pool;
1314 ga_instruction_set::interpolate_info &inin;
// Validates the context, selects mf and U, checks that mf lives on the mesh
// pointed to by `*m`, and fetches the element's finite element method.
// When the context has a pgp, a fem_precomp is taken from `fp_pool`, stored
// in inin.pfps[&mf] and installed on the context.
1316 virtual int exec() {
1317 GMM_ASSERT1(ctx.is_convex_num_valid(),
"No valid element for the "
1318 "transformation. Probably transformation failed");
1319 const mesh_fem &mf = *(mfg ? *mfg : mfn);
1320 const base_vector &U = *(Ug ? *Ug : Un);
1321 GMM_ASSERT1(&(mf.linked_mesh()) == *m,
"Interpolation of a variable "
1322 "on another mesh than the one it is defined on");
1324 pfem pf = mf.fem_of_element(ctx.convex_num());
1325 GMM_ASSERT1(pf,
"Undefined finite element method");
1326 if (ctx.have_pgp()) {
1328 inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
1329 ctx.set_pfp(inin.pfps[&mf]);
// Stores all arguments as references/pointers; `q` is kept as `qdim`.
1336 ga_instruction_interpolate
1337 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1338 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1340 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1341 : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
1342 ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
// Value of a variable at the interpolated point.
1345 struct ga_instruction_interpolate_val :
public ga_instruction_interpolate {
// Runs the base preparation, then asks the finite element to interpolate
// `coeff` at the context point, writing qdim components into `t`.
1347 virtual int exec() {
1348 GA_DEBUG_INFO(
"Instruction: interpolated variable value");
1349 ga_instruction_interpolate::exec();
1350 ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
// Forwards every argument to ga_instruction_interpolate.
1355 ga_instruction_interpolate_val
1356 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1357 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1359 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1360 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_,ctx_, q, ipt_,
// Gradient of a variable at the interpolated point.
1365 struct ga_instruction_interpolate_grad :
public ga_instruction_interpolate {
// Runs the base preparation, computes the gradient into a temporary
// qdim x ctx.N() matrix, then copies it entry by entry into `t`.
1367 virtual int exec() {
1368 GA_DEBUG_INFO(
"Instruction: interpolated variable grad");
1369 ga_instruction_interpolate::exec();
1370 base_matrix v(qdim, ctx.N());
1371 ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
1372 gmm::copy(v.as_vector(), t.as_vector());
// Forwards every argument to ga_instruction_interpolate.
1376 ga_instruction_interpolate_grad
1377 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1378 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1380 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1381 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
// Hessian of a variable at the interpolated point.
1386 struct ga_instruction_interpolate_hess :
public ga_instruction_interpolate {
// Runs the base preparation, computes the Hessian into a temporary
// qdim x (N*N) matrix with N = ctx.N(), then copies it into `t`.
1388 virtual int exec() {
1389 GA_DEBUG_INFO(
"Instruction: interpolated variable hessian");
1390 ga_instruction_interpolate::exec();
1391 base_matrix v(qdim, ctx.N()*ctx.N());
1392 ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
1393 gmm::copy(v.as_vector(), t.as_vector());
// Forwards every argument to ga_instruction_interpolate.
1397 ga_instruction_interpolate_hess
1398 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1399 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1401 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1402 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
// Divergence of a variable at the interpolated point.
1407 struct ga_instruction_interpolate_diverg :
public ga_instruction_interpolate {
// Runs the base preparation, then stores the scalar divergence in t[0].
1409 virtual int exec() {
1410 GA_DEBUG_INFO(
"Instruction: interpolated variable divergence");
1411 ga_instruction_interpolate::exec();
1412 ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
// Forwards every argument to ga_instruction_interpolate.
1416 ga_instruction_interpolate_diverg
1417 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1418 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1420 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1421 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
// Common helper of the "interpolated base function" instructions. It
// prepares the context stored in `inin.ctx` for the finite element of the
// selected mesh_fem on the current element of the transformation.
1426 struct ga_instruction_interpolate_base {
// mfn is used directly; mfg, when non-null, is dereferenced instead.
1429 const mesh_fem *mfn, **mfg;
1431 ga_instruction_set::interpolate_info &inin;
1432 fem_precomp_pool &fp_pool;
// Validates inin.ctx, selects the mesh_fem, checks it is defined on the mesh
// pointed to by `*m` and fetches the element's finite element method. With
// a pgp available a fem_precomp from `fp_pool` is cached in inin.pfps[&mf]
// and installed on the context.
// NOTE(review): set_pf(pf) is presumably the branch taken when no pgp is
// available -- TODO confirm.
1434 virtual int exec() {
1435 GMM_ASSERT1(inin.ctx.is_convex_num_valid(),
"No valid element for "
1436 "the transformation. Probably transformation failed");
1437 const mesh_fem &mf = *(mfg ? *mfg : mfn);
1438 GMM_ASSERT1(&(mf.linked_mesh()) == *m,
"Interpolation of a variable "
1439 "on another mesh than the one it is defined on");
1441 pfem pf = mf.fem_of_element(inin.ctx.convex_num());
1442 GMM_ASSERT1(pf,
"Undefined finite element method");
1444 if (inin.ctx.have_pgp()) {
1446 inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
1447 inin.ctx.set_pfp(inin.pfps[&mf]);
1449 inin.ctx.set_pf(pf);
// Stores the mesh / mesh_fem pointers, the integration point index and the
// references to the interpolation info and the precomputation pool.
1454 ga_instruction_interpolate_base
1455 (
const mesh **m_,
const mesh_fem *mfn_,
const mesh_fem **mfg_,
1456 const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
1457 fem_precomp_pool &fp_pool_)
1458 : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
1459 fp_pool(fp_pool_) {}
// Values of the base (test) functions at the interpolated point.
1462 struct ga_instruction_interpolate_val_base
1463 :
public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
// Prepares inin.ctx, evaluates the real base values into ZZ, then delegates
// to ga_instruction_copy_val_base::exec() and returns its result.
1465 virtual int exec() {
1466 GA_DEBUG_INFO(
"Instruction: interpolated base value");
1467 ga_instruction_interpolate_base::exec();
1468 inin.ctx.pf()->real_base_value(inin.ctx, ZZ);
1469 return ga_instruction_copy_val_base::exec();
// The copy base reads from the member ZZ; the interpolate base receives the
// mesh / mesh_fem arguments.
1472 ga_instruction_interpolate_val_base
1473 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1475 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1476 : ga_instruction_copy_val_base(t_, ZZ, q),
1477 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Gradients of the base (test) functions at the interpolated point.
1481 struct ga_instruction_interpolate_grad_base
1482 :
public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
// Prepares inin.ctx, evaluates the real base gradients into ZZ, then
// delegates to ga_instruction_copy_grad_base::exec().
1484 virtual int exec() {
1485 GA_DEBUG_INFO(
"Instruction: interpolated base grad");
1486 ga_instruction_interpolate_base::exec();
1487 inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ);
1488 return ga_instruction_copy_grad_base::exec();
// The copy base reads from the member ZZ; the interpolate base receives the
// mesh / mesh_fem arguments.
1491 ga_instruction_interpolate_grad_base
1492 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1494 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1495 : ga_instruction_copy_grad_base(t_, ZZ, q),
1496 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Hessians of the base (test) functions at the interpolated point.
1500 struct ga_instruction_interpolate_hess_base
1501 :
public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
// Prepares inin.ctx, evaluates the real base Hessians into ZZ, then
// delegates to ga_instruction_copy_hess_base::exec().
1503 virtual int exec() {
1504 GA_DEBUG_INFO(
"Instruction: interpolated base hessian");
1505 ga_instruction_interpolate_base::exec();
1506 inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ);
1507 return ga_instruction_copy_hess_base::exec();
// The copy base reads from the member ZZ; the interpolate base receives the
// mesh / mesh_fem arguments.
1510 ga_instruction_interpolate_hess_base
1511 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1513 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1514 : ga_instruction_copy_hess_base(t_, ZZ, q),
1515 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Divergences of the base (test) functions at the interpolated point.
1519 struct ga_instruction_interpolate_diverg_base
1520 :
public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
// Prepares inin.ctx and evaluates the real base *gradients* into ZZ; the
// divergence itself is produced by ga_instruction_copy_diverg_base::exec().
1522 virtual int exec() {
1523 GA_DEBUG_INFO(
"Instruction: interpolated base divergence");
1524 ga_instruction_interpolate_base::exec();
1525 inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ);
1526 return ga_instruction_copy_diverg_base::exec();
// The copy base reads from the member ZZ; the interpolate base receives the
// mesh / mesh_fem arguments.
1529 ga_instruction_interpolate_diverg_base
1530 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1532 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1533 : ga_instruction_copy_diverg_base(t_, ZZ, q),
1534 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Common helper of the elementary-transformation instructions: applies the
// matrix M given by `elemtrans` for the pair (mf1, mf2) to the tensor t_in
// and writes the result into t_out.
1539 struct ga_instruction_elementary_trans_base {
1542 pelementary_transformation elemtrans;
1543 const mesh_fem &mf1, &mf2;
1544 const fem_interpolation_context &ctx;
// M is rebuilt only when the current element differs from the cached `icv`
// or when M is still empty; otherwise the cached matrix is reused.
1549 if (icv != ctx.convex_num() || M.size() == 0) {
1550 M.base_resize(m, n);
1551 icv = ctx.convex_num();
1552 elemtrans->give_transformation(mf1, mf2, icv, M);
// Reduction of t_in with M along index 0.
1554 t_out.mat_reduction(t_in, M, 0);
// Keeps references to the output tensor, both mesh_fems, the context and
// the externally owned cache (M_, icv_).
1557 ga_instruction_elementary_trans_base
1558 (base_tensor &t_, pelementary_transformation e,
const mesh_fem &mf1_,
1559 const mesh_fem &mf2_,
1560 const fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1561 : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
// Values of test functions after an elementary transformation.
1565 struct ga_instruction_elementary_trans_val_base
1566 :
public ga_instruction_copy_val_base,
1567 ga_instruction_elementary_trans_base {
// Sizes the intermediate tensor t_in, fills it with the untransformed base
// values through the copy base, then applies the transformation matrix.
1569 virtual int exec() {
1570 GA_DEBUG_INFO(
"Instruction: value of test functions with elementary "
1574 t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
1575 ga_instruction_copy_val_base::exec();
1576 do_transformation(t_out.sizes()[0], ndof*Qmult);
// The copy base writes into t_in (not t_); t_ is the transformed output.
1580 ga_instruction_elementary_trans_val_base
1581 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1582 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1583 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1584 : ga_instruction_copy_val_base(t_in, Z_, q),
1585 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Gradients of test functions after an elementary transformation.
1589 struct ga_instruction_elementary_trans_grad_base
1590 :
public ga_instruction_copy_grad_base,
1591 ga_instruction_elementary_trans_base {
// Sizes the intermediate tensor t_in (third dimension taken from Z), fills
// it through the copy base, then applies the transformation matrix.
1593 virtual int exec() {
1594 GA_DEBUG_INFO(
"Instruction: gradient of test functions with elementary "
1598 t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1599 ga_instruction_copy_grad_base::exec();
1600 do_transformation(t_out.sizes()[0], ndof*Qmult);
// The copy base writes into t_in (not t_); t_ is the transformed output.
1604 ga_instruction_elementary_trans_grad_base
1605 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1606 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1607 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1608 : ga_instruction_copy_grad_base(t_in, Z_, q),
1609 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Hessians of test functions after an elementary transformation.
1613 struct ga_instruction_elementary_trans_hess_base
1614 :
public ga_instruction_copy_hess_base,
1615 ga_instruction_elementary_trans_base {
// Sizes the intermediate tensor t_in (third dimension taken from Z), fills
// it through the copy base, then applies the transformation matrix.
1617 virtual int exec() {
1618 GA_DEBUG_INFO(
"Instruction: Hessian of test functions with elementary "
1622 t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1623 ga_instruction_copy_hess_base::exec();
1624 do_transformation(t_out.sizes()[0], ndof*Qmult);
// The copy base writes into t_in (not t_); t_ is the transformed output.
1628 ga_instruction_elementary_trans_hess_base
1629 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1630 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1631 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1632 : ga_instruction_copy_hess_base(t_in, Z_, q),
1633 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Divergences of test functions after an elementary transformation.
1637 struct ga_instruction_elementary_trans_diverg_base
1638 :
public ga_instruction_copy_diverg_base,
1639 ga_instruction_elementary_trans_base {
// Sizes the intermediate tensor t_in as a vector of Qmult*ndof entries,
// fills it through the copy base, then applies the transformation matrix.
1641 virtual int exec() {
1642 GA_DEBUG_INFO(
"Instruction: divergence of test functions with elementary "
1646 t_in.adjust_sizes(Qmult*ndof);
1647 ga_instruction_copy_diverg_base::exec();
1648 do_transformation(t_out.sizes()[0], ndof*Qmult);
// The copy base writes into t_in (not t_); t_ is the transformed output.
1652 ga_instruction_elementary_trans_diverg_base
1653 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1654 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1655 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1656 : ga_instruction_copy_diverg_base(t_in, Z_, q),
1657 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Entrywise sum of two tensors: t = tc1 + tc2.
1662 struct ga_instruction_add :
public ga_instruction {
1664 const base_tensor &tc1, &tc2;
// In debug mode both operands must have exactly the size of `t`.
1665 virtual int exec() {
1666 GA_DEBUG_INFO(
"Instruction: addition");
1667 GA_DEBUG_ASSERT(t.size() == tc1.size(),
1668 "internal error " << t.size() <<
" != " << tc1.size());
1669 GA_DEBUG_ASSERT(t.size() == tc2.size(),
1670 "internal error " << t.size() <<
" != " << tc2.size());
1671 gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
// t_ is the result tensor, tc1_ and tc2_ the operands (held by reference).
1674 ga_instruction_add(base_tensor &t_,
1675 const base_tensor &tc1_,
const base_tensor &tc2_)
1676 : t(t_), tc1(tc1_), tc2(tc2_) {}
// In-place accumulation: t += tc1.
1679 struct ga_instruction_add_to :
public ga_instruction {
1681 const base_tensor &tc1;
// In debug mode `tc1` must have exactly the size of `t`.
1682 virtual int exec() {
1683 GA_DEBUG_INFO(
"Instruction: addition");
1684 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"internal error " << t.size()
1685 <<
" incompatible with " << tc1.size());
1686 gmm::add(tc1.as_vector(), t.as_vector());
// t_ is the accumulator, tc1_ the tensor added to it (held by reference).
1689 ga_instruction_add_to(base_tensor &t_,
const base_tensor &tc1_)
1690 : t(t_), tc1(tc1_) {}
// In-place scaled accumulation: t += coeff * tc1.
1693 struct ga_instruction_add_to_coeff :
public ga_instruction {
1695 const base_tensor &tc1;
// In debug mode `tc1` must have exactly the size of `t`; the scaling is
// applied lazily through gmm::scaled.
1697 virtual int exec() {
1698 GA_DEBUG_INFO(
"Instruction: addition with scale");
1699 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"internal error " << t.size()
1700 <<
" incompatible with " << tc1.size());
1701 gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
// coeff_ is received by reference.
// NOTE(review): presumably kept as a reference so later changes of the
// coefficient are seen by exec() -- TODO confirm the member declaration.
1704 ga_instruction_add_to_coeff(base_tensor &t_,
const base_tensor &tc1_,
1705 scalar_type &coeff_)
1706 : t(t_), tc1(tc1_), coeff(coeff_) {}
// Entrywise difference of two tensors, expressed as tc1 + (-1) * tc2.
// NOTE(review): the destination is presumably `t` -- TODO confirm.
1709 struct ga_instruction_sub :
public ga_instruction {
1711 const base_tensor &tc1, &tc2;
// In debug mode both operands must have exactly the size of `t`.
1712 virtual int exec() {
1713 GA_DEBUG_INFO(
"Instruction: subtraction");
1714 GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1716 gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
// t_ is the result tensor, tc1_ and tc2_ the operands (held by reference).
1720 ga_instruction_sub(base_tensor &t_,
1721 const base_tensor &tc1_,
const base_tensor &tc2_)
1722 : t(t_), tc1(tc1_), tc2(tc2_) {}
// In-place negation: every entry of t is multiplied by -1.
1725 struct ga_instruction_opposite :
public ga_instruction {
1727 virtual int exec() {
1728 GA_DEBUG_INFO(
"Instruction: multiplication with -1");
1729 gmm::scale(t.as_vector(), scalar_type(-1));
// t_ is both the input and the output tensor.
1732 ga_instruction_opposite(base_tensor &t_) : t(t_) {}
// Diagnostic instruction: prints the expression node and the current value
// of its tensor to `cout`.
1735 struct ga_instruction_print_tensor :
public ga_instruction {
1737 pga_tree_node pnode;
1738 const fem_interpolation_context &ctx;
// Output format: "Print term <expr> on Gauss point <ipt>/<nbpt> of element
// <convex number>: <tensor>", terminated by endl.
1740 virtual int exec() {
1741 GA_DEBUG_INFO(
"Instruction: tensor print");
1742 cout <<
"Print term "; ga_print_node(pnode, cout);
1743 cout <<
" on Gauss point " << ipt <<
"/" << nbpt <<
" of element "
1744 << ctx.convex_num() <<
": " << t << endl;
// pnode_ is the tree node printed alongside the tensor t_; nbpt_ and ipt_
// provide the Gauss point count and current index shown in the message.
1747 ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1748 const fem_interpolation_context &ctx_,
1750 : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
// Plain copy of a tensor: the entries of tc1 are written to t starting at
// t.begin(). No size check is visible here, so t must be at least as large
// as tc1.
1753 struct ga_instruction_copy_tensor :
public ga_instruction {
1755 const base_tensor &tc1;
1756 virtual int exec() {
1757 GA_DEBUG_INFO(
"Instruction: tensor copy");
1758 std::copy(tc1.begin(), tc1.end(), t.begin());
// t_ is the destination, tc1_ the source (held by reference).
1762 ga_instruction_copy_tensor(base_tensor &t_,
const base_tensor &tc1_)
1763 : t(t_), tc1(tc1_) {}
// Sets every entry of t to zero.
1766 struct ga_instruction_clear_tensor :
public ga_instruction {
1768 virtual int exec() {
1769 GA_DEBUG_INFO(
"Instruction: clear tensor");
1770 std::fill(t.begin(), t.end(), scalar_type(0));
// t_ is the tensor to be cleared.
1773 ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
// Copy of a tensor whose source may be void (empty).
// NOTE(review): presumably the gmm::copy below is guarded by a test on
// tc1.size(), with t cleared otherwise -- TODO confirm.
1776 struct ga_instruction_copy_tensor_possibly_void :
public ga_instruction {
1778 const base_tensor &tc1;
1779 virtual int exec() {
1780 GA_DEBUG_INFO(
"Instruction: tensor copy possibly void");
1782 gmm::copy(tc1.as_vector(), t.as_vector());
// t_ is the destination, tc1_ the possibly empty source.
1787 ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1788 const base_tensor &tc1_)
1789 : t(t_), tc1(tc1_) {}
// Scalar copy instruction with destination `t` and source `t1`, both held
// by reference.
// NOTE(review): presumably exec() performs `t = t1` -- TODO confirm.
1792 struct ga_instruction_copy_scalar :
public ga_instruction {
1793 scalar_type &t;
const scalar_type &t1;
1794 virtual int exec() {
1795 GA_DEBUG_INFO(
"Instruction: scalar copy");
// t_ is the destination scalar, t1_ the source scalar.
1799 ga_instruction_copy_scalar(scalar_type &t_,
const scalar_type &t1_)
// Vector copy instruction with a base_vector source `t1`.
// NOTE(review): presumably exec() copies t1 into the destination vector --
// TODO confirm.
1803 struct ga_instruction_copy_vect :
public ga_instruction {
1805 const base_vector &t1;
1806 virtual int exec() {
1807 GA_DEBUG_INFO(
"Instruction: fixed size tensor copy");
// t_ is the destination vector, t1_ the source vector.
1811 ga_instruction_copy_vect(base_vector &t_,
const base_vector &t1_)
// Trace over the last two indices (both of size n) of tc1; the result t has
// tc1.size() / (n*n) entries.
1815 struct ga_instruction_trace :
public ga_instruction {
1817 const base_tensor &tc1;
// For every entry of t, the diagonal entries of the matching n x n block
// are accumulated; consecutive diagonal entries are `s` apart in tc1.
// NOTE(review): `s` is presumably t.size()*(n+1) -- TODO confirm.
1820 virtual int exec() {
1821 GA_DEBUG_INFO(
"Instruction: Trace");
1822 GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(),
"Wrong sizes");
1824 auto it = t.begin();
1825 auto it1 = tc1.begin();
1826 for (; it != t.end(); ++it, ++it1) {
1829 for (
size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
// n_ is the size of the two traced indices.
1834 ga_instruction_trace(base_tensor &t_,
const base_tensor &tc1_,
size_type n_)
1835 : t(t_), tc1(tc1_), n(n_) {}
// Deviatoric part with respect to the last two indices (both of size n):
// t = tc1 - (trace(tc1)/n) * Identity.
1838 struct ga_instruction_deviator :
public ga_instruction {
1840 const base_tensor &tc1;
// t starts as a copy of tc1. Then, for each of the `nb` leading positions,
// the mean `tr` of the n diagonal entries of tc1 is computed and subtracted
// from the diagonal entries of t; diagonal entries are `s` apart.
1843 virtual int exec() {
1844 GA_DEBUG_INFO(
"Instruction: Deviator");
1845 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1847 gmm::copy(tc1.as_vector(), t.as_vector());
1851 base_tensor::iterator it = t.begin();
1852 base_tensor::const_iterator it1 = tc1.begin();
1853 for (; j < nb; ++it, ++it1, ++j) {
1855 base_tensor::const_iterator it2 = it1;
1857 for (
size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1858 tr /= scalar_type(n);
1860 base_tensor::iterator it3 = it;
1862 for (
size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
// n_ is the size of the two indices the deviator is taken over.
1867 ga_instruction_deviator(base_tensor &t_,
const base_tensor &tc1_,
1869 : t(t_), tc1(tc1_), n(n_) {}
// Transposition instruction parameterised by three sizes J, K and I.
// NOTE(review): presumably swaps the two index groups of sizes J and K for
// each of the I trailing positions -- TODO confirm against the loop body.
1872 struct ga_instruction_transpose :
public ga_instruction {
1874 const base_tensor &tc1;
// t is written sequentially through `it`; the final debug assertion checks
// that exactly t.size() entries were produced.
1876 virtual int exec() {
1877 GA_DEBUG_INFO(
"Instruction: transpose");
1878 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1881 auto it = t.begin();
1893 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// J_, K_ and I_ are stored as J, K and I.
1896 ga_instruction_transpose(base_tensor &t_,
const base_tensor &tc1_,
1898 : t(t_), tc1(tc1_), J(J_), K(K_), I(I_) {}
// Swaps two indices of sizes nn1 and nn2 in a tensor viewed with the
// five-factor layout ii1 x nn1 x ii2 x nn2 x ii3 (first index fastest).
1901 struct ga_instruction_swap_indices :
public ga_instruction {
1903 const base_tensor &tc1;
// ii1 is deduced from the total size. t is written sequentially while the
// source offset `ind` is built from the loop indices with the strides of
// tc1, and the innermost loop runs over the ii1 leading entries.
1905 virtual int exec() {
1906 GA_DEBUG_INFO(
"Instruction: swap indices");
1907 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1908 size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1910 auto it = t.begin();
1915 size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1916 for (
size_type m = 0; m < ii1; ++m, ++it)
1919 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n1_/n2_ are the sizes of the swapped indices, i2_/i3_ the sizes of the
// groups between and after them.
1922 ga_instruction_swap_indices(base_tensor &t_,
const base_tensor &tc1_,
1925 : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
// Moves an index of size nn to the last position of the tensor; ii2 is the
// size of the index group that follows it in tc1.
1928 struct ga_instruction_index_move_last :
public ga_instruction {
1930 const base_tensor &tc1;
// t is written sequentially through `it`, with the innermost loop running
// over the ii1 leading entries; the final debug assertion checks that
// exactly t.size() entries were produced.
// The debug trace names this instruction explicitly so that it is not
// confused with ga_instruction_swap_indices in a trace.
1932 virtual int exec() {
1933 GA_DEBUG_INFO(
"Instruction: index move last");
1934 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1937 auto it = t.begin();
1941 for (
size_type k = 0; k < ii1; ++k, ++it)
1944 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n_ is the size of the moved index, i2_ the size of the group after it.
1947 ga_instruction_index_move_last(base_tensor &t_,
const base_tensor &tc1_,
1949 : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
// Transposition of two indices of sizes n1 and n2 for a tensor without
// test-function indices; nn is a third size parameter.
// NOTE(review): nn is presumably the size of the trailing index group --
// TODO confirm.
1952 struct ga_instruction_transpose_no_test :
public ga_instruction {
1954 const base_tensor &tc1;
// t is written sequentially; each run of n2 consecutive outputs reads tc1
// with stride n1 starting at offset s2.
1956 virtual int exec() {
1957 GA_DEBUG_INFO(
"Instruction: transpose");
1958 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1960 auto it = t.begin();
1965 for (
size_type k = 0; k < n2; ++k, ++it)
1966 *it = tc1[s2 + k*n1];
1969 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n1_, n2_ and nn_ are stored as n1, n2 and nn.
1972 ga_instruction_transpose_no_test(base_tensor &t_,
const base_tensor &tc1_,
1975 : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
// Copies tc1 into t while exchanging the first two indices (the two
// test-function indices, per the debug message).
1978 struct ga_instruction_transpose_test :
public ga_instruction {
1980 const base_tensor &tc1;
// t must have at least two indices. With s1, s2 the first two sizes of t
// and s3 = s1*s2, output entry (i, j, k) is read from tc1[j + s2*i + k*s3].
1981 virtual int exec() {
1982 GA_DEBUG_INFO(
"Instruction: copy tensor and transpose test functions");
1983 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1984 GA_DEBUG_ASSERT(t.sizes().size() >= 2,
"Wrong sizes");
1986 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1988 base_tensor::iterator it = t.begin();
1991 for (
size_type i = 0; i < s1; ++i, ++it)
1992 *it = tc1[j+s2*i+k*s3];
// t_ is the destination, tc1_ the source (held by reference).
1995 ga_instruction_transpose_test(base_tensor &t_,
const base_tensor &tc1_)
1996 : t(t_), tc1(tc1_) {}
// Symmetric part with respect to the last two indices:
// t(..., i, j) = 0.5 * (tc1(..., i, j) + tc1(..., j, i)).
1999 struct ga_instruction_sym :
public ga_instruction {
2001 const base_tensor &tc1;
// s1 and s2 are the sizes of the last two indices. Each slice of `s`
// consecutive entries at position (i, j) is averaged with the slice of tc1
// at the transposed position (j, i).
// NOTE(review): `s` is presumably the product of the leading sizes -- TODO
// confirm.
2002 virtual int exec() {
2003 GA_DEBUG_INFO(
"Instruction: symmetric part");
2004 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
2006 size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2010 base_tensor::iterator it = t.begin() + s*(i + s1*j);
2011 base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2012 it1T = tc1.begin() + s*(j + s2*i);
2013 for (
size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
// t_ is the destination, tc1_ the source (held by reference).
2017 ga_instruction_sym(base_tensor &t_,
const base_tensor &tc1_)
2018 : t(t_), tc1(tc1_) {}
// Skew-symmetric part with respect to the last two indices:
// t(..., i, j) = 0.5 * (tc1(..., i, j) - tc1(..., j, i)).
2021 struct ga_instruction_skew :
public ga_instruction {
2023 const base_tensor &tc1;
// s1 and s2 are the sizes of the last two indices. Each slice of `s`
// consecutive entries at position (i, j) is combined with the slice of tc1
// at the transposed position (j, i).
// NOTE(review): `s` is presumably the product of the leading sizes -- TODO
// confirm.
2024 virtual int exec() {
2025 GA_DEBUG_INFO(
"Instruction: skew-symmetric part");
2026 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
2028 size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2032 base_tensor::iterator it = t.begin() + s*(i + s1*j);
2033 base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2034 it1T = tc1.begin() + s*(j + s2*i);
2035 for (
size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
// t_ is the destination, tc1_ the source (held by reference).
2039 ga_instruction_skew(base_tensor &t_,
const base_tensor &tc1_)
2040 : t(t_), tc1(tc1_) {}
// Scalar addition instruction on operands c and d with result t, all held
// by reference.
// NOTE(review): presumably exec() performs `t = c + d` -- TODO confirm.
2043 struct ga_instruction_scalar_add :
public ga_instruction {
2045 const scalar_type &c, &d;
2046 virtual int exec() {
2047 GA_DEBUG_INFO(
"Instruction: scalar addition");
// t_ is the result, c_ and d_ the operands.
2051 ga_instruction_scalar_add(scalar_type &t_,
const scalar_type &c_,
2052 const scalar_type &d_)
2053 : t(t_), c(c_), d(d_) {}
// Scalar subtraction instruction on operands c and d with result t, all
// held by reference.
// NOTE(review): presumably exec() performs `t = c - d` -- TODO confirm.
2056 struct ga_instruction_scalar_sub :
public ga_instruction {
2058 const scalar_type &c, &d;
2059 virtual int exec() {
2060 GA_DEBUG_INFO(
"Instruction: scalar subtraction");
// t_ is the result, c_ and d_ the operands.
2064 ga_instruction_scalar_sub(scalar_type &t_,
const scalar_type &c_,
2065 const scalar_type &d_)
2066 : t(t_), c(c_), d(d_) {}
// Scalar-by-scalar multiplication instruction on operands c and d with
// result t, all held by reference.
// NOTE(review): presumably exec() performs `t = c * d` -- TODO confirm.
2069 struct ga_instruction_scalar_scalar_mult :
public ga_instruction {
2071 const scalar_type &c, &d;
2072 virtual int exec() {
2073 GA_DEBUG_INFO(
"Instruction: scalar multiplication");
// t_ is the result, c_ and d_ the operands.
2077 ga_instruction_scalar_scalar_mult(scalar_type &t_,
const scalar_type &c_,
2078 const scalar_type &d_)
2079 : t(t_), c(c_), d(d_) {}
// Scalar-by-scalar division instruction on operands c and d with result t,
// all held by reference.
// NOTE(review): presumably exec() performs `t = c / d` -- TODO confirm.
2082 struct ga_instruction_scalar_scalar_div :
public ga_instruction {
2084 const scalar_type &c, &d;
2085 virtual int exec() {
2086 GA_DEBUG_INFO(
"Instruction: scalar division");
// t_ is the result, c_ and d_ the operands.
2090 ga_instruction_scalar_scalar_div(scalar_type &t_,
const scalar_type &c_,
2091 const scalar_type &d_)
2092 : t(t_), c(c_), d(d_) {}
// Generic case of the compile-time sized kernel dax__<I>: the work is split
// into I/8 calls of the <8> specialization followed by one call of the
// specialization for the remainder I - 8*(I/8). Both iterators are taken by
// reference, so each call continues where the previous one stopped.
2095 template<
int I>
inline void dax__(base_tensor::iterator &it,
2096 base_tensor::const_iterator &itx,
2097 const scalar_type &a) {
2098 constexpr
int I1 = I/8;
2099 constexpr
int I2 = I - I1*8;
2100 for (
int i=0; i < I1; ++i)
2101 dax__<8>(it, itx , a);
2102 dax__<I2>(it, itx , a);
// Explicit specializations of dax__ for I = 8 down to 0; they terminate the
// recursion of the generic template.
// NOTE(review): each presumably performs I unrolled steps of the form
// `*it++ += *itx++ * a` -- TODO confirm against the bodies.
2104 template<>
inline void dax__<8>(base_tensor::iterator &it,
2105 base_tensor::const_iterator &itx,
2106 const scalar_type &a) {
2116 template<>
inline void dax__<7>(base_tensor::iterator &it,
2117 base_tensor::const_iterator &itx,
2118 const scalar_type &a) {
2127 template<>
inline void dax__<6>(base_tensor::iterator &it,
2128 base_tensor::const_iterator &itx,
2129 const scalar_type &a) {
2137 template<>
inline void dax__<5>(base_tensor::iterator &it,
2138 base_tensor::const_iterator &itx,
2139 const scalar_type &a) {
2146 template<>
inline void dax__<4>(base_tensor::iterator &it,
2147 base_tensor::const_iterator &itx,
2148 const scalar_type &a) {
2154 template<>
inline void dax__<3>(base_tensor::iterator &it,
2155 base_tensor::const_iterator &itx,
2156 const scalar_type &a) {
2161 template<>
inline void dax__<2>(base_tensor::iterator &it,
2162 base_tensor::const_iterator &itx,
2163 const scalar_type &a) {
2167 template<>
inline void dax__<1>(base_tensor::iterator &it,
2168 base_tensor::const_iterator &itx,
2169 const scalar_type &a) {
// Zero elements: nothing to do, the arguments are left untouched.
2172 template<>
inline void dax__<0>(base_tensor::iterator &,
2173 base_tensor::const_iterator &,
2174 const scalar_type &) {}
// Generic case of the strided dot product of compile-time length I:
//   *it = sum_{i=0}^{I-1} it1[i*s1] * it2[i*s2]
// s1 and s2 are the strides applied to it1 and it2. The iterators are not
// advanced.
2177 template<
int I>
inline
2178 void reduc_elem_unrolled__(base_tensor::iterator &it,
2179 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2181 *it = it1[0] * it2[0];
2182 for (
int i=1; i < I; ++i)
2183 *it += it1[i*s1] * it2[i*s2];
// Specializations of reduc_elem_unrolled__ for I = 9 down to 1: the same
// strided dot product as the generic version, written as a single fully
// unrolled expression assigned to *it.
2186 void reduc_elem_unrolled__<9>(base_tensor::iterator &it,
2187 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2189 *it = it1[0] * it2[0]
2191 + it1[2*s1] * it2[2*s2]
2192 + it1[3*s1] * it2[3*s2]
2193 + it1[4*s1] * it2[4*s2]
2194 + it1[5*s1] * it2[5*s2]
2195 + it1[6*s1] * it2[6*s2]
2196 + it1[7*s1] * it2[7*s2]
2197 + it1[8*s1] * it2[8*s2];
2200 void reduc_elem_unrolled__<8>(base_tensor::iterator &it,
2201 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2203 *it = it1[0] * it2[0]
2205 + it1[2*s1] * it2[2*s2]
2206 + it1[3*s1] * it2[3*s2]
2207 + it1[4*s1] * it2[4*s2]
2208 + it1[5*s1] * it2[5*s2]
2209 + it1[6*s1] * it2[6*s2]
2210 + it1[7*s1] * it2[7*s2];
2213 void reduc_elem_unrolled__<7>(base_tensor::iterator &it,
2214 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2216 *it = it1[0] * it2[0]
2218 + it1[2*s1] * it2[2*s2]
2219 + it1[3*s1] * it2[3*s2]
2220 + it1[4*s1] * it2[4*s2]
2221 + it1[5*s1] * it2[5*s2]
2222 + it1[6*s1] * it2[6*s2];
2225 void reduc_elem_unrolled__<6>(base_tensor::iterator &it,
2226 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2228 *it = it1[0] * it2[0]
2230 + it1[2*s1] * it2[2*s2]
2231 + it1[3*s1] * it2[3*s2]
2232 + it1[4*s1] * it2[4*s2]
2233 + it1[5*s1] * it2[5*s2];
2236 void reduc_elem_unrolled__<5>(base_tensor::iterator &it,
2237 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2239 *it = it1[0] * it2[0]
2241 + it1[2*s1] * it2[2*s2]
2242 + it1[3*s1] * it2[3*s2]
2243 + it1[4*s1] * it2[4*s2];
2246 void reduc_elem_unrolled__<4>(base_tensor::iterator &it,
2247 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2249 *it = it1[0] * it2[0]
2251 + it1[2*s1] * it2[2*s2]
2252 + it1[3*s1] * it2[3*s2];
2255 void reduc_elem_unrolled__<3>(base_tensor::iterator &it,
2256 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2258 *it = it1[0] * it2[0]
2260 + it1[2*s1] * it2[2*s2];
2263 void reduc_elem_unrolled__<2>(base_tensor::iterator &it,
2264 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2266 *it = it1[0] * it2[0]
2267 + it1[s1] * it2[s2];
// Single term: the strides are irrelevant.
2270 void reduc_elem_unrolled__<1>(base_tensor::iterator &it,
2271 base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2273 { *it = it1[0] * it2[0]; }
// Multiplication of a tensor by a scalar: t = c * tc1.
2276 struct ga_instruction_scalar_mult :
public ga_instruction {
2278 const base_tensor &tc1;
// c is held by reference, so the value current at exec() time is used.
2279 const scalar_type &c;
2280 virtual int exec() {
2281 GA_DEBUG_INFO(
"Instruction: multiplication of a tensor by a scalar " << c);
2282 gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
// t_ is the result, tc1_ the tensor operand and c_ the scalar factor.
2285 ga_instruction_scalar_mult(base_tensor &t_,
2286 const base_tensor &tc1_,
const scalar_type &c_)
2287 : t(t_), tc1(tc1_), c(c_) {}
// Division of a tensor by a scalar: t = tc1 / c, entry by entry.
2290 struct ga_instruction_scalar_div :
public ga_instruction {
2292 const base_tensor &tc1;
// c is held by reference, so the value current at exec() time is used.
2293 const scalar_type &c;
// In debug mode tc1 must have exactly the size of t. No check against
// c == 0 is performed here.
2294 virtual int exec() {
2295 GA_DEBUG_INFO(
"Instruction: division of a tensor by a scalar");
2296 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
2297 base_tensor::iterator it = t.begin();
2298 base_tensor::const_iterator it1 = tc1.cbegin();
2299 for (; it != t.end(); ++it, ++it1) *it = *it1/c;
// t_ is the result, tc1_ the tensor operand and c_ the scalar divisor.
2302 ga_instruction_scalar_div(base_tensor &t_,
2303 const base_tensor &tc1_,
const scalar_type &c_)
2304 : t(t_), tc1(tc1_), c(c_) {}
// Cross product of two 3-component tensors that carry leading
// (test-function) indices of sizes n1 = tc1.size()/3 and n2 = tc2.size()/3.
// The result t has n1*n2*3 entries, the three components being nn = n1*n2
// entries apart.
2308 struct ga_instruction_cross_product_tf :
public ga_instruction {
2310 const base_tensor &tc1, &tc2;
// Components of tc1 are n1 entries apart (offsets 0, n1, 2*n1), those of
// tc2 are n2 entries apart. Two loop nests are present: the first computes
// the negated product, the second the standard tc1 x tc2 formula.
// NOTE(review): the choice between them presumably depends on `inv` --
// TODO confirm.
2312 virtual int exec() {
2313 GA_DEBUG_INFO(
"Instruction: Cross product with test functions");
2315 size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
2316 GA_DEBUG_ASSERT(t.size() == nn*3,
"Bad tensor size for cross product");
2317 size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
2318 base_tensor::iterator it = t.begin();
2319 base_tensor::const_iterator it2 = tc2.cbegin();
2321 for (
size_type i = 0; i < n2; ++i, ++it2) {
2322 base_tensor::const_iterator it1 = tc1.cbegin();
2323 for (
size_type j = 0; j < n1; ++j, ++it, ++it1) {
2324 *it = - it1[n1] *it2[n2_2] + it1[n1_2]*it2[n2];
2325 it[nn] = - it1[n1_2]*it2[0] + it1[0] *it2[n2_2];
2326 it[mm] = - it1[0] *it2[n2] + it1[n1] *it2[0];
2330 for (
size_type i = 0; i < n2; ++i, ++it2) {
2331 base_tensor::const_iterator it1 = tc1.cbegin();
2332 for (
size_type j = 0; j < n1; ++j, ++it, ++it1) {
2333 *it = it1[n1] *it2[n2_2] - it1[n1_2]*it2[n2];
2334 it[nn] = it1[n1_2]*it2[0] - it1[0] *it2[n2_2];
2335 it[mm] = it1[0] *it2[n2] - it1[n1] *it2[0];
// inv_ is stored in `inv`; tensors are held by reference.
2341 ga_instruction_cross_product_tf(base_tensor &t_,
2342 const base_tensor &tc1_,
2343 const base_tensor &tc2_,
bool inv_)
2344 : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
// Cross product of two plain 3-vectors: t = tc1 x tc2.
2348 struct ga_instruction_cross_product :
public ga_instruction {
2350 const base_tensor &tc1, &tc2;
// In debug mode all three tensors must have exactly 3 entries. The debug
// trace identifies this instruction as the plain cross product, distinct
// from the test-function variant ga_instruction_cross_product_tf.
2351 virtual int exec() {
2352 GA_DEBUG_INFO(
"Instruction: Cross product");
2353 GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
2354 "Bad tensor size for cross product");
2355 t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
2356 t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
2357 t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
// t_ is the result, tc1_ and tc2_ the operands (held by reference).
2360 ga_instruction_cross_product(base_tensor &t_,
2361 const base_tensor &tc1_,
const base_tensor &tc2_)
2362 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Componentwise multiplication: tc1 is viewed as s1_1 x s2 with
// s2 = tc2.size(), and entry (m, i) of tc1 is multiplied by tc2[i].
2368 struct ga_instruction_dotmult :
public ga_instruction {
2370 const base_tensor &tc1, &tc2;
// t is written sequentially and must have s1_1*s2 entries (debug check).
2371 virtual int exec() {
2372 GA_DEBUG_INFO(
"Instruction: componentwise multiplication");
2373 size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2374 GA_DEBUG_ASSERT(t.size() == s1_1*s2,
"Wrong sizes");
2376 base_tensor::iterator it = t.begin();
2378 for (
size_type m = 0; m < s1_1; ++m, ++it)
2379 *it = tc1[m+s1_1*i] * tc2[i];
// t_ is the result, tc1_ and tc2_ the operands (held by reference).
2382 ga_instruction_dotmult(base_tensor &t_,
2383 const base_tensor &tc1_,
const base_tensor &tc2_)
2384 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Componentwise division: tc1 is viewed as s1_1 x s2 with s2 = tc2.size(),
// and entry (m, i) of tc1 is divided by tc2[i].
2387 struct ga_instruction_dotdiv :
public ga_instruction {
2389 const base_tensor &tc1, &tc2;
// t is written sequentially and must have s1_1*s2 entries (debug check).
// No check against zero entries of tc2 is performed here.
2390 virtual int exec() {
2391 GA_DEBUG_INFO(
"Instruction: componentwise division");
2392 size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2393 GA_DEBUG_ASSERT(t.size() == s1_1*s2,
"Wrong sizes");
2395 base_tensor::iterator it = t.begin();
2397 for (
size_type m = 0; m < s1_1; ++m, ++it)
2398 *it = tc1[m+s1_1*i] / tc2[i];
// t_ is the result, tc1_ and tc2_ the operands (held by reference).
2401 ga_instruction_dotdiv(base_tensor &t_,
2402 const base_tensor &tc1_,
const base_tensor &tc2_)
2403 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Componentwise multiplication where both operands carry a leading index:
// tc2 is viewed as s2_1 x s2_2 with s2_1 its first size, and the product
// tc1(m, i) * tc2(n, i) is formed for every (m, n, i).
2407 struct ga_instruction_dotmult_spec :
public ga_instruction {
2409 const base_tensor &tc1, &tc2;
// t is written sequentially; the final debug assertion checks that exactly
// t.size() entries were produced.
2410 virtual int exec() {
2411 GA_DEBUG_INFO(
"Instruction: specific componentwise multiplication");
2412 size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2415 base_tensor::iterator it = t.begin();
2418 for (
size_type m = 0; m < s1_1; ++m, ++it)
2419 *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2420 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// t_ is the result, tc1_ and tc2_ the operands (held by reference).
2423 ga_instruction_dotmult_spec(base_tensor &t_,
2424 const base_tensor &tc1_,
const base_tensor &tc2_)
2425 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Contraction of two indices of the same tensor (both of size nn), with
// tc1 viewed with the layout ii1 x nn x ii2 x nn x ii3 (first index
// fastest).
2429 struct ga_instruction_contract_1_1 :
public ga_instruction {
2431 const base_tensor &tc1;
// ii1 is deduced from the total size. Each output entry is reset to zero
// and then accumulates the tc1 entries whose two contracted indices are
// both equal to n; pre_ind is the offset of the non-contracted indices.
2433 virtual int exec() {
2434 GA_DEBUG_INFO(
"Instruction: single contraction on a single tensor");
2436 size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2438 base_tensor::iterator it = t.begin();
2441 for (
size_type k = 0; k < ii1; ++k, ++it) {
2442 *it = scalar_type(0);
2443 size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2445 *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2448 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n_ is the size of the contracted indices, i2_/i3_ the sizes of the index
// groups between and after them.
2451 ga_instruction_contract_1_1(base_tensor &t_,
const base_tensor &tc1_,
2453 : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
// Contraction of one index of tc1 with one index of tc2 (both of size nn).
// tc1 is viewed as ift1 x ii1 x nn x ii2 and tc2 as ift2 x ii3 x nn x ii4
// (first index fastest).
2457 struct ga_instruction_contract_2_1 :
public ga_instruction {
2459 const base_tensor &tc1, &tc2;
// ift1/ift2 are deduced from the total sizes. The innermost output index
// runs over ift1 (q). Each output entry is the sum over n of
// tc1[ind1 + n*ift1*ii1] * tc2[ind2 + n*ift2*ii3].
2461 virtual int exec() {
2462 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
2464 size_type ift1 = tc1.size() / (nn*ii1*ii2);
2465 size_type ift2 = tc2.size() / (nn*ii3*ii4);
2467 base_tensor::iterator it = t.begin();
2473 for (
size_type q = 0; q < ift1; ++q, ++it) {
2474 *it = scalar_type(0);
2475 size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2476 size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2478 *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2481 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n_ is the contracted size; i1_/i2_ and i3_/i4_ are the sizes of the index
// groups before/after the contracted index in tc1 and tc2 respectively.
2484 ga_instruction_contract_2_1(base_tensor &t_,
2485 const base_tensor &tc1_,
const base_tensor &tc2_,
2488 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2489 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
// Same one-index contraction as ga_instruction_contract_2_1 (identical index
// formulas), but in the visible loop nest the leading index p of tc2 varies
// fastest in t instead of the leading index q of tc1.
2493 struct ga_instruction_contract_2_1_rev :
public ga_instruction {
2495 const base_tensor &tc1, &tc2;
2497 virtual int exec() {
2498 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
// Leading block sizes of each operand once nn and the ii* sizes are divided out.
2500 size_type ift1 = tc1.size() / (nn*ii1*ii2);
2501 size_type ift2 = tc2.size() / (nn*ii3*ii4);
2503 base_tensor::iterator it = t.begin();
2509 for (
size_type p = 0; p < ift2; ++p, ++it) {
2510 *it = scalar_type(0);
// Offsets of the non-contracted indices; q, l, k, j, i come from lines not
// visible here.
2511 size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2512 size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2514 *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2517 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n_ is the contracted extent; i1_..i4_ are the block sizes used in exec().
2520 ga_instruction_contract_2_1_rev(base_tensor &t_,
2521 const base_tensor &tc1_,
const base_tensor &tc2_,
2524 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2525 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
// Contraction of two indices (extents nn1 and nn2) between two tensors: each
// output entry is the double sum over (n1, n2) of a tc1 entry times a tc2
// entry. In the visible loop nest the leading index s of tc1 varies fastest.
2529 struct ga_instruction_contract_2_2 :
public ga_instruction {
2531 const base_tensor &tc1, &tc2;
2532 size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2534 virtual int exec() {
2535 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
// NOTE(review): ii3 appears in both divisors and ii6 in neither -- confirm
// against the intended tensor layout.
2537 size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2538 size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
// Strides of the two contracted indices inside tc2; they are exchanged when
// inv_tc2 is set, i.e. when n1 and n2 occur in the opposite order in tc2.
2540 size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2541 if (inv_tc2) std::swap(sn1, sn2);
2543 base_tensor::iterator it = t.begin();
2551 for (
size_type s = 0; s < ift1; ++s, ++it) {
2552 *it = scalar_type(0);
2554 = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2556 = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
// Accumulate over the contracted pair (n1, n2).
2559 *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2560 * tc2[ind2+n1*sn1+n2*sn2];
2563 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n1_, n2_ are the contracted extents; i1_..i6_ are stored as block sizes.
2566 ga_instruction_contract_2_2(base_tensor &t_,
2567 const base_tensor &tc1_,
const base_tensor &tc2_,
2572 : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2573 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
// Same two-index contraction as ga_instruction_contract_2_2 (identical index
// formulas), but in the visible loop nest the leading index r of tc2 varies
// fastest in t instead of the leading index s of tc1.
2578 struct ga_instruction_contract_2_2_rev :
public ga_instruction {
2580 const base_tensor &tc1, &tc2;
2581 size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2583 virtual int exec() {
2584 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
// NOTE(review): ii3 appears in both divisors and ii6 in neither -- confirm
// against the intended tensor layout.
2586 size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2587 size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
// Strides of the two contracted indices inside tc2; exchanged when inv_tc2
// is set.
2589 size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2590 if (inv_tc2) std::swap(sn1, sn2);
2592 base_tensor::iterator it = t.begin();
2600 for (
size_type r = 0; r < ift2; ++r, ++it) {
2601 *it = scalar_type(0);
2603 = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2605 = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
// Accumulate over the contracted pair (n1, n2).
2608 *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2609 * tc2[ind2+n1*sn1+n2*sn2];
2612 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n1_, n2_ are the contracted extents; i1_..i6_ are stored as block sizes.
2615 ga_instruction_contract_2_2_rev(base_tensor &t_,
2616 const base_tensor &tc1_,
const base_tensor &tc2_,
2621 : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2622 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
// Order-one contraction t = tc1 * tc2 with the tensors read as column-major
// matrices: the generic loop computes sum_j tc1[m + M*j] * tc2[j + J*k], i.e.
// tc1 is M x J and tc2 is J x K. J is a member; M and K are computed on lines
// not visible here.
2628 struct ga_instruction_matrix_mult :
public ga_instruction {
2630 const base_tensor &tc1, &tc2;
2632 virtual int exec() {
2633 GA_DEBUG_INFO(
"Instruction: order one contraction "
2634 "(dot product or matrix multiplication)");
2637 #if defined(GA_USES_BLAS)
// BLAS path: one dgemm call with neither operand transposed, passing the
// address of the 'N' flag for both transposition arguments.
2639 const BLAS_INT M_=BLAS_INT(M), J_=BLAS_INT(J), K_=BLAS_INT(K);
2640 constexpr
char notrans =
'N';
2641 constexpr scalar_type one(1), zero(0);
2642 gmm::dgemm_(&notrans, &notrans, &M_, &K_, &J_, &one,
2643 &(tc1[0]), &M_, &(tc2[0]), &J_, &zero, &(t[0]), &M_);
// Non-BLAS path: fully unrolled 2x2 and 3x3 products, then a generic loop.
2647 auto it = t.begin();
2648 if (M==2 && J==2 && K == 2) {
2649 *it++ = tc1[0]*tc2[0] + tc1[2]*tc2[1];
2650 *it++ = tc1[1]*tc2[0] + tc1[3]*tc2[1];
2651 *it++ = tc1[0]*tc2[2] + tc1[2]*tc2[3];
2652 *it++ = tc1[1]*tc2[2] + tc1[3]*tc2[3];
2653 }
else if (M==3 && J==3 && K == 3) {
2654 *it++ = tc1[0]*tc2[0] + tc1[3]*tc2[1] + tc1[6]*tc2[2];
2655 *it++ = tc1[1]*tc2[0] + tc1[4]*tc2[1] + tc1[7]*tc2[2];
2656 *it++ = tc1[2]*tc2[0] + tc1[5]*tc2[1] + tc1[8]*tc2[2];
2657 *it++ = tc1[0]*tc2[3] + tc1[3]*tc2[4] + tc1[6]*tc2[5];
2658 *it++ = tc1[1]*tc2[3] + tc1[4]*tc2[4] + tc1[7]*tc2[5];
2659 *it++ = tc1[2]*tc2[3] + tc1[5]*tc2[4] + tc1[8]*tc2[5];
2660 *it++ = tc1[0]*tc2[6] + tc1[3]*tc2[7] + tc1[6]*tc2[8];
2661 *it++ = tc1[1]*tc2[6] + tc1[4]*tc2[7] + tc1[7]*tc2[8];
2662 *it++ = tc1[2]*tc2[6] + tc1[5]*tc2[7] + tc1[8]*tc2[8];
// Generic case: one output entry per (m, k), accumulated over j.
2665 for (
size_type m = 0; m < M; ++m, ++it) {
2666 *it = scalar_type(0);
2668 *it += tc1[m+M*j] * tc2[j+J*k];
2671 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// J_ is the contracted (inner) dimension.
2675 ga_instruction_matrix_mult(base_tensor &t_,
2676 const base_tensor &tc1_,
2678 : t(t_), tc1(tc1_), tc2(tc2_), J(J_) {}
// "Specific" order-one contraction over j (extent J) where tc1 carries an
// extra index i (extent I) and tc2 an extra index k (extent K). The generic
// loop accumulates tc1[m + M*i + MI*j] * tc2[n + N*j + NJ*k].
2682 struct ga_instruction_matrix_mult_spec :
public ga_instruction {
2684 const base_tensor &tc1, &tc2;
2687 virtual int exec() {
2688 GA_DEBUG_INFO(
"Instruction: specific order one contraction "
2689 "(dot product or matrix multiplication)");
// MI = M*I and NJ = N*J are the strides of j in tc1 and of k in tc2.
2690 const size_type MI = tc1.size() / J, M = MI / I,
2691 NJ = tc2.size() / K, N = NJ / J;
2692 #if defined(GA_USES_BLAS)
// BLAS path: one dgemm per visible (i) step on the slices starting at
// tc1[M*i] (leading dimension MI_) and tc2[NJ*k] (leading dimension N_), the
// second operand transposed. The output iterator advances by MN per call.
2693 const BLAS_INT J_ = BLAS_INT(J), M_ = BLAS_INT(M), N_ = BLAS_INT(N),
2695 constexpr
char notrans =
'N', trans =
'T';
2696 constexpr scalar_type one(1), zero(0);
2698 auto it = t.begin();
2700 for (
size_type i = 0; i < I; ++i, it += MN)
2701 gmm::dgemm_(&notrans, &trans, &M_, &N_, &J_, &one,
2702 &(tc1[M*i]), &MI_, &(tc2[NJ*k]), &N_, &zero,
2704 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// Non-BLAS path: explicit loops, m varying fastest in the visible nest.
2706 auto it = t.begin();
2710 for (
size_type m = 0; m < M; ++m, ++it) {
2711 *it = scalar_type(0);
2713 *it += tc1[m+M*i+MI*j] * tc2[n+N*j+NJ*k];
2715 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// J_ is the contracted extent; I_ and K_ are the extra extents of tc1 and tc2.
2719 ga_instruction_matrix_mult_spec(base_tensor &t_,
2720 const base_tensor &tc1_,
2721 const base_tensor &tc2_,
2723 : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
// Second "specific" order-one contraction over j (extent J). The generic
// loop accumulates tc1[mi + MI*j] * tc2[n + N*j + NJ*k], with the tc2 index n
// varying fastest in the output.
2727 struct ga_instruction_matrix_mult_spec2 :
public ga_instruction {
2729 const base_tensor &tc1, &tc2;
2732 virtual int exec() {
2733 GA_DEBUG_INFO(
"Instruction: specific order one contraction "
2734 "(dot product or matrix multiplication)");
// NJ = N*J is the stride of k in tc2 (MI is defined on a line not visible here).
2736 NJ = tc2.size() / K, N = NJ / J;
2737 #if defined(GA_USES_BLAS)
// BLAS path: one dgemm per k, multiplying the tc2 slice starting at
// tc2[NJ*k] (not transposed, leading dimension N_) by tc1 transposed
// (leading dimension MI_). The output iterator advances by NMI per call.
2738 const BLAS_INT J_ = BLAS_INT(J), MI_ = BLAS_INT(MI), N_ = BLAS_INT(N);
2739 constexpr
char notrans =
'N', trans =
'T';
2740 constexpr scalar_type one(1), zero(0);
2742 auto it = t.begin();
2743 for (
size_type k = 0; k < K; ++k, it += NMI)
2744 gmm::dgemm_(&notrans, &trans, &N_, &MI_, &J_, &one,
2745 &(tc2[NJ*k]), &N_, &(tc1[0]), &MI_, &zero,
2747 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// Non-BLAS path: explicit loops, n varying fastest in the visible nest.
2749 auto it = t.begin();
2752 for (
size_type n = 0; n < N; ++n, ++it) {
2753 *it = scalar_type(0);
2755 *it += tc1[mi+MI*j] * tc2[n+N*j+NJ*k];
2757 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// J_ is the contracted extent; I_ and K_ are stored alongside it.
2761 ga_instruction_matrix_mult_spec2(base_tensor &t_,
2762 const base_tensor &tc1_,
2763 const base_tensor &tc2_,
2765 : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
// General contraction of size I between tc1 and tc2, producing N*M entries
// in t. N and M are computed on lines not visible here; the generic loop
// steps tc1 by N and tc2 by M between successive contracted terms.
2769 struct ga_instruction_contraction :
public ga_instruction {
2771 const base_tensor &tc1, &tc2;
2773 virtual int exec() {
2774 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << I);
2777 GA_DEBUG_ASSERT(t.size() == N*M,
"Internal error");
2778 #if defined(GA_USES_BLAS)
// BLAS path: a single dgemm computing tc2 (M x I, not transposed) times
// tc1 transposed (leading dimension N_), written to t with leading
// dimension M_.
2780 BLAS_INT N_ = BLAS_INT(N), I_ = BLAS_INT(I), M_ = BLAS_INT(M);
2781 char notrans =
'N', trans =
'T';
2782 static const scalar_type one(1), zero(0);
2783 gmm::dgemm_(&notrans, &trans, &M_, &N_, &I_, &one,
2784 &(tc2[0]), &M_, &(tc1[0]), &N_, &zero, &(t[0]), &M_);
// Non-BLAS path. it2 sweeps the first M entries of tc2 for each position of
// it1; when it wraps around, it1 moves to the next tc1 entry.
2788 auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
// Unrolled reductions for contraction sizes 7 to 10 (the selecting
// conditions are on lines not visible here).
2790 for (
auto it = t.begin(); it != t.end(); ++it) {
2791 reduc_elem_unrolled__<7>(it, it1, it2, N, M);
2792 if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2795 for (
auto it = t.begin(); it != t.end(); ++it) {
2796 reduc_elem_unrolled__<8>(it, it1, it2, N, M);
2797 if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2800 for (
auto it = t.begin(); it != t.end(); ++it) {
2801 reduc_elem_unrolled__<9>(it, it1, it2, N, M);
2802 if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2805 for (
auto it = t.begin(); it != t.end(); ++it) {
2806 reduc_elem_unrolled__<10>(it, it1, it2, N, M);
2807 if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
// Generic size: the first product initialises the accumulator a, the
// remaining terms are added while stepping tc1 by N and tc2 by M.
2810 for (
auto it = t.begin(); it != t.end(); ++it) {
2811 auto it11 = it1, it22 = it2;
2812 scalar_type a = (*it11) * (*it22);
2814 { it11 += N; it22 += M; a += (*it11) * (*it22); }
2816 if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
// I_ is the contraction size.
2829 ga_instruction_contraction(base_tensor &t_,
2830 const base_tensor &tc1_,
2832 : t(t_), tc1(tc1_), tc2(tc2_), I(I_) {}
// Contraction of size n*q specialised, per its debug message, for a
// "vectorized second tensor of type 2". Sizes n and q are run-time members.
2836 struct ga_instruction_contraction_opt0_2 :
public ga_instruction {
2838 const base_tensor &tc1, &tc2;
2840 virtual int exec() {
2841 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << n*q <<
2842 " optimized for vectorized second tensor of type 2");
// s1, s2: operand sizes divided by the contraction size nn = n*q;
// s2_q: s2 divided by q.
2843 size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2845 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2847 auto it = t.begin();
2848 auto it1 = tc1.cbegin();
// One pass over tc2 for every one of the s1 starting positions in tc1.
2849 for (
size_type i = 0; i < s1; ++i, ++it1) {
2850 auto it2 = tc2.cbegin();
2854 for (
size_type l = 0; l < q; ++l, ++it) {
// First product initialises *it; further terms step tc1 by s1_qq and tc2
// by s2_qq (both defined on lines not visible here).
2856 auto ittt1 = itt1, ittt2 = it2;
2857 *it = *ittt1 * (*ittt2);
2859 ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
// n_ and q_ are the two factors of the contraction size.
2870 ga_instruction_contraction_opt0_2(base_tensor &t_,
2871 const base_tensor &tc1_,
2872 const base_tensor &tc2_,
2874 : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
// Variant of ga_instruction_contraction_opt0_2 in which the factor N of the
// contraction size N*q is a compile-time constant (q remains a run-time
// member). The template header is on a line not visible here.
2879 struct ga_instruction_contraction_opt0_2_unrolled :
public ga_instruction {
2881 const base_tensor &tc1, &tc2;
2883 virtual int exec() {
2884 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*q <<
2885 " optimized for vectorized second tensor of type 2");
// s1, s2: operand sizes divided by nn = N*q; s2_q: s2 divided by q.
2886 size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2888 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2890 auto it = t.begin();
2891 auto it1 = tc1.cbegin();
// One pass over tc2 for every one of the s1 starting positions in tc1.
2892 for (
size_type i = 0; i < s1; ++i, ++it1) {
2893 auto it2 = tc2.cbegin();
2897 for (
size_type l = 0; l < q; ++l, ++it) {
// First product initialises *it; further terms step tc1 by s1_qq and tc2
// by s2_qq (both defined on lines not visible here).
2899 auto ittt1 = itt1, ittt2 = it2;
2900 *it = *ittt1 * (*ittt2);
2902 ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
// q_ is the run-time factor of the contraction size.
2909 ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_,
2910 const base_tensor &tc1_,
2911 const base_tensor &tc2_,
2913 : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
// Variant of ga_instruction_contraction_opt0_2 in which both factors N and Q
// of the contraction size N*Q are compile-time constants.
2917 template <
int N,
int Q>
2918 struct ga_instruction_contraction_opt0_2_dunrolled :
public ga_instruction {
2920 const base_tensor &tc1, &tc2;
2921 virtual int exec() {
2922 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*Q
2923 <<
" optimized for vectorized second tensor of type 2");
// s1, s2: operand sizes divided by N*Q; s2_q: s2 divided by Q.
2924 size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2926 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2928 auto it = t.begin();
2929 auto it1 = tc1.cbegin();
// One pass over tc2 for every one of the s1 starting positions in tc1.
2930 for (
size_type i = 0; i < s1; ++i, ++it1) {
2931 auto it2 = tc2.cbegin();
2935 for (
size_type l = 0; l < Q; ++l, ++it) {
// First product initialises *it; further terms step tc1 by s1_qq and tc2
// by s2_qq (both defined on lines not visible here).
2937 auto ittt1 = itt1, ittt2 = it2;
2938 *it = *ittt1 * (*ittt2);
2940 ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
// Binds the result tensor and both operands by reference.
2947 ga_instruction_contraction_opt0_2_dunrolled(base_tensor &t_,
2948 const base_tensor &tc1_,
2949 const base_tensor &tc2_)
2950 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Contraction of size n*q; counterpart of the opt0_2 family with the roles
// of tc1 and tc2 exchanged in the traversal (tc1 is read in groups of q,
// tc2 in runs of s2).
// NOTE(review): the debug message still says "second tensor" although the
// name suggests the first tensor is the vectorized one -- confirm.
2954 struct ga_instruction_contraction_opt2_0 :
public ga_instruction {
2956 const base_tensor &tc1, &tc2;
2958 virtual int exec() {
2959 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << n*q <<
2960 " optimized for vectorized second tensor of type 2");
// s1, s2: operand sizes divided by nn = n*q. s1_qq and s2_qq are the steps
// between successive contracted terms in tc1 and tc2.
2961 size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2962 size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2963 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2965 auto it = t.begin();
// i and l are indices of enclosing loops on lines not visible here.
2967 auto it1 = tc1.cbegin() + i*q;
2969 auto it2 = tc2.cbegin() + l*s2;
2970 for (
size_type j = 0; j < s2; ++j, ++it, ++it2) {
// First product initialises *it, the remaining terms are accumulated.
2971 auto itt1 = it1, itt2 = it2;
2972 *it = *itt1 * (*itt2);
2974 itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
// n_ and q_ are the two factors of the contraction size.
2981 ga_instruction_contraction_opt2_0(base_tensor &t_,
2982 const base_tensor &tc1_,
2983 const base_tensor &tc2_,
2985 : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
// Variant of ga_instruction_contraction_opt2_0 in which the factor N of the
// contraction size N*q is a compile-time constant (q remains a run-time
// member). The template header is on a line not visible here.
2990 struct ga_instruction_contraction_opt2_0_unrolled :
public ga_instruction {
2992 const base_tensor &tc1, &tc2;
2994 virtual int exec() {
2995 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*q
2996 <<
" optimized for vectorized second tensor of type 2");
// s1, s2: operand sizes divided by nn = N*q. s1_qq and s2_qq are the steps
// between successive contracted terms in tc1 and tc2.
2997 size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2998 size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2999 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
3001 auto it = t.begin();
3002 auto it1 = tc1.cbegin();
// tc1 is consumed in s1_q groups of q entries.
3003 for (
size_type i = 0; i < s1_q; ++i, it1 += q) {
// l is the index of an enclosing loop on a line not visible here.
3005 auto it2 = tc2.cbegin() + l*s2;
3006 for (
size_type j = 0; j < s2; ++j, ++it, ++it2) {
// First product initialises *it, the remaining terms are accumulated.
3007 auto itt1 = it1, itt2 = it2;
3008 *it = *itt1 * (*itt2);
3010 itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
// q_ is the run-time factor of the contraction size.
3017 ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_,
3018 const base_tensor &tc1_,
3019 const base_tensor &tc2_,
3021 : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
// Variant of ga_instruction_contraction_opt2_0 in which both factors N and Q
// of the contraction size N*Q are compile-time constants.
3025 template <
int N,
int Q>
3026 struct ga_instruction_contraction_opt2_0_dunrolled :
public ga_instruction {
3028 const base_tensor &tc1, &tc2;
3029 virtual int exec() {
3030 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*Q
3031 <<
" optimized for vectorized second tensor of type 2");
// s1, s2: operand sizes divided by N*Q. s1_qq and s2_qq are the steps
// between successive contracted terms in tc1 and tc2.
3032 size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
3033 size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
3034 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
3036 auto it = t.begin();
3037 auto it1 = tc1.cbegin();
// tc1 is consumed in s1_q groups of Q entries.
3038 for (
size_type i = 0; i < s1_q; ++i, it1 += Q) {
// l is the index of an enclosing loop on a line not visible here.
3040 auto it2 = tc2.cbegin() + l*s2;
3041 for (
size_type j = 0; j < s2; ++j, ++it, ++it2) {
// First product initialises *it, the remaining terms are accumulated.
3042 auto itt1 = it1, itt2 = it2;
3043 *it = *itt1 * (*itt2);
3045 itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
// Binds the result tensor and both operands by reference.
3052 ga_instruction_contraction_opt2_0_dunrolled(base_tensor &t_,
3053 const base_tensor &tc1_,
3054 const base_tensor &tc2_)
3055 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Contraction of size nn specialised, per its debug message, for a
// "vectorized second tensor of type 1". Each output entry written in the
// visible code is a single product: one tc2 value (*it2) multiplies tc1
// entries spaced s1 apart.
3059 struct ga_instruction_contraction_opt0_1 :
public ga_instruction {
3061 const base_tensor &tc1, &tc2;
3063 virtual int exec() {
3064 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << nn <<
3065 " optimized for vectorized second tensor of type 1");
// s1, s2: operand sizes divided by nn; s2_n: s2 divided by nn again.
3066 size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
3068 auto it = t.begin();
3069 auto it1 = tc1.cbegin();
3070 for (
size_type i = 0; i < s1; ++i, ++it1) {
3071 auto it2 = tc2.cbegin();
// itt1 is declared on a line not visible here; it is stepped by s1 between
// successive outputs while *it2 stays fixed.
3075 *it++ = (*itt1) * (*it2);
3077 { itt1 += s1; *it++ = (*itt1) * (*it2); }
// n_ is the contraction size.
3082 ga_instruction_contraction_opt0_1(base_tensor &t_,
3083 const base_tensor &tc1_,
3084 const base_tensor &tc2_,
3086 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Compile-time unrolled helper: writes it[k] = it1[k*s1] * a for
// k = N-1 down to 0 by recursing on N (the N == 1 case is a specialisation).
3089 template<
int N>
inline void reduc_elem_unrolled_opt1_
3090 (
const base_vector::iterator &it,
const base_vector::const_iterator &it1,
3092 it[N-1] = it1[(N-1)*s1] * a;
3093 reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
// Recursion end of reduc_elem_unrolled_opt1_: writes the single product
// (*it1) * a to *it.
3095 template<>
inline void reduc_elem_unrolled_opt1_<1>
3096 (
const base_vector::iterator &it,
const base_vector::const_iterator &it1,
3098 { *it = (*it1) * a; }
// Variant of ga_instruction_contraction_opt0_1 with the contraction size N
// fixed at compile time (the template header is on a line not visible here).
// Output is produced N entries at a time by reduc_elem_unrolled_opt1_<N>.
3102 struct ga_instruction_contraction_opt0_1_unrolled :
public ga_instruction {
3104 const base_tensor &tc1, &tc2;
3105 virtual int exec() {
3106 GA_DEBUG_INFO(
"Instruction: unrolled contraction operation of size " << N
3107 <<
" optimized for vectorized second tensor of type 1");
// s1, s2: operand sizes divided by N.
3108 size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
3109 auto it = t.begin();
3110 auto it1 = tc1.cbegin();
3111 for (
size_type i = 0; i < s1; ++i, ++it1) {
// Only the first s2 entries of tc2 are visited, one every N entries; each
// visited value scales N entries of tc1 spaced s1 apart.
3112 auto it2 = tc2.cbegin(), it2e = it2 + s2;
3113 for (; it2 != it2e; it2 += N, it += N)
3114 reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
// Binds the result tensor and both operands by reference.
3118 ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_,
3119 const base_tensor &tc1_,
3120 const base_tensor &tc2_)
3121 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Contraction of size nn specialised, per its debug message, for the case
// where both tensors are "vectorized of type 1". In the visible code a single
// product a = (*it1) * (*it2) is written to several positions of t that are
// s2+1 entries apart.
3125 struct ga_instruction_contraction_opt1_1 :
public ga_instruction {
3127 const base_tensor &tc1, &tc2;
3129 virtual int exec() {
3130 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << nn <<
3131 " optimized for both vectorized tensor of type 1");
// s1, s2: operand sizes divided by nn; s2_1 = s2 + 1 is the write stride.
3132 size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
3133 GA_DEBUG_ASSERT(t.size() == s2*s1,
"Internal error");
3137 auto it2 = tc2.cbegin();
3140 auto it1 = tc1.cbegin();
// i and j are indices of enclosing loops on lines not visible here.
3141 auto it = t.begin() + j*nn;
3143 if (i) { it1 += nn, it += s2*nn; }
3144 scalar_type a = (*it1) * (*it2);
// itt is declared on a line not visible here. The value a is stored twice
// unconditionally, then once more for each k in [2, nn).
3146 *itt = a; itt += s2_1; *itt = a;
3147 for (
size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
// n_ is the contraction size.
3152 ga_instruction_contraction_opt1_1(base_tensor &t_,
3153 const base_tensor &tc1_,
3155 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Contraction whose size I is a compile-time constant (the template header
// is on a line not visible here). Each of the N*M output entries is computed
// by reduc_elem_unrolled__<I>.
3162 struct ga_instruction_contraction_unrolled
3163 :
public ga_instruction {
3165 const base_tensor &tc1, &tc2;
3166 virtual int exec() {
3167 GA_DEBUG_INFO(
"Instruction: unrolled contraction operation of size " << I);
// N, M: operand sizes divided by the contraction size.
3168 size_type N = tc1.size()/I, M = tc2.size()/I;
3169 GA_DEBUG_ASSERT(t.size() == N*M,
"Internal error, " << t.size()
3170 <<
" != " << N <<
"*" << M);
// it2 sweeps the first M entries of tc2 for each position of it1; when it
// wraps around, it1 moves to the next tc1 entry.
3171 auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
3172 for (
auto it = t.begin(); it != t.end(); ++it) {
3173 reduc_elem_unrolled__<I>(it, it1, it2, N, M);
3174 if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
// Binds the result tensor and both operands by reference.
3178 ga_instruction_contraction_unrolled(base_tensor &t_,
3179 const base_tensor &tc1_,
3180 const base_tensor &tc2_)
3181 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Specialisation for contraction size 1: the result is the product of every
// tc1 entry with every tc2 entry (N*M outputs), the tc2 index varying
// fastest. Several hand-unrolled variants are visible, followed by a general
// one; the conditions selecting them are on lines not visible here.
3186 struct ga_instruction_contraction_unrolled<1> :
public ga_instruction {
3188 const base_tensor &tc1, &tc2;
3189 virtual int exec() {
3190 GA_DEBUG_INFO(
"Instruction: unrolled contraction operation of size 1");
3191 size_type N = tc1.size(), M = tc2.size();
3192 GA_DEBUG_ASSERT(t.size() == N*M,
"Internal error, " << t.size()
3193 <<
" != " << N <<
"*" << M);
3195 base_tensor::iterator it = t.begin();
3196 base_tensor::const_iterator it1 = tc1.cbegin();
// Variant using only tc2[0]: one output per tc1 entry.
3199 for (
size_type n = 0; n < N; ++n, ++it1)
3200 *it++ = tc2[0] * (*it1);
// Variant handling tc2 two entries at a time through dax__<2>.
3203 for (
size_type n = 0; n < N; ++n, ++it1) {
3204 base_tensor::const_iterator it2 = tc2.cbegin();
3205 dax__<2>(it, it2, *it1);
// NOTE(review): this loop and the next one both call dax__<4>; the
// conditions selecting them are not visible here -- confirm this one is not
// meant to be dax__<3>.
3209 for (
size_type n = 0; n < N; ++n, ++it1) {
3210 base_tensor::const_iterator it2 = tc2.cbegin();
3211 dax__<4>(it, it2, *it1);
3215 for (
size_type n = 0; n < N; ++n, ++it1) {
3216 base_tensor::const_iterator it2 = tc2.cbegin();
3217 dax__<4>(it, it2, *it1);
// General variant: M1 chunks of four via dax__<4>, then the M2 = M - 4*M1
// leftover entries one by one.
3221 const int M1 = int(M)/4;
3222 const int M2 = int(M) - M1*4;
3223 for (
size_type n = 0; n < N; ++n, ++it1) {
3224 base_tensor::const_iterator it2 = tc2.cbegin();
3225 for (
int mm=0; mm < M1; ++mm)
3226 dax__<4>(it, it2, *it1);
3227 for (
int mm=0; mm < M2; ++mm)
3228 *it++ = (*it2++) * (*it1);
// Binds the result tensor and both operands by reference.
3233 ga_instruction_contraction_unrolled(base_tensor &t_,
3234 const base_tensor &tc1_,
3235 const base_tensor &tc2_)
3236 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Doubly unrolled reduction helper: computes one output entry with
// reduc_elem_unrolled__<N>, then recurses with S2-1 after advancing both the
// output iterator and it2. Both iterators are taken by non-const reference,
// so the caller's iterators are advanced as well.
3239 template<
int N,
int S2>
3240 inline void reduc_elem_d_unrolled__(base_tensor::iterator &it,
3241 base_tensor::const_iterator &it1,
3242 base_tensor::const_iterator &it2,
3244 reduc_elem_unrolled__<N>(it, it1, it2, s1, s2);
3245 reduc_elem_d_unrolled__<N, S2-1>(++it, it1, ++it2, s1, s2);
// Explicit specialisations of reduc_elem_d_unrolled__ for S2 == 0 and
// N = 1..16; these are the end points of the recursion on S2. All parameters
// are unnamed. NOTE(review): the bodies are on lines not visible here --
// presumably empty; confirm.
3250 template<>
inline void reduc_elem_d_unrolled__<1, 0>
3251 (base_tensor::iterator &, base_tensor::const_iterator &,
3253 template<>
inline void reduc_elem_d_unrolled__<2, 0>
3254 (base_tensor::iterator &, base_tensor::const_iterator &,
3256 template<>
inline void reduc_elem_d_unrolled__<3, 0>
3257 (base_tensor::iterator &, base_tensor::const_iterator &,
3259 template<>
inline void reduc_elem_d_unrolled__<4, 0>
3260 (base_tensor::iterator &, base_tensor::const_iterator &,
3262 template<>
inline void reduc_elem_d_unrolled__<5, 0>
3263 (base_tensor::iterator &, base_tensor::const_iterator &,
3265 template<>
inline void reduc_elem_d_unrolled__<6, 0>
3266 (base_tensor::iterator &, base_tensor::const_iterator &,
3268 template<>
inline void reduc_elem_d_unrolled__<7, 0>
3269 (base_tensor::iterator &, base_tensor::const_iterator &,
3271 template<>
inline void reduc_elem_d_unrolled__<8, 0>
3272 (base_tensor::iterator &, base_tensor::const_iterator &,
3274 template<>
inline void reduc_elem_d_unrolled__<9, 0>
3275 (base_tensor::iterator &, base_tensor::const_iterator &,
3277 template<>
inline void reduc_elem_d_unrolled__<10, 0>
3278 (base_tensor::iterator &, base_tensor::const_iterator &,
3280 template<>
inline void reduc_elem_d_unrolled__<11, 0>
3281 (base_tensor::iterator &, base_tensor::const_iterator &,
3283 template<>
inline void reduc_elem_d_unrolled__<12, 0>
3284 (base_tensor::iterator &, base_tensor::const_iterator &,
3286 template<>
inline void reduc_elem_d_unrolled__<13, 0>
3287 (base_tensor::iterator &, base_tensor::const_iterator &,
3289 template<>
inline void reduc_elem_d_unrolled__<14, 0>
3290 (base_tensor::iterator &, base_tensor::const_iterator &,
3292 template<>
inline void reduc_elem_d_unrolled__<15, 0>
3293 (base_tensor::iterator &, base_tensor::const_iterator &,
3295 template<>
inline void reduc_elem_d_unrolled__<16, 0>
3296 (base_tensor::iterator &, base_tensor::const_iterator &,
// Contraction with both the contraction size I and the tc2 block size M
// fixed at compile time. For each of the N positions in tc1, one call to
// reduc_elem_d_unrolled__<I, M> produces M consecutive output entries.
3301 template<
int I,
int M>
3302 struct ga_ins_red_d_unrolled :
public ga_instruction {
3304 const base_tensor &tc1, &tc2;
3305 virtual int exec() {
3306 GA_DEBUG_INFO(
"Instruction: doubly unrolled contraction operation of size "
// N, M_: operand sizes divided by I; M_ must match the template argument M.
3308 size_type N = tc1.size()/I, M_ = tc2.size()/I;
3309 GA_DEBUG_ASSERT(M_ == M,
"Internal error");
3310 GA_DEBUG_ASSERT(t.size() == N*M,
"Internal error, " << t.size()
3311 <<
" != " << N <<
"*" << M);
3312 auto it = t.begin();
3313 auto it1 = tc1.cbegin();
3314 for (
size_type n = 0; n < N; ++n, ++it1) {
// it2 restarts at the beginning of tc2 for every n; `it` is passed by
// reference and is advanced inside the helper.
3315 auto it2 = tc2.cbegin();
3316 reduc_elem_d_unrolled__<I, M>(it, it1, it2, N, M);
3318 GA_DEBUG_ASSERT(it == t.end(),
"Internal error");
// Binds the result tensor and both operands by reference.
3321 ga_ins_red_d_unrolled(base_tensor &t_,
3322 const base_tensor &tc1_,
const base_tensor &tc2_)
3323 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Factory returning the contraction instruction to use for t = tc1 . tc2.
// The visible tests look at the sparsity codes of the operands (1 or 2) and
// at whether their qdim equals n. The more specialised opt1_1 / opt0_1 /
// opt0_2 / opt2_0 instructions are returned first, then
// ga_instruction_contraction_unrolled<1..6>, and finally the general
// ga_instruction_contraction. The switch/case lines selecting among the
// unrolled variants are on lines not visible here.
3327 pga_instruction ga_instruction_contraction_switch
3328 (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3330 base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
// Both operands of sparsity 1 with qdim n: the result is tagged with
// sparsity 10 and the dedicated opt1_1 instruction is used.
3332 if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3333 tc1_.qdim() == n && tc2_.qdim() == n) {
3335 t_.set_sparsity(10, tc1_.qdim());
3336 return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
// Second operand of sparsity 1: opt0_1 family.
3339 if (tc2_.sparsity() == 1) {
3342 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3345 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3348 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3351 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3354 return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2,n);
// Second operand of sparsity 2: opt0_2 family.
3357 if (tc2_.sparsity() == 2) {
// n2 is the third dimension of tc2 when it has one, otherwise 1.
3359 size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3366 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3370 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3374 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3377 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3384 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3388 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3392 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3395 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3402 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3406 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3410 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3413 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3417 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3420 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3423 return std::make_shared<ga_instruction_contraction_opt0_2>
// First operand of sparsity 2: opt2_0 family.
3428 if (tc1_.sparsity() == 2) {
// n1 is the third dimension of tc1 when it has one, otherwise 1.
3430 size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3437 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3441 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3445 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3448 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3455 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3459 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3463 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3466 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3473 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3477 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3481 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
// NOTE(review): opt2_0_unrolled<3> is returned twice in a row here, unlike
// the opt0_2 branch above which goes <3>, <4>, <5> -- the second return may
// be unreachable; confirm against the surrounding switch.
3484 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3487 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3490 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3493 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3496 return std::make_shared<ga_instruction_contraction_opt2_0>
3497 (t,tc1,tc2, n1, q1);
// No sparsity shortcut applies: pick an unrolled instruction for small
// contraction sizes, otherwise the general one.
3503 case 1 :
return std::make_shared<ga_instruction_contraction_unrolled< 1>>
3505 case 2 :
return std::make_shared<ga_instruction_contraction_unrolled< 2>>
3507 case 3 :
return std::make_shared<ga_instruction_contraction_unrolled< 3>>
3509 case 4 :
return std::make_shared<ga_instruction_contraction_unrolled< 4>>
3511 case 5 :
return std::make_shared<ga_instruction_contraction_unrolled< 5>>
3513 case 6 :
return std::make_shared<ga_instruction_contraction_unrolled< 6>>
3517 default :
return std::make_shared<ga_instruction_contraction>
// Factory for the "uniform" case. The sparsity-based shortcuts mirror those
// of ga_instruction_contraction_switch; after them, doubly unrolled
// ga_ins_red_d_unrolled<I, M> instructions are returned for I in {2, 3, 4}
// and M in 1..10, and every other combination is delegated to
// ga_instruction_contraction_switch. The outer switch selecting M and the
// inner switch selecting I are on lines not visible here.
3522 pga_instruction ga_uniform_instruction_contraction_switch
3523 (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3525 base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
// Both operands of sparsity 1 with qdim n: the result is tagged with
// sparsity 10 and the dedicated opt1_1 instruction is used.
3527 if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3528 tc1_.qdim() == n && tc2_.qdim() == n) {
3530 t_.set_sparsity(10, tc1_.qdim());
3531 return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
// Second operand of sparsity 1: opt0_1 family.
3533 if (tc2_.sparsity() == 1) {
3536 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3539 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3542 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3545 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3548 return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
// Second operand of sparsity 2: opt0_2 family.
3551 if (tc2_.sparsity() == 2) {
// n2 is the third dimension of tc2 when it has one, otherwise 1.
3553 size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3560 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3564 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3568 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3571 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3578 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3582 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3586 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3589 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3596 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3600 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3604 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3607 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3611 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3614 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3617 return std::make_shared<ga_instruction_contraction_opt0_2>
// First operand of sparsity 2: opt2_0 family.
3622 if (tc1_.sparsity() == 2) {
// n1 is the third dimension of tc1 when it has one, otherwise 1.
3624 size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3631 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3635 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3639 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3642 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3649 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3653 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3657 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3660 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3667 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3671 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3675 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
// NOTE(review): opt2_0_unrolled<3> is returned twice in a row here, unlike
// the opt0_2 branch above which goes <3>, <4>, <5> -- the second return may
// be unreachable; confirm against the surrounding switch.
3678 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3681 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3684 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3687 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3690 return std::make_shared<ga_instruction_contraction_opt2_0>
3691 (t,tc1,tc2, n1, q1);
// Doubly unrolled instructions: second template argument 1.
3701 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3702 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3703 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3704 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 2.
3708 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3709 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3710 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3711 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 3.
3715 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3716 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3717 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3718 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 4.
3722 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3723 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3724 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3725 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 5.
3729 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3730 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3731 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3732 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 6.
3736 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3737 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3738 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3739 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 7.
3743 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3744 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3745 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3746 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 8.
3750 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3751 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3752 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3753 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 9.
3757 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3758 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3759 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3760 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Second template argument 10.
3764 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3765 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3766 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3767 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// Anything else is delegated to the non-uniform switch.
3769 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// "Specific" contraction: each output entry is the sum over j of
// tc1[m + i*s1 + j*s111] * tc2[n + j*s2], with the first index m of tc1
// varying fastest in the visible loop nest.
3775 struct ga_instruction_spec_contraction :
public ga_instruction {
3777 const base_tensor &tc1, &tc2;
3779 virtual int exec() {
3780 GA_DEBUG_INFO(
"Instruction: specific contraction operation of "
// s1: first dimension of tc1; s11: remaining tc1 size once s1 and nn are
// divided out; s111 = s1*s11 is the stride of the contracted index in tc1.
3782 size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3784 base_tensor::iterator it = t.begin();
// s2, i, j and n are defined on lines not visible here.
3787 for (
size_type m = 0; m < s1; ++m, ++it) {
3788 *it = scalar_type(0);
3790 *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3792 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// n_ is stored as nn, the divisor used for the block sizes above.
3795 ga_instruction_spec_contraction(base_tensor &t_,
3796 const base_tensor &tc1_,
3798 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Second contraction variant. Each written entry of t is reset to zero and
// then accumulates tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222].
// s111 and s222 are the strides applied to index k in tc1 and tc2; both are
// derived from the respective tensor size divided by nn.
// NOTE(review): k is presumably the contracted index ranging over nn -- confirm
// against the loop headers.
3802 struct ga_instruction_spec2_contraction :
public ga_instruction {
3804 const base_tensor &tc1, &tc2;
3806 virtual int exec() {
3807 GA_DEBUG_INFO(
"Instruction: second specific contraction operation of "
3809 size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3810 size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
// The result is written sequentially through a single running iterator.
3811 base_tensor::iterator it = t.begin();
3815 for (
size_type n = 0; n < s2; ++n, ++it) {
3816 *it = scalar_type(0);
3818 *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
// Debug-only check that exactly t.size() entries were produced.
3820 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
3823 ga_instruction_spec2_contraction(base_tensor &t_,
3824 const base_tensor &tc1_,
3826 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Simple tensor product. t is traversed linearly; each entry receives the
// product of the current tc2 entry and the current tc1 entry. The tc1 iterator
// advances fastest and is rewound after s1 entries, at which point the tc2
// iterator moves on by one.
// NOTE(review): s1 is presumably tc1.size() -- confirm at its definition.
3830 struct ga_instruction_simple_tmult :
public ga_instruction {
3832 const base_tensor &tc1, &tc2;
3833 virtual int exec() {
3834 GA_DEBUG_INFO(
"Instruction: simple tensor product");
3836 GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(),
"Wrong sizes");
3837 base_tensor::const_iterator it2=tc2.cbegin(), it1=tc1.cbegin(), it1end=it1 + s1;
3838 for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3839 *it = *(it2) * (*it1);
// Wrap around tc1 and step to the next tc2 entry.
3840 if (++it1 == it1end) { it1 = tc1.cbegin(), ++it2; }
3844 ga_instruction_simple_tmult(base_tensor &t_,
3845 const base_tensor &tc1_,
const base_tensor &tc2_)
3846 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Simple tensor product specialised on the compile-time size IJ of tc1
// (checked by a debug assertion). t is expected to hold IJ*KL entries.
// Two implementations are visible: a BLAS one that zero-fills t and then calls
// dger_ with unit increments and leading dimension IJ, and a loop that applies
// dax__<IJ> once per entry of tc2.
// NOTE(review): which implementation is compiled is presumably selected by a
// preprocessor switch, and KL is presumably tc2.size() -- confirm both.
3851 template<
int IJ>
struct ga_instruction_simple_tmult_unrolled
3852 :
public ga_instruction {
3854 const base_tensor &tc1, &tc2;
3855 virtual int exec() {
3857 GA_DEBUG_ASSERT(tc1.size() == IJ,
3858 "Wrong sizes " << tc1.size() <<
" != " << IJ);
3859 GA_DEBUG_INFO(
"Instruction: simple tensor product, unrolled with "
3860 << IJ <<
" operations");
3861 GA_DEBUG_ASSERT(t.size() == IJ * KL,
3862 "Wrong sizes " << t.size() <<
" != " << IJ <<
"*" << KL);
// BLAS variant: t is cleared first because dger_ accumulates into it.
3864 const BLAS_INT IJ_=BLAS_INT(IJ), KL_=BLAS_INT(KL), INC(1);
3865 const scalar_type one(1);
3866 std::fill(t.begin(), t.end(), scalar_type(0));
3867 gmm::dger_(&IJ_, &KL_, &one, &tc1[0], &INC, &tc2[0], &INC, &(t[0]), &IJ_);
// Loop variant: one dax__<IJ> call per tc2 entry, restarting tc1 each time.
3869 base_tensor::iterator it = t.begin();
3870 base_tensor::const_iterator it2 = tc2.cbegin();
3871 for (
size_type kl = 0; kl < KL; ++kl, ++it2) {
3872 base_tensor::const_iterator it1 = tc1.cbegin();
3873 dax__<IJ>(it, it1, *it2);
3875 GA_DEBUG_ASSERT(it == t.end(),
"Internal error");
3879 ga_instruction_simple_tmult_unrolled(base_tensor &t_,
3880 const base_tensor &tc1_,
3881 const base_tensor &tc2_)
3882 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Factory for the simple tensor product instruction. Dispatches on
// tc1.size(): sizes 2 to 16 get the matching
// ga_instruction_simple_tmult_unrolled<N> instantiation, any other size falls
// back to the generic ga_instruction_simple_tmult. A size of 1 is rejected
// with GMM_ASSERT1.
3885 pga_instruction ga_uniform_instruction_simple_tmult
3886 (base_tensor &t,
const base_tensor &tc1,
const base_tensor &tc2) {
3887 switch(tc1.size()) {
3888 case 1 : GMM_ASSERT1(
false,
"size 1 should not happen");
3889 case 2 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3891 case 3 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3893 case 4 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3895 case 5 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3897 case 6 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3899 case 7 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3901 case 8 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3903 case 9 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3905 case 10 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3907 case 11 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3909 case 12 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3911 case 13 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3913 case 14 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3915 case 15 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3917 case 16 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
// Sizes above 16 (and 0) use the non-unrolled implementation.
3919 default :
return std::make_shared<ga_instruction_simple_tmult>
// Tensor product with reordered indices. t is filled sequentially with
// entries of tc1 multiplied by tc2[n+N*j]; I and J come from the constructor.
// The visible code contains an optimised path that copies runs of M entries
// of tc1 starting at offset M*i with dax__<k>, using chunks of 8 plus a
// remainder of M2 = M - 8*M1 entries for larger M, and a plain path that
// writes tc1[m+M*i] * val entry by entry.
// NOTE(review): M and N are presumably tc1.size()/I and tc2.size()/J, and the
// two paths are presumably alternatives under a preprocessor switch --
// confirm both.
3926 struct ga_instruction_spec_tmult :
public ga_instruction {
3928 const base_tensor &tc1, &tc2;
3930 virtual int exec() {
3931 GA_DEBUG_INFO(
"Instruction: specific tensor product");
3932 GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(),
"Wrong sizes");
3935 auto it = t.begin();
// Scalar case: iterate directly over all entries of tc1.
3940 for (
auto it1 = tc1.cbegin(); it1 != tc1.end(); ++it1)
3942 *it++ = (*it1) * tc2[n+N*j];
// Runs of 2 to 8 entries: a single unrolled dax__ call per run.
3948 auto it1 = tc1.cbegin() + M*i;
3949 dax__<2>(it, it1, tc2[n+N*j]);
3956 auto it1 = tc1.cbegin() + M*i;
3957 dax__<3>(it, it1, tc2[n+N*j]);
3964 auto it1 = tc1.cbegin() + M*i;
3965 dax__<4>(it, it1, tc2[n+N*j]);
3972 auto it1 = tc1.cbegin() + M*i;
3973 dax__<5>(it, it1, tc2[n+N*j]);
3980 auto it1 = tc1.cbegin() + M*i;
3981 dax__<6>(it, it1, tc2[n+N*j]);
3988 auto it1 = tc1.cbegin() + M*i;
3989 dax__<7>(it, it1, tc2[n+N*j]);
3996 auto it1 = tc1.cbegin() + M*i;
3997 dax__<8>(it, it1, tc2[n+N*j]);
// Longer runs: M1 full chunks of 8 followed by a remainder of M2 entries.
4001 const int M1 = int(M)/8;
4002 const int M2 = int(M) - M1*8;
4008 auto it1 = tc1.cbegin() + M*i;
4009 for (
int mm=0; mm < M1; ++mm)
4010 dax__<8>(it, it1, tc2[n+N*j]);
4017 auto it1 = tc1.cbegin() + M*i;
4018 for (
int mm=0; mm < M1; ++mm)
4019 dax__<8>(it, it1, tc2[n+N*j]);
4020 dax__<1>(it, it1, tc2[n+N*j]);
4027 auto it1 = tc1.cbegin() + M*i;
4028 for (
int mm=0; mm < M1; ++mm)
4029 dax__<8>(it, it1, tc2[n+N*j]);
4030 dax__<2>(it, it1, tc2[n+N*j]);
4037 auto it1 = tc1.cbegin() + M*i;
4038 for (
int mm=0; mm < M1; ++mm)
4039 dax__<8>(it, it1, tc2[n+N*j]);
4040 dax__<3>(it, it1, tc2[n+N*j]);
4047 auto it1 = tc1.cbegin() + M*i;
4048 for (
int mm=0; mm < M1; ++mm)
4049 dax__<8>(it, it1, tc2[n+N*j]);
4050 dax__<4>(it, it1, tc2[n+N*j]);
4057 auto it1 = tc1.cbegin() + M*i;
4058 for (
int mm=0; mm < M1; ++mm)
4059 dax__<8>(it, it1, tc2[n+N*j]);
4060 dax__<5>(it, it1, tc2[n+N*j]);
4067 auto it1 = tc1.cbegin() + M*i;
4068 for (
int mm=0; mm < M1; ++mm)
4069 dax__<8>(it, it1, tc2[n+N*j]);
4070 dax__<6>(it, it1, tc2[n+N*j]);
4077 auto it1 = tc1.cbegin() + M*i;
4078 for (
int mm=0; mm < M1; ++mm)
4079 dax__<8>(it, it1, tc2[n+N*j]);
4080 dax__<7>(it, it1, tc2[n+N*j]);
// A remainder outside the handled range is treated as an internal error.
4084 GMM_ASSERT1(
false,
"should not happen");
4087 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
// Plain path: the tc2 factor is read once into val and reused.
4093 scalar_type val = tc2[n+N*j];
4095 *it = tc1[m+M*i] * val;
4097 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
4101 ga_instruction_spec_tmult(base_tensor &t_,
4102 const base_tensor &tc1_,
4103 const base_tensor &tc2_,
4105 : t(t_), tc1(tc1_), tc2(tc2_), I(I_), J(J_) {}
// Second reordered tensor product. Entries of t are written sequentially as
// tc1[i] * tc2[m+M*j], where M is the first dimension of tc2 and
// J = tc2.size() / M.
// NOTE(review): the nesting order of the i, j and m loops is not documented
// here -- check the loop headers before relying on the output layout.
4109 struct ga_instruction_spec2_tmult :
public ga_instruction {
4111 const base_tensor &tc1, &tc2;
4112 virtual int exec() {
4113 GA_DEBUG_INFO(
"Instruction: second specific tensor product");
4114 GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(),
"Wrong sizes");
4116 size_type M = tc2.sizes()[0], J = tc2.size() / M;
4118 base_tensor::iterator it = t.begin();
4122 *it = tc1[i] * tc2[m+M*j];
// Debug-only check that exactly t.size() entries were produced.
4123 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
4126 ga_instruction_spec2_tmult(base_tensor &t_,
4127 const base_tensor &tc1_,
const base_tensor &tc2_)
4128 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Gathers scalar components into t: entry i of t is read through the pointer
// components[i] every time exec() runs. The vector of pointers is copied at
// construction; the pointed-to scalars are not owned by the instruction.
4133 struct ga_instruction_simple_c_matrix :
public ga_instruction {
4135 std::vector<scalar_type *> components;
4136 virtual int exec() {
4137 GA_DEBUG_INFO(
"Instruction: gathering components for explicit "
4139 GA_DEBUG_ASSERT(t.size() == components.size(),
"Wrong sizes");
4140 for (
size_type i = 0; i < components.size(); ++i)
4141 t[i] = *(components[i]);
4144 ga_instruction_simple_c_matrix(base_tensor &t_,
4145 std::vector<scalar_type *> &components_)
4146 : t(t_), components(components_) {}
// Gathers tensor components into t, one contiguous slab of
// s = t.size() / components.size() entries per component. A component with
// more than one entry must have exactly s entries and is copied verbatim; a
// single-entry component has its value repeated s times.
// The vector of tensor pointers is copied at construction.
4149 struct ga_instruction_c_matrix_with_tests :
public ga_instruction {
4151 const std::vector<const base_tensor *> components;
4152 virtual int exec() {
4153 GA_DEBUG_INFO(
"Instruction: gathering components for explicit "
4154 "matrix with tests functions");
4155 size_type s = t.size() / components.size();
4156 GA_DEBUG_ASSERT(s,
"Wrong sizes");
4157 base_tensor::iterator it = t.begin();
4158 for (
size_type i = 0; i < components.size(); ++i) {
4159 const base_tensor &t1 = *(components[i]);
4160 if (t1.size() > 1) {
4161 GA_DEBUG_ASSERT(t1.size() == s,
"Wrong sizes, " << t1.size()
4163 for (
size_type j = 0; j < s; ++j) *it++ = t1[j];
// Single-entry component: broadcast its only value over the slab.
4165 for (
size_type j = 0; j < s; ++j) *it++ = t1[0];
4170 ga_instruction_c_matrix_with_tests
4171 (base_tensor &t_,
const std::vector<const base_tensor *> &components_)
4172 : t(t_), components(components_) {}
// One-argument predefined function on a scalar. Holds a reference to the
// output scalar (constructor argument t_), a reference to the input c and the
// function pointer f1.
// NOTE(review): exec() presumably stores (*f1)(c) into t -- confirm.
4175 struct ga_instruction_eval_func_1arg_1res :
public ga_instruction {
4177 const scalar_type &c;
4178 pscalar_func_onearg f1;
4179 virtual int exec() {
4180 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
4181 "predefined function on a scalar");
4185 ga_instruction_eval_func_1arg_1res(scalar_type &t_,
const scalar_type &c_,
4186 pscalar_func_onearg f1_)
4187 : t(t_), c(c_), f1(f1_) {}
// One-argument predefined function on a scalar, where the function is given
// as a ga_predef_function object F (held by reference) instead of a function
// pointer.
// NOTE(review): exec() presumably stores F(c) into t -- confirm.
4190 struct ga_instruction_eval_func_1arg_1res_expr :
public ga_instruction {
4192 const scalar_type &c;
4193 const ga_predef_function &F;
4194 virtual int exec() {
4195 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
4196 "predefined function on a scalar");
4200 ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
4201 const scalar_type &c_,
4202 const ga_predef_function &F_)
4203 : t(t_), c(c_), F(F_) {}
// Applies the one-argument function f1 entry by entry: t[i] = (*f1)(tc1[i]).
// t and tc1 must have the same size (debug assertion).
4206 struct ga_instruction_eval_func_1arg :
public ga_instruction {
4208 const base_tensor &tc1;
4209 pscalar_func_onearg f1;
4210 virtual int exec() {
4211 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
4212 "predefined function on tensor");
4213 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
4214 for (
size_type i = 0; i < t.size(); ++i)
4215 t[i] = (*f1)(tc1[i]);
4218 ga_instruction_eval_func_1arg(base_tensor &t_,
4219 const base_tensor &c_, pscalar_func_onearg f1_)
4220 : t(t_), tc1(c_), f1(f1_) {}
// Entry-by-entry evaluation of a one-argument predefined function given as a
// ga_predef_function object F (held by reference). t and tc1 must have the
// same size (debug assertion).
// NOTE(review): the loop body presumably assigns F(tc1[i]) to t[i] -- confirm.
4223 struct ga_instruction_eval_func_1arg_expr :
public ga_instruction {
4225 const base_tensor &tc1;
4226 const ga_predef_function &F;
4227 virtual int exec() {
4228 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
4229 "predefined function on tensor");
4230 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
4231 for (
size_type i = 0; i < t.size(); ++i)
4235 ga_instruction_eval_func_1arg_expr(base_tensor &t_,
4236 const base_tensor &c_,
4237 const ga_predef_function &F_)
4238 : t(t_), tc1(c_), F(F_) {}
// Two-argument predefined function on two scalars. Holds references to the
// output scalar (constructor argument t_) and to the inputs c and d, plus the
// function pointer f2.
// NOTE(review): exec() presumably stores (*f2)(c, d) into t -- confirm.
4241 struct ga_instruction_eval_func_2arg_1res :
public ga_instruction {
4243 const scalar_type &c, &d;
4244 pscalar_func_twoargs f2;
4245 virtual int exec() {
4246 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4247 "predefined function on two scalar");
4251 ga_instruction_eval_func_2arg_1res(scalar_type &t_,
const scalar_type &c_,
4252 const scalar_type &d_,
4253 pscalar_func_twoargs f2_)
4254 : t(t_), c(c_), d(d_), f2(f2_) {}
// Two-argument predefined function on two scalars, where the function is
// given as a ga_predef_function object F (held by reference).
// NOTE(review): exec() presumably stores F(c, d) into t -- confirm.
4257 struct ga_instruction_eval_func_2arg_1res_expr :
public ga_instruction {
4259 const scalar_type &c, &d;
4260 const ga_predef_function &F;
4261 virtual int exec() {
4262 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4263 "predefined function on two scalar");
4267 ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
4268 const scalar_type &c_,
4269 const scalar_type &d_,
4270 const ga_predef_function &F_)
4271 : t(t_), c(c_), d(d_), F(F_) {}
// Two-argument function with a scalar first operand:
// t[i] = (*f2)(tc1[0], tc2[i]). Only entry 0 of tc1 is read; t and tc2 must
// have the same size (debug assertion).
4274 struct ga_instruction_eval_func_2arg_first_scalar :
public ga_instruction {
4276 const base_tensor &tc1, &tc2;
4277 pscalar_func_twoargs f2;
4278 virtual int exec() {
4279 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4280 "predefined function on one scalar and one tensor");
4281 GA_DEBUG_ASSERT(t.size() == tc2.size(),
"Wrong sizes");
4282 for (
size_type i = 0; i < t.size(); ++i)
4283 t[i] = (*f2)(tc1[0], tc2[i]);
4286 ga_instruction_eval_func_2arg_first_scalar(base_tensor &t_,
4287 const base_tensor &c_,
4288 const base_tensor &d_,
4289 pscalar_func_twoargs f2_)
4290 : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
// Two-argument function object F with a scalar first operand:
// t[i] = F(tc1[0], tc2[i]). Only entry 0 of tc1 is read; t and tc2 must have
// the same size (debug assertion). F is held by reference.
4293 struct ga_instruction_eval_func_2arg_first_scalar_expr
4294 :
public ga_instruction {
4296 const base_tensor &tc1, &tc2;
4297 const ga_predef_function &F;
4298 virtual int exec() {
4299 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4300 "predefined function on one scalar and one tensor");
4301 GA_DEBUG_ASSERT(t.size() == tc2.size(),
"Wrong sizes");
4302 for (
size_type i = 0; i < t.size(); ++i)
4303 t[i] = F(tc1[0], tc2[i]);
4306 ga_instruction_eval_func_2arg_first_scalar_expr(base_tensor &t_,
4307 const base_tensor &c_,
4308 const base_tensor &d_,
4309 const ga_predef_function &F_)
4310 : t(t_), tc1(c_), tc2(d_), F(F_) {}
// Two-argument function with a scalar second operand:
// t[i] = (*f2)(tc1[i], tc2[0]). Only entry 0 of tc2 is read; t and tc1 must
// have the same size (debug assertion).
4313 struct ga_instruction_eval_func_2arg_second_scalar :
public ga_instruction {
4315 const base_tensor &tc1, &tc2;
4316 pscalar_func_twoargs f2;
4317 virtual int exec() {
4318 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4319 "predefined function on one tensor and one scalar");
4320 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
4321 for (
size_type i = 0; i < t.size(); ++i)
4322 t[i] = (*f2)(tc1[i], tc2[0]);
4325 ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
4326 const base_tensor &c_,
4327 const base_tensor &d_,
4328 pscalar_func_twoargs f2_)
4329 : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
// Two-argument function object F with a scalar second operand:
// t[i] = F(tc1[i], tc2[0]). Only entry 0 of tc2 is read; t and tc1 must have
// the same size (debug assertion). F is held by reference.
4332 struct ga_instruction_eval_func_2arg_second_scalar_expr
4333 :
public ga_instruction {
4335 const base_tensor &tc1, &tc2;
4336 const ga_predef_function &F;
4337 virtual int exec() {
4338 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4339 "predefined function on one tensor and one scalar");
4340 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
4341 for (
size_type i = 0; i < t.size(); ++i)
4342 t[i] = F(tc1[i], tc2[0]);
4345 ga_instruction_eval_func_2arg_second_scalar_expr(base_tensor &t_,
4346 const base_tensor &c_,
4347 const base_tensor &d_,
4348 const ga_predef_function &F_)
4349 : t(t_), tc1(c_), tc2(d_), F(F_) {}
// Entry-by-entry two-argument function: t[i] = (*f2)(tc1[i], tc2[i]).
// t, tc1 and tc2 must all have the same size (debug assertion).
4352 struct ga_instruction_eval_func_2arg :
public ga_instruction {
4354 const base_tensor &tc1, &tc2;
4355 pscalar_func_twoargs f2;
4356 virtual int exec() {
4357 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4358 "predefined function on two tensors");
4359 GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
4361 for (
size_type i = 0; i < t.size(); ++i)
4362 t[i] = (*f2)(tc1[i], tc2[i]);
4365 ga_instruction_eval_func_2arg(base_tensor &t_,
4366 const base_tensor &c_,
4367 const base_tensor &d_, pscalar_func_twoargs f2_)
4368 : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
// Entry-by-entry two-argument function object: t[i] = F(tc1[i], tc2[i]).
// t, tc1 and tc2 must all have the same size (debug assertion). F is held by
// reference.
4371 struct ga_instruction_eval_func_2arg_expr :
public ga_instruction {
4373 const base_tensor &tc1, &tc2;
4374 const ga_predef_function &F;
4375 virtual int exec() {
4376 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
4377 "predefined function on two tensors");
4378 GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
4380 for (
size_type i = 0; i < t.size(); ++i)
4381 t[i] = F(tc1[i], tc2[i]);
4384 ga_instruction_eval_func_2arg_expr(base_tensor &t_,
4385 const base_tensor &c_,
4386 const base_tensor &d_,
4387 const ga_predef_function &F_)
4388 : t(t_), tc1(c_), tc2(d_), F(F_) {}
// Evaluation of a nonlinear operator. The operator OP is held by reference;
// the argument list is copied into the instruction at construction.
// NOTE(review): exec() presumably evaluates OP on args into t -- confirm.
4391 struct ga_instruction_eval_OP :
public ga_instruction {
4393 const ga_nonlinear_operator &OP;
4394 ga_nonlinear_operator::arg_list args;
4395 virtual int exec() {
4396 GA_DEBUG_INFO(
"Instruction: operator evaluation");
4400 ga_instruction_eval_OP(base_tensor &t_,
const ga_nonlinear_operator &OP_,
4401 ga_nonlinear_operator::arg_list &args_)
4402 : t(t_), OP(OP_), args(args_) {}
// First derivative of a nonlinear operator: calls
// OP.derivative(args, der1, t). OP is held by reference; the argument list is
// copied at construction and der1 is set from the constructor's der1_.
4405 struct ga_instruction_eval_derivative_OP :
public ga_instruction {
4407 const ga_nonlinear_operator &OP;
4408 ga_nonlinear_operator::arg_list args;
4410 virtual int exec() {
4411 GA_DEBUG_INFO(
"Instruction: operator derivative evaluation");
4412 OP.derivative(args, der1, t);
4415 ga_instruction_eval_derivative_OP(base_tensor &t_,
4416 const ga_nonlinear_operator &OP_,
4417 ga_nonlinear_operator::arg_list &args_,
4419 : t(t_), OP(OP_), args(args_), der1(der1_) {}
// Second derivative of a nonlinear operator: calls
// OP.second_derivative(args, der1, der2, t). OP is held by reference; the
// argument list is copied at construction and der1/der2 are set from the
// constructor arguments der1_/der2_.
4422 struct ga_instruction_eval_second_derivative_OP :
public ga_instruction {
4424 const ga_nonlinear_operator &OP;
4425 ga_nonlinear_operator::arg_list args;
4427 virtual int exec() {
4428 GA_DEBUG_INFO(
"Instruction: operator second derivative evaluation");
4429 OP.second_derivative(args, der1, der2, t);
4432 ga_instruction_eval_second_derivative_OP
4433 (base_tensor &t_,
const ga_nonlinear_operator &OP_,
4435 : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
// Tensor slice. Iterates a multi-index mi3 over all index tuples of t and, for
// each one, writes its components into the positions of mi listed in
// indices. mi and indices are copied at construction, so the remaining
// positions of mi keep the values supplied by the caller.
// NOTE(review): each iteration presumably ends by copying tc1 at mi into t at
// mi3 -- confirm.
4438 struct ga_instruction_tensor_slice :
public ga_instruction {
4440 const base_tensor &tc1;
4441 bgeot::multi_index mi, indices;
4442 virtual int exec() {
4443 GA_DEBUG_INFO(
"Instruction: tensor slice");
4445 for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
4446 mi3.incrementation(t.sizes())) {
// Scatter the free indices of the slice into the full multi-index.
4448 mi[indices[j]] = mi3[j];
4453 ga_instruction_tensor_slice(base_tensor &t_,
4454 const base_tensor &tc1_,
4455 bgeot::multi_index &mi_,
4456 bgeot::multi_index &indices_)
4457 : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
// Calls an interpolate transformation and records the outcome in inin.
// trans->transform(...) is invoked with the current context and normal; its
// return value is stored in inin.pt_type and it fills inin.m, inin.Normal and
// inin.derivatives (derivatives are requested through compute_der).
// The visible outcomes are:
//  - an element was found: inin.G and inin.ctx are set up for it, has_ctx is
//    true, pt_y is taken from inin.ctx.xreal() and the normal is either
//    scaled to unit length or emptied;
//  - otherwise inin.ctx is invalidated, has_ctx is false and the normal (and
//    in the last case pt_y) is emptied.
// NOTE(review): the conditions selecting between these outcomes are not
// described here -- confirm against the branch tests.
4460 struct ga_instruction_transformation_call :
public ga_instruction {
4461 const ga_workspace &workspace;
4462 ga_instruction_set::interpolate_info &inin;
4463 pinterpolate_transformation trans;
4464 fem_interpolation_context &ctx;
4465 const base_small_vector &Normal;
4469 virtual int exec() {
4470 GA_DEBUG_INFO(
"Instruction: call interpolate transformation");
4474 inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
4475 face_num, P_ref, inin.Normal,
4476 inin.derivatives, compute_der);
// Rebuild the interpolation context on the target element.
4479 inin.m->points_of_convex(cv, inin.G);
4480 inin.ctx.change((inin.m)->trans_of_convex(cv),
4481 0, P_ref, inin.G, cv, face_num);
4482 inin.has_ctx =
true;
// Normalise the returned normal to unit Euclidean length.
4485 gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4487 inin.Normal.resize(0);
4488 inin.pt_y = inin.ctx.xreal();
4490 inin.ctx.invalid_convex_num();
4491 inin.Normal.resize(0);
4493 inin.has_ctx =
false;
// Nothing usable was returned: reset every output.
4496 inin.ctx.invalid_convex_num();
4497 inin.Normal.resize(0);
4498 inin.pt_y.resize(0);
4499 inin.has_ctx =
false;
4501 GA_DEBUG_INFO(
"Instruction: end of call interpolate transformation");
4504 ga_instruction_transformation_call
4505 (
const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4506 pinterpolate_transformation t, fem_interpolation_context &ctxx,
4507 const base_small_vector &No,
const mesh &mm,
bool compute_der_)
4508 : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
4509 compute_der(compute_der_) {}
// Interpolate transformation call for the neighbor element across a face,
// with a cache of matching integration points.
// When the current context has no precomputed data (ctx.have_pgp() is false),
// pai is null or built on the fly, or cancel_optimization is set, inin.ctx is
// invalidated. Otherwise the adjacent face is looked up; a gauss_pt_corresp
// key (both geometric transformations plus the paired local node numbers of
// the shared face) is built and searched in neighbor_corresp. On a miss the
// reference points on the neighbor are computed by inverting its geometric
// transformation at each face integration point and the resulting point table
// is stored in the map.
// If inin.ctx ends up with precomputed data and a valid element, only the
// point index is updated; the remaining visible code calls
// trans->transform(...) and handles its result.
// NOTE(review): the exact nesting of these branches is not described here --
// confirm against the branch tests.
4512 struct ga_instruction_neighbor_transformation_call :
public ga_instruction {
4513 const ga_workspace &workspace;
4514 ga_instruction_set::interpolate_info &inin;
4515 pinterpolate_transformation trans;
4516 fem_interpolation_context &ctx;
4517 base_small_vector dummy_normal;
4520 papprox_integration &pai;
4522 std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;
4524 virtual int exec() {
// Hard-wired switch; it is false here, so it never disables the cached path.
4525 bool cancel_optimization =
false;
4526 GA_DEBUG_INFO(
"Instruction: call interpolate neighbor transformation");
4528 if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
4529 || cancel_optimization) {
4530 inin.ctx.invalid_convex_num();
4535 auto adj_face = m.adjacent_face(cv, f);
4537 GMM_WARNING2(
"Adjacent face not found, "
4538 "probably an non-interior face");
4539 inin.ctx.invalid_convex_num();
// Build the cache key describing how the two elements share the face.
4541 gauss_pt_corresp gpc;
4542 gpc.pgt1 = m.trans_of_convex(cv);
4543 gpc.pgt2 = m.trans_of_convex(adj_face.cv);
4545 auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
4546 auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
4548 auto str1 = gpc.pgt1->structure();
4549 auto str2 = gpc.pgt2->structure();
4550 size_type nbptf1 = str1->nb_points_of_face(f);
4551 size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
// nodes holds pairs: even slots are local node numbers on this element,
// odd slots the local numbers of the same mesh point on the neighbor.
4552 gpc.nodes.resize(nbptf1*2);
4553 for (
size_type i = 0; i < nbptf1; ++i) {
4554 gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
4556 for (
size_type j = 0; j < nbptf2; ++j) {
4557 if (inds_pt2[j] == inds_pt1[i]) {
4558 gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
4563 GMM_ASSERT1(found,
"Internal error");
4565 bgeot::pstored_point_tab pspt = 0;
4566 auto itm = neighbor_corresp.find(gpc);
4567 if (itm != neighbor_corresp.end()) {
// Cache miss: map each face integration point onto the neighbor's
// reference element.
4570 size_type nbpt = pai->nb_points_on_face(f);
4572 gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
4573 size_type first_ind = pai->ind_first_point_on_face(f);
4575 &spt = *(pai->pintegration_points());
4577 m.points_of_convex(cv, G);
4578 fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
4579 std::vector<base_node> P_ref(nbpt);
4582 ctx_x.set_xref(spt[first_ind+i]);
4583 bool converged =
true;
4584 gic.
invert(ctx_x.xreal(), P_ref[i], converged);
// The inverted point must lie in the reference element up to 1E-4.
4585 bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
4586 GMM_ASSERT1(is_in && converged,
"Geometric transformation "
4587 "inversion has failed in neighbor transformation");
4589 pspt = store_point_tab(P_ref);
4590 neighbor_corresp[gpc] = pspt;
// Switch inin.ctx to the neighbor element using the precomputed points.
4592 m.points_of_convex(adj_face.cv, inin.G);
4593 bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
4594 inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
// Cached path: only the integration point index has to be moved.
4599 if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
4600 inin.ctx.set_ii(ipt);
4602 inin.has_ctx =
true;
4603 inin.pt_y = inin.ctx.xreal();
4605 gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
// Generic path through the transformation object; dummy_normal is passed
// both as the input normal and as the output normal slot.
4612 inin.pt_type = trans->transform(workspace, m, ctx, dummy_normal,
4613 &(inin.m), cv, face_num, P_ref,
4614 dummy_normal, inin.derivatives,
4618 inin.m->points_of_convex(cv, inin.G);
4619 inin.ctx.change((inin.m)->trans_of_convex(cv),
4620 0, P_ref, inin.G, cv, face_num);
4621 inin.has_ctx =
true;
4624 gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4626 inin.Normal.resize(0);
4627 inin.pt_y = inin.ctx.xreal();
4629 inin.ctx.invalid_convex_num();
4631 inin.has_ctx =
false;
// Nothing usable was returned: reset every output.
4634 inin.ctx.invalid_convex_num();
4635 inin.Normal.resize(0);
4636 inin.pt_y.resize(0);
4637 inin.has_ctx =
false;
4640 GA_DEBUG_INFO(
"Instruction: end of call neighbor interpolate "
4644 ga_instruction_neighbor_transformation_call
4645 (
const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4646 pinterpolate_transformation t, fem_interpolation_context &ctxx,
4647 const mesh &mm,
size_type &ipt_, papprox_integration &pai_,
4649 std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
4650 : workspace(w), inin(i), trans(t), ctx(ctxx), m(mm),
4651 ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
4652 neighbor_corresp(neighbor_corresp_) {}
// Scalar term assembly. Holds references to the computed tensor t, the
// accumulated scalar E and the weight coeff.
// NOTE(review): exec() presumably adds the weighted value of t to E -- confirm.
4656 struct ga_instruction_scalar_assembly :
public ga_instruction {
4657 const base_tensor &t;
4658 scalar_type &E, &coeff;
4659 virtual int exec() {
4660 GA_DEBUG_INFO(
"Instruction: scalar term assembly");
4664 ga_instruction_scalar_assembly(
const base_tensor &t_, scalar_type &E_,
4665 scalar_type &coeff_)
4666 : t(t_), E(E_), coeff(coeff_) {}
// Vector term assembly for a fem variable.
// Contributions coeff*t are collected in the element buffer elem: on the
// first integration point (ipt == 0) or in interpolate mode the buffer is
// re-initialised, on later points the contribution is added. On the last
// point (ipt == nbpt-1) or in interpolate mode the buffer is added into the
// global vector (Vi when reduced_mf is true, VI otherwise) starting at
// I->first(), at the positions given by the element's basic dofs.
// I and mf are references to pointers so the target can be changed after the
// instruction is built. The second constructor takes the interval and the
// mesh_fem directly: it stores their addresses in I__ / mf__ and binds I, mf
// and reduced_mf to the instruction's own members I__, mf__ and false_.
4669 struct ga_instruction_vector_assembly_mf :
public ga_instruction
4671 const base_tensor &t;
4672 base_vector &VI, &Vi;
4673 const fem_interpolation_context &ctx;
4674 const gmm::sub_interval *
const&I, *
const I__;
4675 const mesh_fem *
const&mf, *
const mf__;
4676 const bool &reduced_mf;
4677 const scalar_type &coeff;
4680 const bool interpolate;
4681 virtual int exec() {
4682 GA_DEBUG_INFO(
"Instruction: vector term assembly for fem variable");
4683 bool empty_weight = (coeff == scalar_type(0));
4684 if (ipt == 0 || interpolate) {
// Shrinking to 0 before the resize makes every entry freshly
// value-initialised when the weight is zero.
4685 if (empty_weight) elem.resize(0);
4686 elem.resize(t.size());
4688 copy_scaled_4(t, coeff, elem);
4689 }
else if (!empty_weight)
4691 add_scaled_4(t, coeff, elem);
4693 if (ipt == nbpt-1 || interpolate) {
4694 GA_DEBUG_ASSERT(mf,
"Internal error");
// Nothing is assembled when the context has no valid element.
4695 if (!ctx.is_convex_num_valid())
return 0;
4698 if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
4699 base_vector &V = reduced_mf ? Vi : VI;
4700 GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
4701 "Bad assembly vector size " << V.size() <<
">=" <<
4702 I->first() <<
"+"<< mf->nb_basic_dof());
4703 auto itr = elem.cbegin();
4704 auto itw = V.begin() + I->first();
4705 for (
const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
4707 *(itw+dof+q) += *itr++;
// Every entry of the element buffer must have been consumed.
4708 GMM_ASSERT1(itr == elem.end(),
"Internal error");
// Constructor for a target selected indirectly through I_ and mf_.
4713 ga_instruction_vector_assembly_mf
4714 (
const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
4715 const fem_interpolation_context &ctx_,
4716 const gmm::sub_interval *&I_,
const mesh_fem *&mf_,
4717 const bool &reduced_mf_,
4720 : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
4721 I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
4722 coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
// Constructor for a fixed interval and mesh_fem; both vector references
// refer to the same V_.
4724 ga_instruction_vector_assembly_mf
4725 (
const base_tensor &t_, base_vector &V_,
4726 const fem_interpolation_context &ctx_,
4727 const gmm::sub_interval &I_,
const mesh_fem &mf_,
4730 : t(t_), VI(V_), Vi(V_), ctx(ctx_),
4731 I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
4732 coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
// Constant target for the reduced_mf reference in the second constructor.
4734 const bool false_=
false;
// Vector term assembly for an im_data variable. The destination offset inside
// interval I is t.size() times the filtered index of the current integration
// point; the write must fit inside I (GMM_ASSERT1). The entries of t are then
// written to V starting at I.first() + offset, scaled by coeff.
// initialize defaults to false in the constructor.
// NOTE(review): the first of the two loops presumably overwrites V (taken when
// initialize is set) while the second accumulates -- confirm.
4737 struct ga_instruction_vector_assembly_imd :
public ga_instruction {
4738 const base_tensor &t;
4740 const fem_interpolation_context &ctx;
4741 const gmm::sub_interval &I;
4745 const bool initialize;
4746 virtual int exec() {
4747 GA_DEBUG_INFO(
"Instruction: vector term assembly for im_data variable");
4749 size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
4750 GMM_ASSERT1(i+t.size() <= I.size(),
4751 "Internal error "<<i<<
"+"<<t.size()<<
" <= "<<I.size());
4752 auto itw = V.begin() + I.first() + i;
4754 for (
const auto &val : t.as_vector())
4757 for (
const auto &val : t.as_vector())
4758 *itw++ += coeff*val;
4761 ga_instruction_vector_assembly_imd
4762 (
const base_tensor &t_, base_vector &V_,
4763 const fem_interpolation_context &ctx_,
const gmm::sub_interval &I_,
4764 const im_data &imd_, scalar_type &coeff_,
const size_type &ipt_,
4765 bool initialize_=
false)
4766 : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
4767 initialize(initialize_)
// Vector term assembly for a fixed size variable: adds coeff * t (viewed as a
// flat vector) to the sub-vector of V selected by interval I.
4771 struct ga_instruction_vector_assembly :
public ga_instruction {
4772 const base_tensor &t;
4774 const gmm::sub_interval &I;
4776 virtual int exec() {
4777 GA_DEBUG_INFO(
"Instruction: vector term assembly for "
4778 "fixed size variable");
4779 gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4782 ga_instruction_vector_assembly(
const base_tensor &t_, base_vector &V_,
4783 const gmm::sub_interval &I_,
4784 scalar_type &coeff_)
4785 : t(t_), V(V_), I(I_), coeff(coeff_) {}
// Assignment to im_data: stores tensor t into V through
// imd->set_tensor(V, element, point index, t), where the element and the
// point index are read from ctx. imd is a pointer that exec() dereferences
// without a null check.
4788 struct ga_instruction_assignment :
public ga_instruction {
4789 const base_tensor &t;
4791 const fem_interpolation_context &ctx;
4793 virtual int exec() {
4794 GA_DEBUG_INFO(
"Instruction: Assignement to im_data");
4795 imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4798 ga_instruction_assignment(
const base_tensor &t_, base_vector &V_,
4799 const fem_interpolation_context &ctx_,
4800 const im_data *imd_)
4801 : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
// Reads the residual entries of an im_data variable back into t. The source
// offset inside interval I is t.size() times the filtered index of the
// current integration point; the read must fit inside I (GMM_ASSERT1). Each
// entry of t is then overwritten with consecutive values of V starting at
// ifirst + offset.
// NOTE(review): ifirst is presumably I.first() -- confirm at its definition.
4804 struct ga_instruction_extract_residual_on_imd_dofs :
public ga_instruction {
4806 const base_vector &V;
4807 const fem_interpolation_context &ctx;
4808 const gmm::sub_interval &I;
4811 virtual int exec() {
4812 GA_DEBUG_INFO(
"Instruction: extract residual for im_data variable");
4815 size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
4816 GMM_ASSERT1(i+t.size() <= I.size(),
4817 "Internal error "<<i<<
"+"<<t.size()<<
" <= "<<I.size());
// i is advanced while copying, so it is no longer the base offset afterwards.
4818 for (
auto &&val : t.as_vector())
4819 val = V[ifirst+(i++)];
4822 ga_instruction_extract_residual_on_imd_dofs
4823 (base_tensor &t_,
const base_vector &V_,
4824 const fem_interpolation_context &ctx_,
const gmm::sub_interval &I_,
4825 const im_data &imd_,
const size_type &ipt_)
4826 : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
// Generic addition of an element matrix into K for any matrix type MAT that
// supports K(row, col) += value. Entries of elem are read sequentially and
// only those whose absolute value exceeds threshold are added.
// The fourth parameter (a scratch vector) and the last size_type parameter
// are unnamed and therefore unused in this overload.
4831 template <
class MAT>
4832 inline void add_elem_matrix
4833 (MAT &K,
const std::vector<size_type> &dofs1,
4834 const std::vector<size_type> &dofs2, std::vector<size_type> &,
4835 const base_vector &elem, scalar_type threshold,
size_type ) {
4837 base_vector::const_iterator it = elem.cbegin();
// Negligible entries are skipped so they do not create matrix entries.
4840 if (gmm::abs(*it) > threshold)
4841 K(dof1, dof2) += *it;
// Overload of add_elem_matrix that works directly on the columns of K, each
// column being a std::vector of gmm::elt_rsvector_ entries.
// dofs1_sort is filled with a permutation of the row indices ordered by
// increasing dofs1 value (insertion sort). Entries with absolute value above
// threshold are then merged into the column: an existing entry with the same
// index is reused, otherwise the tail of the column is shifted to make room.
// A warning is issued when more than 1300 entries lie after the insertion
// point.
// NOTE(review): the columns are presumably kept sorted by index c, which the
// position search relies on -- confirm.
4853 inline void add_elem_matrix
4855 const std::vector<size_type> &dofs1,
const std::vector<size_type> &dofs2,
4856 std::vector<size_type> &dofs1_sort,
4857 const base_vector &elem, scalar_type threshold,
size_type N) {
4861 dofs1_sort.resize(s1);
// Insertion sort step: shift larger entries up to place row i.
4864 while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4865 { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4874 gmm::elt_rsvector_<scalar_type> ev;
4877 base_vector::const_iterator it = elem.cbegin();
4880 if (first) first =
false;
4882 std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4886 col.reserve(maxest);
4889 if (gmm::abs(ev.e) > threshold) {
4898 if (gmm::abs(ev.e) > threshold) {
4905 if (col[l].c < ev.c) {
4913 auto itc = col.begin() + ind;
// Same index already stored at the insertion point.
4914 if (ind != nb && itc->c == ev.c)
4917 if (nb - ind > 1300)
4918 GMM_WARNING2(
"Inefficient addition of element in rsvector with "
4919 << col.size() - ind <<
" non-zero entries");
// Shift the tail of the column one slot to the right, back to front.
4922 itc = col.begin() + ind;
4923 auto ite = col.end();
4926 for (; ite != itc; --ite) { --itee; *ite = *itee; }
// Variant of the column-wise element matrix addition that takes no row dof
// vector and no sort scratch vector. For each column K[dof2], entries of elem
// with absolute value above threshold are merged into the column: an
// existing entry with the same index is reused, otherwise the tail of the
// column is shifted to make room. A warning is issued when more than 1300
// entries lie after the insertion point.
// NOTE(review): as the name suggests, the row indices are presumably a
// contiguous range -- confirm against the signature.
4939 inline void add_elem_matrix_contiguous_rows
4942 const std::vector<size_type> &dofs2,
4943 const base_vector &elem, scalar_type threshold) {
4945 gmm::elt_rsvector_<scalar_type> ev;
4947 base_vector::const_iterator it = elem.cbegin();
4950 if (first) first =
false;
4952 std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4959 if (gmm::abs(ev.e) > threshold) {
4968 if (gmm::abs(ev.e) > threshold) {
4975 if (col[l].c < ev.c) {
4983 auto itc = col.begin() + ind;
// Same index already stored at the insertion point.
4984 if (ind != nb && itc->c == ev.c)
4987 if (nb - ind > 1300)
4988 GMM_WARNING2(
"Inefficient addition of element in rsvector with "
4989 << col.size() - ind <<
" non-zero entries");
// Shift the tail of the column one slot to the right, back to front.
4992 itc = col.begin() + ind;
4993 auto ite = col.end();
4996 for (; ite != itc; --ite) { --itee; *ite = *itee; }
// Fills dofs with global indices. Every entry starts at ifirst; the scalar
// dof numbers from mfdofs are then added, either one entry per dof or, in the
// second loop, qmult consecutive entries dof+0 .. dof+qmult-1 per dof.
// NOTE(review): the first loop is presumably the qmult == 1 case -- confirm.
// size is expected to match the number of entries written.
5008 inline void populate_dofs_vector
5009 (std::vector<size_type> &dofs,
5011 const getfem::mesh::ind_set &mfdofs)
5013 dofs.assign(size, ifirst);
5014 auto itd = dofs.begin();
5016 for (
const auto &dof : mfdofs) *itd++ += dof;
5018 for (
const auto &dof : mfdofs)
5019 for (
size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
// Overload without a multiplicity: every entry of dofs is set to ifirst plus
// the corresponding dof number from mfdofs. size is expected to equal the
// number of dofs in mfdofs.
5022 inline void populate_dofs_vector
5024 const getfem::mesh::ind_set &mfdofs)
5026 dofs.assign(size, ifirst);
5027 auto itd = dofs.begin();
5028 for (
const auto &dof : mfdofs) *itd++ += dof;
// Fills dofs with the contiguous range ifirst, ifirst+1, ..., ifirst+size-1.
5032 inline void populate_contiguous_dofs_vector
5035 dofs.assign(size, ifirst);
5036 for (
size_type i=0; i < size; ++i) dofs[i] += i;
// Common base of the matrix assembly instructions. Holds the computed tensor
// t, the two interpolation contexts, the scaling factors alpha1, alpha2 and
// coeff (all by reference), and the scratch dof vectors dofs1, dofs2 and
// dofs1_sort.
// add_tensor_to_element_matrix() collects coeff*alpha1*alpha2 * t in the
// element buffer elem: the buffer is re-initialised on one path (emptied first
// when the weight is zero, so it is refilled from scratch) and accumulated
// into otherwise.
// NOTE(review): the re-initialisation path is presumably taken when
// initialize is true -- confirm against the branch test.
5039 struct ga_instruction_matrix_assembly_base :
public ga_instruction {
5040 const base_tensor &t;
5041 const fem_interpolation_context &ctx1, &ctx2;
5042 const scalar_type &alpha1, &alpha2, &coeff;
5046 std::vector<size_type> dofs1, dofs2, dofs1_sort;
5047 void add_tensor_to_element_matrix(
bool initialize,
bool empty_weight) {
5049 if (empty_weight) elem.resize(0);
5050 elem.resize(t.size());
5052 copy_scaled_4(t, coeff*alpha1*alpha2, elem);
5053 }
else if (!empty_weight)
5056 add_scaled_4(t, coeff*alpha1*alpha2, elem);
5058 ga_instruction_matrix_assembly_base
5059 (
const base_tensor &t_,
5060 const fem_interpolation_context &ctx1_,
5061 const fem_interpolation_context &ctx2_,
5062 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
5064 : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
5065 coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
5066 dofs1(0), dofs2(0), dofs1_sort(0)
// Constant false member; no use of it is visible within this struct.
5069 const bool false_=
false;
// Matrix-term assembly where both the row and the column variable are
// described by a mesh_fem. Each side is given either through a
// variable_group_info (interval, mesh_fem and reduced flag are then read
// through references to its fields) or through a fixed interval/mesh_fem
// pair stored in the `*__` members.
5074 struct ga_instruction_matrix_assembly_mf_mf
5075 :
public ga_instruction_matrix_assembly_base
// Destination matrices, one per (reduced row?, reduced column?) combination.
5077 model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
// I1/I2 (and mf1/mf2 below) are references to pointers: they refer either
// to a field of a variable_group_info or to the matching `*__` member.
5078 const gmm::sub_interval *
const&I1, *
const&I2, *
const I1__, *
const I2__;
5079 const mesh_fem *
const&mf1, *
const&mf2, *
const mf1__, *
const mf2__;
5080 const bool &reduced_mf1, &reduced_mf2;
// Accumulates the scaled tensor into the element matrix and, at the last
// integration point (or at every point when `interpolate` is set), adds
// the element matrix to the selected global matrix. Returns 0.
5081 virtual int exec() {
5082 GA_DEBUG_INFO(
"Instruction: matrix term assembly mf-mf");
// Nothing to do when either context has no valid convex number.
5083 if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
return 0;
5085 bool initialize = (ipt == 0 || interpolate);
5086 bool empty_weight = (coeff == scalar_type(0));
5087 add_tensor_to_element_matrix(initialize, empty_weight);
5089 if (ipt == nbpt-1 || interpolate) {
// Choose the destination according to the reduced flags of both sides.
5090 model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
5091 : (reduced_mf2 ? Kru : Krr);
5092 GA_DEBUG_ASSERT(I1->size() && I2->size(),
"Internal error");
// NOTE(review): `ninf` is computed on a line missing from this listing;
// presumably a norm of `elem` -- confirm.
5095 if (ninf == scalar_type(0))
return 0;
5097 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5098 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5099 size_type ifirst1 = I1->first(), ifirst2 = I2->first();
// NOTE(review): `N` and `qmult1` are declared on lines missing here.
5103 if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
5104 populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
5105 mf1->ind_scalar_basic_dof_of_element(cv1));
// Same mesh_fem and same element: the column dofs are the row dofs,
// shifted by the interval offset difference when the intervals differ.
5106 if (mf1 == mf2 && cv1 == cv2) {
5107 if (ifirst1 == ifirst2) {
5108 add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5110 populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
5111 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// General case: the column dofs are built from mf2 on element cv2.
5114 N = std::max(N, ctx2.N());
5116 if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
5117 populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
5118 mf2->ind_scalar_basic_dof_of_element(cv2));
5119 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// Constructor: both sides described by variable_group_info; the `*__`
// members are unused and set to nullptr.
5125 ga_instruction_matrix_assembly_mf_mf
5126 (
const base_tensor &t_,
5127 model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
5128 model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
5129 const fem_interpolation_context &ctx1_,
5130 const fem_interpolation_context &ctx2_,
5131 const ga_instruction_set::variable_group_info &vgi1,
5132 const ga_instruction_set::variable_group_info &vgi2,
5135 : ga_instruction_matrix_assembly_base
5136 (t_, ctx1_, ctx2_, vgi1.
alpha, vgi2.
alpha, coeff_, nbpt_, ipt_,
5138 Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
5139 I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
5140 mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
5141 reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}
// Constructor: fixed row side (I1_, mf1_, a1), column side from vgi2.
// Both row variants of each matrix alias the same argument and
// reduced_mf1 refers to `false_`.
5143 ga_instruction_matrix_assembly_mf_mf
5144 (
const base_tensor &t_,
5145 model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
5146 const fem_interpolation_context &ctx1_,
5147 const fem_interpolation_context &ctx2_,
5148 const gmm::sub_interval &I1_,
const mesh_fem &mf1_,
const scalar_type &a1,
5149 const ga_instruction_set::variable_group_info &vgi2,
5152 : ga_instruction_matrix_assembly_base
5153 (t_, ctx1_, ctx2_, a1, vgi2.
alpha, coeff_, nbpt_, ipt_, interpolate_),
5154 Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
5155 I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
5156 mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
5157 reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}
// Constructor: row side from vgi1, fixed column side (I2_, mf2_, a2).
5159 ga_instruction_matrix_assembly_mf_mf
5160 (
const base_tensor &t_,
5161 model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
5162 const fem_interpolation_context &ctx1_,
5163 const fem_interpolation_context &ctx2_,
5164 const ga_instruction_set::variable_group_info &vgi1,
5165 const gmm::sub_interval &I2_,
const mesh_fem &mf2_,
const scalar_type &a2,
5168 : ga_instruction_matrix_assembly_base
5169 (t_, ctx1_, ctx2_, vgi1.
alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
5170 Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
5171 I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
5172 mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
5173 reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}
// Constructor: both sides fixed; all four matrix references alias K_.
5175 ga_instruction_matrix_assembly_mf_mf
5176 (
const base_tensor &t_, model_real_sparse_matrix &K_,
5177 const fem_interpolation_context &ctx1_,
5178 const fem_interpolation_context &ctx2_,
5179 const gmm::sub_interval &I1_,
const mesh_fem &mf1_,
const scalar_type &a1,
5180 const gmm::sub_interval &I2_,
const mesh_fem &mf2_,
const scalar_type &a2,
5183 : ga_instruction_matrix_assembly_base
5184 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
5185 Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
5186 I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
5187 mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
5188 reduced_mf1(false_), reduced_mf2(false_) {}
// Matrix-term assembly where the row variable is im_data or fixed-size
// (contiguous row indices) and the column variable is described by a
// mesh_fem.
5192 struct ga_instruction_matrix_assembly_imd_mf
5193 :
public ga_instruction_matrix_assembly_base
// Destinations for a non-reduced (Kxr) or reduced (Kxu) column mesh_fem.
5195 model_real_sparse_matrix &Kxr, &Kxu;
5196 const gmm::sub_interval *I1, *I2__, *
const &I2;
// May be null; exec() only applies the per-point row offset when it is set.
5197 const im_data *imd1;
5198 const mesh_fem *
const mf2__, *
const &mf2;
5199 const bool &reduced_mf2;
// Builds the element matrix from the current tensor and adds it to the
// selected global matrix on every execution. Returns 0.
5200 virtual int exec() {
5201 GA_DEBUG_INFO(
"Instruction: matrix term assembly "
5202 "(imdata or fixed size)-mf");
5203 if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
return 0;
5205 bool empty_weight = (coeff == scalar_type(0));
// `initialize` is always true here: no accumulation across points.
5206 add_tensor_to_element_matrix(
true, empty_weight);
// NOTE(review): `ninf` is computed on a line missing from this listing.
5209 if (ninf == scalar_type(0))
return 0;
5211 model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
5212 GA_DEBUG_ASSERT(I1->size() && I2->size(),
"Internal error");
5213 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5214 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5215 size_type ifirst1 = I1->first(), ifirst2 = I2->first();
// With im_data, shift the row block by s1 times the filtered index of the
// current point.
5216 if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ctx1.ii());
5218 populate_contiguous_dofs_vector(dofs1, s1, ifirst1);
// NOTE(review): `qmult2` is declared on a line missing from this listing.
5220 if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
5221 populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
5222 mf2->ind_scalar_basic_dof_of_element(cv2));
5223 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
// Constructor: column side from a variable_group_info. The base receives
// `zero_` as the point count and `false` for `interpolate`.
5227 ga_instruction_matrix_assembly_imd_mf
5228 (
const base_tensor &t_,
5229 model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
5230 const fem_interpolation_context &ctx1_,
5231 const fem_interpolation_context &ctx2_,
5232 const gmm::sub_interval &I1_,
const im_data *imd1_,
const scalar_type &a1,
5233 const ga_instruction_set::variable_group_info &vgi2,
5234 const scalar_type &coeff_,
const size_type &ipt_)
5235 : ga_instruction_matrix_assembly_base
5236 (t_, ctx1_, ctx2_, a1, vgi2.
alpha, coeff_, zero_, ipt_, false),
5237 Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
5238 imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
// Constructor: fixed column side; both matrix references alias K_ and
// reduced_mf2 refers to `false_`.
5241 ga_instruction_matrix_assembly_imd_mf
5242 (
const base_tensor &t_, model_real_sparse_matrix &K_,
5243 const fem_interpolation_context &ctx1_,
5244 const fem_interpolation_context &ctx2_,
5245 const gmm::sub_interval &I1_,
const im_data *imd1_,
const scalar_type &a1,
5246 const gmm::sub_interval &I2_,
const mesh_fem &mf2_,
const scalar_type &a2,
5247 const scalar_type &coeff_,
const size_type &ipt_)
5248 : ga_instruction_matrix_assembly_base
5249 (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5250 Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
5251 imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
// Matrix-term assembly where the row variable is described by a mesh_fem
// and the column variable is im_data or fixed-size (contiguous column
// indices). Mirror image of the imd-mf instruction.
5254 struct ga_instruction_matrix_assembly_mf_imd
5255 :
public ga_instruction_matrix_assembly_base
// Destinations for a non-reduced (Krx) or reduced (Kux) row mesh_fem.
5257 model_real_sparse_matrix &Krx, &Kux;
5258 const gmm::sub_interval *
const &I1, *
const I1__, *I2;
5259 const mesh_fem *
const &mf1, *
const mf1__;
5260 const bool &reduced_mf1;
// May be null; exec() only applies the per-point column offset when set.
5261 const im_data *imd2;
// Builds the element matrix from the current tensor and adds it to the
// selected global matrix on every execution. Returns 0.
5262 virtual int exec() {
5263 GA_DEBUG_INFO(
"Instruction: matrix term assembly "
5264 "mf-(imdata or fixed size)");
5265 if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
return 0;
5267 bool empty_weight = (coeff == scalar_type(0));
// `initialize` is always true here: no accumulation across points.
5268 add_tensor_to_element_matrix(
true, empty_weight);
// NOTE(review): `ninf` is computed on a line missing from this listing.
5271 if (ninf == scalar_type(0))
return 0;
5273 model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
5274 GA_DEBUG_ASSERT(I1->size() && I2->size(),
"Internal error");
5275 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5276 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5277 size_type ifirst1 = I1->first(), ifirst2 = I2->first();
// With im_data, shift the column block by s2 times the filtered index of
// the current point.
5278 if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ctx2.ii());
// NOTE(review): `qmult1` is declared on a line missing from this listing.
5281 if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
5282 populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
5283 mf1->ind_scalar_basic_dof_of_element(cv1));
5284 populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
5285 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
// Constructor: row side from a variable_group_info. The base receives
// `zero_` as the point count and `false` for `interpolate`.
5289 ga_instruction_matrix_assembly_mf_imd
5290 (
const base_tensor &t_,
5291 model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
5292 const fem_interpolation_context &ctx1_,
5293 const fem_interpolation_context &ctx2_,
5294 const ga_instruction_set::variable_group_info &vgi1,
5295 const gmm::sub_interval &I2_,
const im_data *imd2_,
const scalar_type &a2,
5296 const scalar_type &coeff_,
const size_type &ipt_)
5297 : ga_instruction_matrix_assembly_base
5298 (t_, ctx1_, ctx2_, vgi1.
alpha, a2, coeff_, zero_, ipt_, false),
5299 Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
5300 mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
// Constructor: fixed row side; both matrix references alias K_ and
// reduced_mf1 refers to `false_`.
5303 ga_instruction_matrix_assembly_mf_imd
5304 (
const base_tensor &t_, model_real_sparse_matrix &K_,
5305 const fem_interpolation_context &ctx1_,
5306 const fem_interpolation_context &ctx2_,
5307 const gmm::sub_interval &I1_,
const mesh_fem &mf1_,
const scalar_type &a1,
5308 const gmm::sub_interval &I2_,
const im_data *imd2_,
const scalar_type &a2,
5309 const scalar_type &coeff_,
const size_type &ipt_)
5310 : ga_instruction_matrix_assembly_base
5311 (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5312 Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
5313 mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
// Matrix-term assembly where both the row and the column variable are
// im_data or fixed-size, so both index ranges are contiguous.
5318 struct ga_instruction_matrix_assembly_imd_imd
5319 :
public ga_instruction_matrix_assembly_base
5321 model_real_sparse_matrix &K;
5322 const gmm::sub_interval &I1, &I2;
5323 const im_data *imd1, *imd2;
// Builds the element matrix from the current tensor and adds it to K on
// every execution. Returns 0.
5324 virtual int exec() {
5325 GA_DEBUG_INFO(
"Instruction: matrix term assembly "
5326 "(imdata or fixed size)-(imdata or fixed size)");
5327 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
5329 bool empty_weight = (coeff == scalar_type(0));
5330 add_tensor_to_element_matrix(
true, empty_weight);
// NOTE(review): `ninf` is computed on a line missing from this listing.
5333 if (ninf == scalar_type(0))
return 0;
5335 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5336 size_type ifirst1 = I1.first(), ifirst2 = I2.first();
// Per-point offsets into the im_data storage.
// NOTE(review): the guards of these two statements are on lines missing
// here; presumably `if (imd1)` / `if (imd2)` as in the sibling structs.
5338 ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ctx1.ii());
5340 ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ctx2.ii());
// Rows are passed as (first index, count) rather than as an index vector.
5342 populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
5343 add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
// Constructor: the base receives `zero_` as the point count and `false`
// for `interpolate`.
5346 ga_instruction_matrix_assembly_imd_imd
5347 (
const base_tensor &t_, model_real_sparse_matrix &K_,
5348 const fem_interpolation_context &ctx1_,
5349 const fem_interpolation_context &ctx2_,
5350 const gmm::sub_interval &I1_,
const im_data *imd1_,
const scalar_type &a1,
5351 const gmm::sub_interval &I2_,
const im_data *imd2_,
const scalar_type &a2,
5352 const scalar_type &coeff_,
const size_type &ipt_)
5353 : ga_instruction_matrix_assembly_base
5354 (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5355 K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
// Matrix-term assembly specialised for the case where the dof lists of the
// two mesh_fems are used directly, without a multiplicity factor.
// The element matrix is accumulated over the integration points and added
// to K at the last one.
5359 struct ga_instruction_matrix_assembly_standard_scalar
5360 :
public ga_instruction_matrix_assembly_base
5362 model_real_sparse_matrix &K;
5363 const gmm::sub_interval &I1, &I2;
5364 const mesh_fem *pmf1, *pmf2;
5365 virtual int exec() {
5366 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard "
// NOTE(review): the branch heads choosing between overwrite and
// accumulation are on lines missing from this listing; presumably the
// overwrite happens at the first point (ipt == 0) -- confirm.
5369 elem.resize(t.size());
5371 copy_scaled_4(t, coeff*alpha1*alpha2, elem);
5375 add_scaled_4(t, coeff*alpha1*alpha2, elem);
// Flush to the global matrix once the last integration point is reached.
5377 if (ipt == nbpt-1) {
5378 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
// NOTE(review): `ninf` is computed on a line missing from this listing.
5381 if (ninf == scalar_type(0))
return 0;
5383 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
5385 auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
5386 GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0],
"Internal error");
5387 populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);
// Same mesh_fem and same element: reuse the row dofs for the columns,
// shifted by the interval offset difference when the intervals differ.
5389 if (pmf2 == pmf1 && cv1 == cv2) {
5390 if (I1.first() == I2.first()) {
5391 add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5393 populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
5395 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// General case: the column dofs come from pmf2 on element cv2.
5399 auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
5400 GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1],
"Internal error");
5401 populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
5402 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// Constructor: stores the references; `interpolate` is fixed to false.
5407 ga_instruction_matrix_assembly_standard_scalar
5408 (
const base_tensor &t_, model_real_sparse_matrix &K_,
5409 const fem_interpolation_context &ctx1_,
5410 const fem_interpolation_context &ctx2_,
5411 const gmm::sub_interval &I1_,
const gmm::sub_interval &I2_,
5412 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
5413 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
5415 : ga_instruction_matrix_assembly_base
5416 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5417 K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
// Matrix-term assembly variant that expands each mesh_fem dof by a
// multiplicity factor (qmult1 / qmult2) and uses the 8-way unrolled
// scaling helpers. The element matrix is accumulated over the integration
// points and added to K at the last one.
5420 struct ga_instruction_matrix_assembly_standard_vector
5421 :
public ga_instruction_matrix_assembly_base
5423 model_real_sparse_matrix &K;
5424 const gmm::sub_interval &I1, &I2;
5425 const mesh_fem *pmf1, *pmf2;
5426 virtual int exec() {
5427 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard "
// NOTE(review): the branch heads choosing between overwrite and
// accumulation are on lines missing from this listing; presumably the
// overwrite happens at the first point (ipt == 0) -- confirm.
5430 elem.resize(t.size());
5431 copy_scaled_8(t, coeff*alpha1*alpha2, elem);
5436 add_scaled_8(t, coeff*alpha1*alpha2, elem);
// Flush to the global matrix once the last integration point is reached.
5438 if (ipt == nbpt-1) {
5439 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
// NOTE(review): `ninf` is computed on a line missing from this listing.
5442 if (ninf == scalar_type(0))
return 0;
5443 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
5445 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
// NOTE(review): `qmult1` / `qmult2` are declared on lines missing here.
5448 if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
5449 populate_dofs_vector(dofs1, s1, I1.first(), qmult1,
5450 pmf1->ind_scalar_basic_dof_of_element(cv1));
// Identical row and column dofs: reuse dofs1 on both sides.
5452 if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
5453 add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
// Same mesh_fem and element but different interval: shift the row dofs.
5455 if (pmf2 == pmf1 && cv1 == cv2) {
5456 populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
// Otherwise the column dofs are built from pmf2 on element cv2.
5461 if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
5462 populate_dofs_vector(dofs2, s2, I2.first(), qmult2,
5463 pmf2->ind_scalar_basic_dof_of_element(cv2));
5465 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// Constructor: stores the references; `interpolate` is fixed to false.
5470 ga_instruction_matrix_assembly_standard_vector
5471 (
const base_tensor &t_, model_real_sparse_matrix &K_,
5472 const fem_interpolation_context &ctx1_,
5473 const fem_interpolation_context &ctx2_,
5474 const gmm::sub_interval &I1_,
const gmm::sub_interval &I2_,
5475 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
5476 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
5478 : ga_instruction_matrix_assembly_base
5479 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5480 K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
// Matrix-term assembly optimised for tensors in "format 10" with qdim QQ
// (QQ is 2 or 3, enforced by the static_assert in the constructor).
// Only an (s1/QQ) x (s2/QQ) element matrix is stored; it is then added to K
// several times with all dof indices incremented by one between additions.
// NOTE(review): the `template <int QQ>` header is presumably on a line
// missing from this listing.
5484 struct ga_instruction_matrix_assembly_standard_vector_opt10
5485 :
public ga_instruction_matrix_assembly_base
5487 model_real_sparse_matrix &K;
5488 const gmm::sub_interval &I1, &I2;
5489 const mesh_fem *pmf1, *pmf2;
5490 virtual int exec() {
5491 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard "
5492 "vector fems optimized for format 10 qdim " << QQ);
// Reduced block sizes: one entry per group of QQ tensor entries.
5494 size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
5495 scalar_type e = coeff*alpha1*alpha2;
// First of two near-identical loops: this one overwrites `elem`, reading
// every QQ-th entry of `t` starting at offset j*s1_q.
// NOTE(review): the branch heads, the outer `j` loops and `s1_q` are on
// lines missing from this listing.
5497 elem.resize(ss1*ss2);
5498 auto itel = elem.begin();
5500 auto it = t.begin() + j*s1_q;
5501 for (
size_type i = 0; i < ss1; ++i, it += QQ)
5502 *itel++ = (*it) * e;
// Second loop: same traversal, but accumulating into `elem`.
5505 auto itel = elem.begin();
5507 auto it = t.begin() + j*s1_q;
5508 for (
size_type i = 0; i < ss1; ++i, it += QQ)
5509 *itel++ += (*it) * e;
// Flush to the global matrix once the last integration point is reached.
5512 if (ipt == nbpt-1) {
5513 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
// NOTE(review): `ninf` and `N` are defined on lines missing from this
// listing; unlike the sibling structs, `ninf` is passed to add_elem_matrix
// without a 1E-14 factor, so presumably the factor is already included.
5516 if (ninf == scalar_type(0))
return 0;
5518 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5519 size_type i1 = I1.first(), i2 = I2.first();
5521 populate_dofs_vector(dofs1, ss1, i1,
5522 pmf1->ind_scalar_basic_dof_of_element(cv1));
// When rows and columns share mesh_fem, element and offset, dofs1 is used
// for both sides and dofs2 is never incremented below.
5523 bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
5527 populate_dofs_vector(dofs2, ss2, i2,
5528 pmf2->ind_scalar_basic_dof_of_element(cv2));
5530 std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
// Add the same element matrix repeatedly, shifting every dof index by one
// between additions.
5531 add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5532 for (
size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5533 if (!same_dofs)
for (
size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5534 add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
// NOTE(review): a line is missing before this third pass; presumably it
// restricts the pass to QQ == 3 -- confirm.
5536 for (
size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5537 if (!same_dofs)
for (
size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5538 add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
// Constructor: stores the references; `interpolate` is fixed to false.
5544 ga_instruction_matrix_assembly_standard_vector_opt10
5545 (
const base_tensor &t_, model_real_sparse_matrix &Kn_,
5546 const fem_interpolation_context &ctx1_,
5547 const fem_interpolation_context &ctx2_,
5548 const gmm::sub_interval &In1_,
const gmm::sub_interval &In2_,
5549 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
5550 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
5552 : ga_instruction_matrix_assembly_base
5553 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5554 K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
5556 static_assert(QQ >= 2 && QQ <=3,
5557 "Template implemented only for QQ=2 and QQ=3");
// Instruction performing the sub-diagonal part of a condensation of a
// cluster of variables. From the visible code, exec():
//   1. gathers the KQQloc blocks into the dense matrix invKqqqq and inverts
//      it in place,
//   2. forms Kqqjj = invKqqqq * (KQJloc blocks) and
//      Rqq = invKqqqq * (RQprime blocks),
//   3. writes Rqq/coeff back into the RQprime tensors and the Kqqjj blocks
//      into the KQJprime tensors.
// NOTE(review): several lines are missing from this listing (guards,
// zero-initialisations, the declaration of Rqq); what they do is not
// described here.
5562 struct ga_instruction_condensation_sub :
public ga_instruction {
// Output tensors (pointers are copied from the constructor arguments).
5565 gmm::dense_matrix<base_tensor *> KQJprime;
5566 std::vector<base_tensor *> RQprime;
// Input tensors, held as pointers to const.
5567 gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
5568 base_tensor invKqqqq, Kqqjj;
// Each entry is read in exec() as {block index, start offset, end offset}.
5570 std::vector<std::array<size_type,3>> partQ, partJ;
5571 const scalar_type &coeff;
5572 virtual int exec() {
5573 GA_DEBUG_INFO(
"Instruction: variable cluster subdiagonal condensation");
// Step 1: copy each available KQQ block into its slot of invKqqqq.
// The loops run qq2 outermost, so each block is read column by column.
5575 for (
const auto &qqq1 : partQ) {
5576 size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5577 for (
const auto &qqq2 : partQ) {
5578 size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5579 if (KQQloc(q1,q2)) {
5580 auto itr = KQQloc(q1,q2)->cbegin();
5581 GMM_ASSERT1(KQQloc(q1,q2)->size()
5582 == (qq1end-qq1start)*(qq2end-qq2start),
5584 for (
size_type qq2=qq2start; qq2 < qq2end; ++qq2)
5585 for (
size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5586 invKqqqq(qq1,qq2) = *itr++;
// In-place inversion of the assembled square matrix.
5591 bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));
// Determine the column count of every J block from the available KQJ
// tensors and size the matching KQJprime outputs.
5595 for (
auto &&jjj : partJ) {
5598 for (
const auto &qqq : partQ) {
5602 GMM_ASSERT1(new_j == KQJloc(q,j)->size(1),
"Internal error");
5604 new_j = KQJloc(q,j)->size(1);
5608 for (
const auto &qqq : partQ) {
5610 KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
5617 Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
// Step 2a: Kqqjj += invKqqqq(:, q2 range) * KQJ(q2, j), block by block.
5623 for (
const auto &jjj : partJ) {
5624 size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
5625 for (
const auto &qqq2 : partQ) {
5626 size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5628 auto itr = KQJloc(q2,j)->begin();
5629 for (
size_type jj=jjstart; jj < jjend; ++jj) {
5630 for (
size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5631 for (
size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
5632 Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
5637 GMM_ASSERT1(itr == KQJloc(q2,j)->cend(),
"Internal error");
// Step 2b: Rqq += invKqqqq(:, q2 range) * RQprime[q2].
5641 for (
const auto &qqq2 : partQ) {
5642 size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5644 auto itr = RQprime[q2]->cbegin();
5645 for (
size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5646 for (
size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
5647 Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
5649 GMM_ASSERT1(itr == RQprime[q2]->cend(),
"Internal error");
// Step 3: scatter the results back into the per-block output tensors.
5655 for (
const auto &qqq1 : partQ) {
5656 size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5658 auto itw = RQprime[q1]->begin();
// The right-hand side part is divided by `coeff` when written back.
5659 for (
size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5660 *itw++ = Rqq[qq1]/coeff;
5662 for (
const auto &jjj2 : partJ) {
5663 size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
5664 auto itw = KQJprime(q1,j2)->begin();
5665 for (
size_type jj2=jj2start; jj2 < jj2end; ++jj2)
5666 for (
size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5667 *itw++ = Kqqjj(qq1,jj2);
// Constructor: copies the pointer containers, records which J and Q blocks
// take part (initially with zero start/end offsets), then sizes invKqqqq
// and Rqq from the end offset of the last Q block.
// NOTE(review): the selection conditions and the offset computation are
// on lines missing from this listing.
5673 ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
5674 std::vector<base_tensor *> &RQpr,
5675 const gmm::dense_matrix<base_tensor *> &KQQ,
5676 const gmm::dense_matrix<base_tensor *> &KQJ,
5677 const std::set<size_type> &Qset,
5678 const scalar_type &coeff_)
5679 : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
// Element-wise copy: the local matrices hold pointers to const tensors.
5682 KQQloc.resize(KQQ.nrows(), KQQ.ncols());
5683 KQJloc.resize(KQJ.nrows(), KQJ.ncols());
5684 for (
size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
5685 for (
size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];
5687 for (
size_type j=0; j < KQJ.ncols(); ++j)
5690 partJ.push_back(std::array<size_type,3>{j,0,0});
5696 partQ.push_back(std::array<size_type,3>{q,0,0});
// Check that the Q blocks have consistent sizes across rows and columns.
5698 for (
auto &qqq1 : partQ) {
5703 GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
5704 new_q == KQQ(q2,q1)->size(1),
"Internal error");
5706 new_q = KQQ(q1,q2)->size(0);
5711 invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
5712 Rqq.resize(partQ.back()[2]);
// Instruction subtracting from Kij, for every index k, the matrix product
// (*KiQ[k]) * (*KQj[k]).
// NOTE(review): the declarations of Kij, m, n and qqsize and the loop
// heads over k and jj are on lines missing from this listing.
5718 struct ga_instruction_condensation_super_K :
public ga_instruction {
// Factor tensors; the k-th entries of both vectors are multiplied together.
5720 std::vector<base_tensor *> KiQ, KQj;
5723 virtual int exec() {
5724 GA_DEBUG_INFO(
"Instruction: contribution of condensation to kept part");
5728 Kij.adjust_sizes(m,n);
5731 const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
// K1 is m x qqsize and K2 is qqsize x n.
5733 GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
5736 base_tensor::iterator it = Kij.begin();
// Flat indices treat both factors as column-major (first index fastest).
5738 for (
size_type ii = 0; ii < m; ++ii, ++it)
5739 for (
size_type qq = 0; qq < qqsize; ++qq)
5740 *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
5741 GA_DEBUG_ASSERT(it == Kij.end(),
"Wrong sizes");
// Constructor: takes both pointer vectors by value and copies them into
// the members; they must have the same length.
5745 ga_instruction_condensation_super_K(base_tensor &Kij_,
5746 const std::vector<base_tensor *> KiQ_,
5747 const std::vector<base_tensor *> KQj_)
5748 : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
5751 GMM_ASSERT1(KiQ.size() == KQj.size(),
"Internal error");
// Instruction subtracting from Ri, for every index k, the matrix-vector
// product (*KiQ[k]) * (*RQpr[k]).
// NOTE(review): the declarations of Ri, m and qqsize and the loop head
// over k are on lines missing from this listing.
5755 struct ga_instruction_condensation_super_R :
public ga_instruction {
// Matrix factors and vector factors; the k-th entries are paired together.
5757 std::vector<base_tensor *> KiQ, RQpr;
5760 virtual int exec() {
5761 GA_DEBUG_INFO(
"Instruction: contribution of condensation to primary rhs");
5767 const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
// K1 has m rows and R2 has qqsize entries.
5769 GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize,
"Internal error");
5770 base_tensor::iterator it = Ri.begin();
// The flat index treats K1 as column-major (first index fastest).
5771 for (
size_type ii = 0; ii < m; ++ii, ++it)
5772 for (
size_type qq = 0; qq < qqsize; ++qq)
5773 *it -= K1[ii+qq*m] * R2[qq];
5774 GA_DEBUG_ASSERT(it == Ri.end(),
"Wrong sizes");
// Constructor: takes both pointer vectors by value and copies them into
// the members; they must have the same length.
5778 ga_instruction_condensation_super_R(base_tensor &Ri_,
5779 const std::vector<base_tensor *> KiQ_,
5780 const std::vector<base_tensor *> RQpr_)
5781 : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
5784 GMM_ASSERT1(KiQ.size() == RQpr.size(),
"Internal error");
// Registers in `gis.extended_vars` a pointer to the value vector to use for
// `varname`. A variable group is handled by recursing on each member.
// A variable already present in `gis.extended_vars` is left untouched.
5792 static void extend_variable_in_gis(
const ga_workspace &workspace,
5793 const std::string &varname,
5794 ga_instruction_set &gis) {
5795 if (workspace.variable_group_exists(varname)) {
5796 for (
const std::string &v : workspace.variable_group(varname))
5797 extend_variable_in_gis(workspace, v, gis);
5798 }
else if (gis.extended_vars.count(varname) == 0) {
5799 const mesh_fem *mf = workspace.associated_mf(varname);
// Reduced mesh_fem: build the extended vector in
// `gis.really_extended_vars` and point at that copy.
5800 if (mf->is_reduced()) {
// NOTE(review): `n` is used on a line missing from this listing,
// presumably to size `U` before the extension -- confirm.
5801 auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
5802 base_vector &U = gis.really_extended_vars[varname];
5804 mf->extend_vector(workspace.value(varname), U);
5805 gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
// Otherwise point directly at the workspace's own value vector.
5807 gis.extended_vars[varname] = &(workspace.value(varname));
// Removes every occurrence of `pnode` from the list stored in `node_list`
// under the node's hash value, then does the same recursively for all
// children of `pnode`.
5812 static void ga_clear_node_list
5813 (pga_tree_node pnode, std::map<scalar_type,
5814 std::list<pga_tree_node> > &node_list) {
// std::map::operator[] creates an empty list if the hash value has no
// entry yet.
5815 std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
// Erase-while-iterating: erase() returns the iterator to continue from.
5816 for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
5817 it != loc_node_list.end(); ) {
5818 if (*it == pnode) it = loc_node_list.erase(it);
else ++it;
5820 for (
size_type i = 0; i < pnode->children.size(); ++i)
5821 ga_clear_node_list(pnode->children[i], node_list);
5826 static void ga_compile_node(
const pga_tree_node pnode,
5827 ga_workspace &workspace,
5828 ga_instruction_set &gis,
5829 ga_instruction_set::region_mim_instructions &rmi,
5830 const mesh &m,
bool function_case,
5831 ga_if_hierarchy &if_hierarchy) {
5833 if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
5834 pnode->node_type == GA_NODE_OPERATOR ||
5835 pnode->node_type == GA_NODE_SPEC_FUNC ||
5836 pnode->node_type == GA_NODE_CONSTANT ||
5837 pnode->node_type == GA_NODE_ALLINDICES ||
5838 pnode->node_type == GA_NODE_RESHAPE ||
5839 pnode->node_type == GA_NODE_SWAP_IND ||
5840 pnode->node_type == GA_NODE_IND_MOVE_LAST ||
5841 pnode->node_type == GA_NODE_CONTRACT)
return;
5845 pga_instruction pgai;
5846 ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
5847 ga_if_hierarchy new_if_hierarchy;
5849 const mesh_fem *mf1 = 0, *mf2 = 0;
5850 const mesh_fem **mfg1 = 0, **mfg2 = 0;
5851 fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
5852 bool tensor_to_clear =
false;
5853 bool tensor_to_adapt =
false;
5855 if (pnode->test_function_type) {
5856 if (pnode->name_test1.size())
5857 mf1 = workspace.associated_mf(pnode->name_test1);
5860 const std::string &intn1 = pnode->interpolate_name_test1;
5862 if (workspace.secondary_domain_exists(intn1)) {
5863 pctx1 = &(rmi.secondary_domain_infos.ctx);
5865 tensor_to_adapt =
true;
5866 pctx1 = &(rmi.interpolate_infos[intn1].ctx);
5867 if (workspace.variable_group_exists(pnode->name_test1)) {
5868 ga_instruction_set::variable_group_info &vgi =
5869 rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
5876 if (pnode->name_test2.size())
5877 mf2 = workspace.associated_mf(pnode->name_test2);
5880 const std::string &intn2 = pnode->interpolate_name_test2;
5882 if (workspace.secondary_domain_exists(intn2)) {
5883 pctx2 = &(rmi.secondary_domain_infos.ctx);
5885 tensor_to_adapt =
true;
5886 pctx2 = &(rmi.interpolate_infos[intn2].ctx);
5887 if (workspace.variable_group_exists(pnode->name_test2)) {
5888 ga_instruction_set::variable_group_info &vgi =
5889 rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
5900 pnode->t.set_to_original();
5901 pnode->t.set_sparsity(0, 0);
5902 bool is_uniform =
false;
5903 if (pnode->test_function_type == 1) {
5905 pgai = std::make_shared<ga_instruction_first_ind_tensor>
5906 (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5907 if (mf1 && mf1->is_uniform())
5908 { is_uniform =
true; pctx1->invalid_convex_num(); }
5909 }
else if (pnode->test_function_type == 2) {
5911 pgai = std::make_shared<ga_instruction_first_ind_tensor>
5912 (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5913 if (mf2 && mf2->is_uniform())
5914 { is_uniform =
true; pctx2->invalid_convex_num(); }
5915 }
else if (pnode->test_function_type == 3) {
5916 if ((mf1 || mfg1) && (mf2 || mfg2)) {
5917 pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
5918 (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
5919 pnode->qdim2, mf2, mfg2);
5920 if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
5922 pctx1->invalid_convex_num();
5923 pctx2->invalid_convex_num();
5925 }
else if (mf1 || mfg1) {
5926 pgai = std::make_shared<ga_instruction_first_ind_tensor>
5927 (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5928 if (mf1 && mf1->is_uniform())
5929 { is_uniform =
true; pctx1->invalid_convex_num(); }
5930 }
else if (mf2 || mfg2) {
5931 pgai = std::make_shared<ga_instruction_second_ind_tensor>
5932 (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5933 if (mf2 && mf2->is_uniform())
5934 { is_uniform =
true; pctx2->invalid_convex_num(); }
5939 pnode->t.set_to_original();
5940 if (rmi.node_list.count(pnode->hash_value) != 0) {
5941 for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
5945 if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
5946 pnode->t.set_to_copy(pnode1->t);
5949 if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
5951 if (pnode->nb_test_functions() == 2) {
5955 else { rmi.instructions.push_back(std::move(pgai)); }
5957 pgai = std::make_shared<ga_instruction_transpose_test>
5958 (pnode->tensor(), pnode1->tensor());
5959 rmi.instructions.push_back(std::move(pgai));
5961 pnode->t.set_to_copy(pnode1->t);
5966 std::stringstream ss;
5967 ss <<
"Detected wrong equivalent nodes:" << endl;
5968 ga_print_node(pnode, ss);
5969 ss << endl <<
" and " << endl;
5970 ga_print_node(pnode1, ss);
5971 ss << endl <<
"No problem, but hash values could be adapted." << endl;
5972 GMM_TRACE2(ss.str());
5977 if (is_uniform) { pgai->exec(); }
5979 if (tensor_to_adapt)
5980 rmi.instructions.push_back(std::move(pgai));
5982 rmi.elt_instructions.push_back(std::move(pgai));
5986 size_type interpolate_filter_inst = rmi.instructions.size();
5987 if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5988 pgai = pga_instruction();
5989 rmi.instructions.push_back(std::move(pgai));
5990 if_hierarchy.increment();
5991 new_if_hierarchy.child_of(if_hierarchy);
5992 pif_hierarchy = &new_if_hierarchy;
5995 for (
size_type i = 0; i < pnode->children.size(); ++i)
5996 ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
5997 function_case, *pif_hierarchy);
5999 if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
6000 const std::string &intn = pnode->interpolate_name;
6001 ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
6002 pgai = std::make_shared<ga_instruction_interpolate_filter>
6003 (pnode->tensor(), inin, pnode->nbc1,
6004 int(rmi.instructions.size() - interpolate_filter_inst));
6005 rmi.instructions[interpolate_filter_inst].swap(pgai);
6006 pgai = std::make_shared<ga_instruction_copy_tensor>
6007 (pnode->tensor(), pnode->children[0]->tensor());
6008 rmi.instructions.push_back(std::move(pgai));
6009 ga_clear_node_list(pnode->children[0], rmi.node_list);
6012 static scalar_type minus = -scalar_type(1);
6013 size_type nbch = pnode->children.size();
6014 pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
6015 pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
6016 bgeot::multi_index mi;
6017 const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
6019 size_type dim0 = child0 ? child0->tensor_order() : 0;
6020 size_type dim1 = child1 ? child1->tensor_order() : 0;
6022 switch (pnode->node_type) {
6024 case GA_NODE_PREDEF_FUNC:
case GA_NODE_OPERATOR:
case GA_NODE_SPEC_FUNC:
6025 case GA_NODE_CONSTANT:
case GA_NODE_ALLINDICES:
case GA_NODE_ZERO:
6026 case GA_NODE_RESHAPE:
case GA_NODE_CROSS_PRODUCT:
6027 case GA_NODE_SWAP_IND:
case GA_NODE_IND_MOVE_LAST:
6028 case GA_NODE_CONTRACT:
case GA_NODE_INTERPOLATE_FILTER:
6032 GMM_ASSERT1(!function_case,
6033 "No use of X is allowed in scalar functions");
6035 GA_DEBUG_ASSERT(pnode->tensor().size() == 1,
"dimensions mismatch");
6036 GMM_ASSERT1(pnode->nbc1 <= m.dim(),
6037 "Bad index for X in expression");
6038 pgai = std::make_shared<ga_instruction_X_component>
6039 (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
6041 if (pnode->tensor().size() != m.dim())
6042 pnode->init_vector_tensor(m.dim());
6043 pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
6045 rmi.instructions.push_back(std::move(pgai));
6048 case GA_NODE_ELT_SIZE:
6049 GMM_ASSERT1(!function_case,
6050 "No use of element_size is allowed in functions");
6051 if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
6052 pgai = std::make_shared<ga_instruction_element_size>
6053 (pnode->tensor(), gis.elt_size);
6054 gis.need_elt_size =
true;
6055 rmi.instructions.push_back(std::move(pgai));
6059 GMM_ASSERT1(!function_case,
6060 "No use of element_K is allowed in functions");
6061 pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
6063 rmi.instructions.push_back(std::move(pgai));
6067 GMM_ASSERT1(!function_case,
6068 "No use of element_B is allowed in functions");
6069 pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
6071 rmi.instructions.push_back(std::move(pgai));
6074 case GA_NODE_NORMAL:
6076 GMM_ASSERT1(!function_case,
6077 "No use of Normal is allowed in functions");
6078 if (pnode->tensor().size() != m.dim())
6079 pnode->init_vector_tensor(m.dim());
6080 const mesh_im_level_set *mimls
6081 =
dynamic_cast<const mesh_im_level_set *
>(rmi.im);
6082 if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
6084 pgai = std::make_shared<ga_instruction_level_set_normal_vector>
6085 (pnode->tensor(), mimls, gis.ctx);
6086 rmi.instructions.push_back(std::move(pgai));
6088 pgai = std::make_shared<ga_instruction_copy_Normal>
6089 (pnode->tensor(), gis.Normal);
6090 rmi.instructions.push_back(std::move(pgai));
6095 case GA_NODE_INTERPOLATE_X:
6096 case GA_NODE_INTERPOLATE_NORMAL:
6097 GMM_ASSERT1(!function_case,
6098 "No use of Interpolate is allowed in functions");
6099 if (pnode->tensor().size() != m.dim())
6100 pnode->init_vector_tensor(m.dim());
6101 if (pnode->node_type == GA_NODE_INTERPOLATE_X)
6102 pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
6104 rmi.interpolate_infos[pnode->interpolate_name].pt_y,
6105 rmi.interpolate_infos[pnode->interpolate_name]);
6106 else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
6107 pgai = std::make_shared<ga_instruction_copy_Normal>
6109 rmi.interpolate_infos[pnode->interpolate_name].Normal);
6110 rmi.instructions.push_back(std::move(pgai));
6113 case GA_NODE_INTERPOLATE_ELT_K:
6114 case GA_NODE_INTERPOLATE_ELT_B:
6115 GMM_ASSERT1(!function_case,
6116 "No use of Interpolate is allowed in functions");
6117 if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_K)
6118 pgai = std::make_shared<ga_instruction_element_K>
6120 rmi.interpolate_infos[pnode->interpolate_name].ctx);
6121 else if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_B)
6122 pgai = std::make_shared<ga_instruction_element_B>
6124 rmi.interpolate_infos[pnode->interpolate_name].ctx);
6125 rmi.instructions.push_back(std::move(pgai));
6128 case GA_NODE_SECONDARY_DOMAIN_X:
6129 case GA_NODE_SECONDARY_DOMAIN_NORMAL:
6131 GMM_ASSERT1(!function_case,
6132 "No use of Secondary_domain is allowed in functions");
6133 auto psd = workspace.secondary_domain(pnode->interpolate_name);
6134 size_type sddim = psd->mim().linked_mesh().dim();
6135 if (pnode->tensor().size() != sddim)
6136 pnode->init_vector_tensor(sddim);
6137 if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
6138 pgai = std::make_shared<ga_instruction_X>
6139 (pnode->tensor(), rmi.secondary_domain_infos.ctx);
6140 else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
6141 pgai = std::make_shared<ga_instruction_copy_Normal>
6142 (pnode->tensor(), rmi.secondary_domain_infos.Normal);
6143 rmi.instructions.push_back(std::move(pgai));
6147 case GA_NODE_VAL:
case GA_NODE_GRAD:
6148 case GA_NODE_HESS:
case GA_NODE_DIVERG:
6149 case GA_NODE_ELEMENTARY_VAL:
case GA_NODE_ELEMENTARY_GRAD:
6150 case GA_NODE_ELEMENTARY_HESS:
case GA_NODE_ELEMENTARY_DIVERG:
6151 case GA_NODE_XFEM_PLUS_VAL:
case GA_NODE_XFEM_PLUS_GRAD:
6152 case GA_NODE_XFEM_PLUS_HESS:
case GA_NODE_XFEM_PLUS_DIVERG:
6153 case GA_NODE_XFEM_MINUS_VAL:
case GA_NODE_XFEM_MINUS_GRAD:
6154 case GA_NODE_XFEM_MINUS_HESS:
case GA_NODE_XFEM_MINUS_DIVERG:
6156 bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
6157 pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
6158 pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
6159 pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
6160 if (function_case) {
6161 GMM_ASSERT1(!is_elementary,
6162 "No elementary transformation is allowed in functions");
6163 GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
6164 pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
6165 pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
6166 pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
6167 "Xfem_plus not allowed in functions");
6168 GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
6169 pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
6170 pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
6171 pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
6172 "Xfem_plus not allowed in functions");
6173 const mesh_fem *mf = workspace.associated_mf(pnode->name);
6174 const im_data *imd = workspace.associated_im_data(pnode->name);
6175 GMM_ASSERT1(!mf,
"No fem expression is allowed in "
6176 "function expression");
6177 GMM_ASSERT1(!imd,
"No integration method data is allowed in "
6178 "function expression");
6179 if (gmm::vect_size(workspace.value(pnode->name)) == 1)
6180 pgai = std::make_shared<ga_instruction_copy_scalar>
6181 (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
6183 pgai = std::make_shared<ga_instruction_copy_vect>
6184 (pnode->tensor().as_vector(), workspace.value(pnode->name));
6185 rmi.instructions.push_back(std::move(pgai));
6187 const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6188 const im_data *imd = workspace.associated_im_data(pnode->name);
6190 if (is_elementary) {
6191 mf = workspace.associated_mf(pnode->elementary_target);
6192 GMM_ASSERT1(mf && mfo,
6193 "Wrong context for elementary transformation");
6194 GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6195 "The finite element of variable " << pnode->name
6196 <<
" has to be defined on the same mesh as the "
6197 <<
"integration method or interpolation used");
6201 GMM_ASSERT1(pnode->node_type == GA_NODE_VAL,
6202 "Only values can be extracted on im_data (no " <<
6203 "gradient, Hessian, xfem or elementary tranformation" <<
6205 pgai = std::make_shared<ga_instruction_extract_local_im_data>
6206 (pnode->tensor(), *imd, workspace.value(pnode->name),
6207 gis.pai, gis.ctx, workspace.qdim(pnode->name));
6208 rmi.instructions.push_back(std::move(pgai));
6210 GMM_ASSERT1(mf,
"Internal error");
6212 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6213 "The finite element of variable " <<
6214 (is_elementary ? pnode->elementary_target : pnode->name)
6215 <<
" has to be defined on the same mesh as the "
6216 <<
"integration method or interpolation used");
6219 if (rmi.local_dofs.count(pnode->name) == 0) {
6220 rmi.local_dofs[pnode->name] = base_vector(1);
6221 extend_variable_in_gis(workspace, pnode->name, gis);
6224 if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
6226 pgai = std::make_shared<ga_instruction_slice_local_dofs>
6227 (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
6228 rmi.local_dofs[pnode->name],
6229 workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
6230 rmi.elt_instructions.push_back(std::move(pgai));
6234 if (mf->is_uniform()) {
6235 if (rmi.pfps.count(mf) == 0) {
6237 pgai = std::make_shared<ga_instruction_update_pfp>
6238 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6239 rmi.begin_instructions.push_back(std::move(pgai));
6241 }
else if (rmi.pfps.count(mf) == 0 ||
6242 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6243 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6245 pgai = std::make_shared<ga_instruction_update_pfp>
6246 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6247 rmi.instructions.push_back(std::move(pgai));
6251 pgai = pga_instruction();
6252 switch (pnode->node_type) {
6253 case GA_NODE_VAL:
case GA_NODE_ELEMENTARY_VAL:
6254 if (rmi.base.count(mf) == 0 ||
6255 !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6256 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6257 pgai = std::make_shared<ga_instruction_val_base>
6258 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6261 case GA_NODE_XFEM_PLUS_VAL:
6262 if (rmi.xfem_plus_base.count(mf) == 0 ||
6263 !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6265 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6266 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6267 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6270 case GA_NODE_XFEM_MINUS_VAL:
6271 if (rmi.xfem_minus_base.count(mf) == 0 ||
6272 !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6274 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6275 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6276 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6279 case GA_NODE_GRAD:
case GA_NODE_DIVERG:
6280 case GA_NODE_ELEMENTARY_GRAD:
case GA_NODE_ELEMENTARY_DIVERG:
6281 if (rmi.grad.count(mf) == 0 ||
6282 !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6283 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6284 pgai = std::make_shared<ga_instruction_grad_base>
6285 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6288 case GA_NODE_XFEM_PLUS_GRAD:
case GA_NODE_XFEM_PLUS_DIVERG:
6289 if (rmi.xfem_plus_grad.count(mf) == 0 ||
6290 !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6292 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6293 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6294 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6297 case GA_NODE_XFEM_MINUS_GRAD:
case GA_NODE_XFEM_MINUS_DIVERG:
6298 if (rmi.xfem_minus_grad.count(mf) == 0 ||
6299 !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6301 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6302 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6303 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6306 case GA_NODE_HESS:
case GA_NODE_ELEMENTARY_HESS:
6307 if (rmi.hess.count(mf) == 0 ||
6308 !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6309 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6310 pgai = std::make_shared<ga_instruction_hess_base>
6311 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6314 case GA_NODE_XFEM_PLUS_HESS:
6315 if (rmi.xfem_plus_hess.count(mf) == 0 ||
6316 !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6318 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6319 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6320 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6323 case GA_NODE_XFEM_MINUS_HESS:
6324 if (rmi.xfem_minus_hess.count(mf) == 0 ||
6325 !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6327 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6328 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6329 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6333 default : GMM_ASSERT1(
false,
"Internal error");
6335 if (pgai) rmi.instructions.push_back(std::move(pgai));
6338 switch (pnode->node_type) {
6340 pgai = std::make_shared<ga_instruction_val>
6341 (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
6342 workspace.qdim(pnode->name));
6345 pgai = std::make_shared<ga_instruction_grad>
6346 (pnode->tensor(), rmi.grad[mf],
6347 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6350 pgai = std::make_shared<ga_instruction_hess>
6351 (pnode->tensor(), rmi.hess[mf],
6352 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6354 case GA_NODE_DIVERG:
6355 pgai = std::make_shared<ga_instruction_diverg>
6356 (pnode->tensor(), rmi.grad[mf],
6357 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6359 case GA_NODE_XFEM_PLUS_VAL:
6360 pgai = std::make_shared<ga_instruction_val>
6361 (pnode->tensor(), rmi.xfem_plus_base[mf],
6362 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6364 case GA_NODE_XFEM_PLUS_GRAD:
6365 pgai = std::make_shared<ga_instruction_grad>
6366 (pnode->tensor(), rmi.xfem_plus_grad[mf],
6367 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6369 case GA_NODE_XFEM_PLUS_HESS:
6370 pgai = std::make_shared<ga_instruction_hess>
6371 (pnode->tensor(), rmi.xfem_plus_hess[mf],
6372 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6374 case GA_NODE_XFEM_PLUS_DIVERG:
6375 pgai = std::make_shared<ga_instruction_diverg>
6376 (pnode->tensor(), rmi.xfem_plus_grad[mf],
6377 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6379 case GA_NODE_XFEM_MINUS_VAL:
6380 pgai = std::make_shared<ga_instruction_val>
6381 (pnode->tensor(), rmi.xfem_minus_base[mf],
6382 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6384 case GA_NODE_XFEM_MINUS_GRAD:
6385 pgai = std::make_shared<ga_instruction_grad>
6386 (pnode->tensor(), rmi.xfem_minus_grad[mf],
6387 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6389 case GA_NODE_XFEM_MINUS_HESS:
6390 pgai = std::make_shared<ga_instruction_hess>
6391 (pnode->tensor(), rmi.xfem_minus_hess[mf],
6392 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6394 case GA_NODE_XFEM_MINUS_DIVERG:
6395 pgai = std::make_shared<ga_instruction_diverg>
6396 (pnode->tensor(), rmi.xfem_minus_grad[mf],
6397 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6399 case GA_NODE_ELEMENTARY_VAL:
6401 ga_instruction_set::elementary_trans_info &eti
6402 = rmi.elementary_trans_infos
6403 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6405 std::make_shared<ga_instruction_elementary_trans_val>
6406 (pnode->tensor(), rmi.base[mf],
6407 rmi.local_dofs[pnode->name],
6408 workspace.qdim(pnode->elementary_target),
6409 workspace.elementary_transformation(pnode->elementary_name),
6410 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6413 case GA_NODE_ELEMENTARY_GRAD:
6415 ga_instruction_set::elementary_trans_info &eti
6416 = rmi.elementary_trans_infos
6417 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6419 std::make_shared<ga_instruction_elementary_trans_grad>
6420 (pnode->tensor(), rmi.grad[mf],
6421 rmi.local_dofs[pnode->name],
6422 workspace.qdim(pnode->elementary_target),
6423 workspace.elementary_transformation(pnode->elementary_name),
6424 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6427 case GA_NODE_ELEMENTARY_HESS:
6429 ga_instruction_set::elementary_trans_info &eti
6430 = rmi.elementary_trans_infos
6431 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6433 std::make_shared<ga_instruction_elementary_trans_hess>
6434 (pnode->tensor(), rmi.hess[mf],
6435 rmi.local_dofs[pnode->name],
6436 workspace.qdim(pnode->elementary_target),
6437 workspace.elementary_transformation(pnode->elementary_name),
6438 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6441 case GA_NODE_ELEMENTARY_DIVERG:
6443 ga_instruction_set::elementary_trans_info &eti
6444 = rmi.elementary_trans_infos
6445 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6447 std::make_shared<ga_instruction_elementary_trans_diverg>
6448 (pnode->tensor(), rmi.grad[mf],
6449 rmi.local_dofs[pnode->name],
6450 workspace.qdim(pnode->elementary_target),
6451 workspace.elementary_transformation(pnode->elementary_name),
6452 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6457 rmi.instructions.push_back(std::move(pgai));
6463 case GA_NODE_SECONDARY_DOMAIN_VAL:
case GA_NODE_SECONDARY_DOMAIN_GRAD:
6464 case GA_NODE_SECONDARY_DOMAIN_HESS:
case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6466 GMM_ASSERT1(!function_case,
"internal error");
6467 const mesh_fem *mf = workspace.associated_mf(pnode->name);
6468 const im_data *imd = workspace.associated_im_data(pnode->name);
6469 const std::string &intn = pnode->interpolate_name;
6470 auto &sdi = rmi.secondary_domain_infos;
6472 fem_interpolation_context *pctx = &(sdi.ctx);
6473 papprox_integration pai = sdi.pai;
6474 psecondary_domain psd = workspace.secondary_domain(intn);
6477 pgai = std::make_shared<ga_instruction_extract_local_im_data>
6478 (pnode->tensor(), *imd, workspace.value(pnode->name),
6479 pai, *pctx, workspace.qdim(pnode->name));
6480 rmi.instructions.push_back(std::move(pgai));
6482 GMM_ASSERT1(mf,
"Internal error");
6483 GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6484 "The finite element of variable " << pnode->name <<
6485 " has to be defined on the same mesh as the "
6486 "integration method or interpolation used on the "
6487 "secondary domain");
6490 if (sdi.local_dofs.count(pnode->name) == 0) {
6491 sdi.local_dofs[pnode->name] = base_vector(1);
6492 extend_variable_in_gis(workspace, pnode->name, gis);
6494 if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
6496 pgai = std::make_shared<ga_instruction_slice_local_dofs>
6497 (*mf, *(gis.extended_vars[pnode->name]), *pctx,
6498 sdi.local_dofs[pnode->name],
6499 workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
6500 rmi.elt_instructions.push_back(std::move(pgai));
6504 if (mf->is_uniform()) {
6505 if (sdi.pfps.count(mf) == 0) {
6507 pgai = std::make_shared<ga_instruction_update_pfp>
6508 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6509 rmi.begin_instructions.push_back(std::move(pgai));
6511 }
else if (sdi.pfps.count(mf) == 0 ||
6512 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6513 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6515 pgai = std::make_shared<ga_instruction_update_pfp>
6516 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6517 rmi.instructions.push_back(std::move(pgai));
6521 pgai = pga_instruction();
6522 switch (pnode->node_type) {
6523 case GA_NODE_SECONDARY_DOMAIN_VAL:
6524 if (sdi.base.count(mf) == 0 ||
6525 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6526 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6527 pgai = std::make_shared<ga_instruction_val_base>
6528 (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6531 case GA_NODE_SECONDARY_DOMAIN_GRAD:
6532 case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6533 if (sdi.grad.count(mf) == 0 ||
6534 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6535 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6536 pgai = std::make_shared<ga_instruction_grad_base>
6537 (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6540 case GA_NODE_SECONDARY_DOMAIN_HESS:
6541 if (sdi.hess.count(mf) == 0 ||
6542 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6543 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6544 pgai = std::make_shared<ga_instruction_hess_base>
6545 (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6548 default : GMM_ASSERT1(
false,
"Internal error");
6550 if (pgai) rmi.instructions.push_back(std::move(pgai));
6553 switch (pnode->node_type) {
6554 case GA_NODE_SECONDARY_DOMAIN_VAL:
6555 pgai = std::make_shared<ga_instruction_val>
6556 (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
6557 workspace.qdim(pnode->name));
6559 case GA_NODE_SECONDARY_DOMAIN_GRAD:
6560 pgai = std::make_shared<ga_instruction_grad>
6561 (pnode->tensor(), sdi.grad[mf],
6562 sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6564 case GA_NODE_SECONDARY_DOMAIN_HESS:
6565 pgai = std::make_shared<ga_instruction_hess>
6566 (pnode->tensor(), sdi.hess[mf],
6567 sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6569 case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6570 pgai = std::make_shared<ga_instruction_diverg>
6571 (pnode->tensor(), sdi.grad[mf],
6572 sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6576 rmi.instructions.push_back(std::move(pgai));
6581 case GA_NODE_INTERPOLATE_VAL:
case GA_NODE_INTERPOLATE_GRAD:
6582 case GA_NODE_INTERPOLATE_HESS:
case GA_NODE_INTERPOLATE_DIVERG:
6584 extend_variable_in_gis(workspace, pnode->name, gis);
6586 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6587 const std::string &intn = pnode->interpolate_name;
6588 const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6589 fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6590 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6591 if (workspace.variable_group_exists(pnode->name)) {
6592 ga_instruction_set::variable_group_info &vgi =
6593 rmi.interpolate_infos[intn].groups_info[pnode->name];
6594 mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6597 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6599 pgai = std::make_shared<ga_instruction_interpolate_val>
6600 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6601 workspace.qdim(pnode->name),
6602 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6603 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6605 pgai = std::make_shared<ga_instruction_interpolate_grad>
6606 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6607 workspace.qdim(pnode->name),
6608 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6609 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6611 pgai = std::make_shared<ga_instruction_interpolate_hess>
6612 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6613 workspace.qdim(pnode->name),
6614 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6616 pgai = std::make_shared<ga_instruction_interpolate_diverg>
6617 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6618 workspace.qdim(pnode->name),
6619 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6621 rmi.instructions.push_back(std::move(pgai));
6625 case GA_NODE_INTERPOLATE_DERIVATIVE:
6626 GMM_ASSERT1(!function_case,
6627 "No use of Interpolate is allowed in functions");
6628 pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
6630 rmi.interpolate_infos[pnode->interpolate_name_der]
6631 .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6632 rmi.instructions.push_back(std::move(pgai));
6635 case GA_NODE_VAL_TEST:
case GA_NODE_GRAD_TEST:
6636 case GA_NODE_HESS_TEST:
case GA_NODE_DIVERG_TEST:
6637 case GA_NODE_ELEMENTARY_VAL_TEST:
case GA_NODE_ELEMENTARY_GRAD_TEST:
6638 case GA_NODE_ELEMENTARY_HESS_TEST:
case GA_NODE_ELEMENTARY_DIVERG_TEST:
6639 case GA_NODE_XFEM_PLUS_VAL_TEST:
case GA_NODE_XFEM_PLUS_GRAD_TEST:
6640 case GA_NODE_XFEM_PLUS_HESS_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6641 case GA_NODE_XFEM_MINUS_VAL_TEST:
case GA_NODE_XFEM_MINUS_GRAD_TEST:
6642 case GA_NODE_XFEM_MINUS_HESS_TEST:
case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6646 bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6647 pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6648 pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6649 pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6650 const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6651 if (is_elementary) {
6652 mf = workspace.associated_mf(pnode->elementary_target);
6653 GMM_ASSERT1(mf && mfo,
6654 "Wrong context for elementary transformation");
6655 GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6656 "The finite element of variable " << pnode->name
6657 <<
" has to be defined on the same mesh as the "
6658 <<
"integration method or interpolation used");
6662 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6663 "The finite element of variable " <<
6664 (is_elementary ? pnode->elementary_target : pnode->name)
6665 <<
" and the applied integration method have to be"
6666 <<
" defined on the same mesh");
6670 if (rmi.pfps.count(mf) == 0) {
6672 pgai = std::make_shared<ga_instruction_update_pfp>
6673 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6674 rmi.begin_instructions.push_back(std::move(pgai));
6676 }
else if (rmi.pfps.count(mf) == 0 ||
6677 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6678 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6680 pgai = std::make_shared<ga_instruction_update_pfp>
6681 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6682 rmi.instructions.push_back(std::move(pgai));
6686 pgai = pga_instruction();
6687 switch (pnode->node_type) {
6688 case GA_NODE_VAL_TEST:
case GA_NODE_ELEMENTARY_VAL_TEST:
6689 if (rmi.base.count(mf) == 0 ||
6690 !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6691 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6692 pgai = std::make_shared<ga_instruction_val_base>
6693 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6696 case GA_NODE_XFEM_PLUS_VAL_TEST:
6697 if (rmi.xfem_plus_base.count(mf) == 0 ||
6698 !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6700 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6701 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6702 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6705 case GA_NODE_XFEM_MINUS_VAL_TEST:
6706 if (rmi.xfem_minus_base.count(mf) == 0 ||
6707 !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6709 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6710 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6711 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6714 case GA_NODE_GRAD_TEST:
case GA_NODE_DIVERG_TEST:
6715 case GA_NODE_ELEMENTARY_GRAD_TEST:
6716 case GA_NODE_ELEMENTARY_DIVERG_TEST:
6717 if (rmi.grad.count(mf) == 0 ||
6718 !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6719 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6720 pgai = std::make_shared<ga_instruction_grad_base>
6721 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6724 case GA_NODE_XFEM_PLUS_GRAD_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6725 if (rmi.xfem_plus_grad.count(mf) == 0 ||
6726 !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6728 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6729 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6730 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6733 case GA_NODE_XFEM_MINUS_GRAD_TEST:
6734 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6735 if (rmi.xfem_minus_grad.count(mf) == 0 ||
6736 !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6738 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6739 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6740 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6743 case GA_NODE_HESS_TEST:
case GA_NODE_ELEMENTARY_HESS_TEST:
6744 if (rmi.hess.count(mf) == 0 ||
6745 !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6746 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6747 pgai = std::make_shared<ga_instruction_hess_base>
6748 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6751 case GA_NODE_XFEM_PLUS_HESS_TEST:
6752 if (rmi.xfem_plus_hess.count(mf) == 0 ||
6753 !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6755 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6756 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6757 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6760 case GA_NODE_XFEM_MINUS_HESS_TEST:
6761 if (rmi.xfem_minus_hess.count(mf) == 0 ||
6762 !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6764 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6765 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6766 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6770 default : GMM_ASSERT1(
false,
"Internal error");
6772 if (pgai) rmi.instructions.push_back(std::move(pgai));
6775 switch(pnode->node_type) {
6776 case GA_NODE_VAL_TEST:
6778 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6779 pnode->t.set_sparsity(1, mf->get_qdim());
6780 tensor_to_clear =
true;
6781 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6782 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6784 pgai = std::make_shared<ga_instruction_copy_val_base>
6785 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6788 case GA_NODE_GRAD_TEST:
6790 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6791 pnode->t.set_sparsity(2, mf->get_qdim());
6792 tensor_to_clear =
true;
6793 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6794 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6796 pgai = std::make_shared<ga_instruction_copy_grad_base>
6797 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6800 case GA_NODE_HESS_TEST:
6802 pgai = std::make_shared<ga_instruction_copy_hess_base>
6803 (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6804 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6805 pnode->t.set_sparsity(3, mf->get_qdim());
6807 case GA_NODE_DIVERG_TEST:
6809 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6810 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6812 case GA_NODE_XFEM_PLUS_VAL_TEST:
6814 pgai = std::make_shared<ga_instruction_copy_val_base>
6815 (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6816 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6817 pnode->t.set_sparsity(1, mf->get_qdim());
6819 case GA_NODE_XFEM_PLUS_GRAD_TEST:
6821 pgai = std::make_shared<ga_instruction_copy_grad_base>
6822 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6823 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6824 pnode->t.set_sparsity(2, mf->get_qdim());
6826 case GA_NODE_XFEM_PLUS_HESS_TEST:
6828 pgai = std::make_shared<ga_instruction_copy_hess_base>
6829 (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6830 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6831 pnode->t.set_sparsity(3, mf->get_qdim());
6833 case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6835 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6836 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6838 case GA_NODE_XFEM_MINUS_VAL_TEST:
6840 pgai = std::make_shared<ga_instruction_copy_val_base>
6841 (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6842 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6843 pnode->t.set_sparsity(1, mf->get_qdim());
6845 case GA_NODE_XFEM_MINUS_GRAD_TEST:
6847 pgai = std::make_shared<ga_instruction_copy_grad_base>
6848 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6849 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6850 pnode->t.set_sparsity(2, mf->get_qdim());
6852 case GA_NODE_XFEM_MINUS_HESS_TEST:
6854 pgai = std::make_shared<ga_instruction_copy_hess_base>
6855 (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6856 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6857 pnode->t.set_sparsity(3, mf->get_qdim());
6859 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6861 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6862 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6864 case GA_NODE_ELEMENTARY_VAL_TEST:
6866 ga_instruction_set::elementary_trans_info &eti
6867 = rmi.elementary_trans_infos
6868 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6870 std::make_shared<ga_instruction_elementary_trans_val_base>
6871 (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6872 workspace.elementary_transformation(pnode->elementary_name),
6873 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6876 case GA_NODE_ELEMENTARY_GRAD_TEST:
6878 ga_instruction_set::elementary_trans_info &eti
6879 = rmi.elementary_trans_infos
6880 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6882 std::make_shared<ga_instruction_elementary_trans_grad_base>
6883 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6884 workspace.elementary_transformation(pnode->elementary_name),
6885 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6888 case GA_NODE_ELEMENTARY_HESS_TEST:
6890 ga_instruction_set::elementary_trans_info &eti
6891 = rmi.elementary_trans_infos
6892 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6894 std::make_shared<ga_instruction_elementary_trans_hess_base>
6895 (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6896 workspace.elementary_transformation(pnode->elementary_name),
6897 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6900 case GA_NODE_ELEMENTARY_DIVERG_TEST:
6902 ga_instruction_set::elementary_trans_info &eti
6903 = rmi.elementary_trans_infos
6904 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6906 std::make_shared<ga_instruction_elementary_trans_diverg_base>
6907 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6908 workspace.elementary_transformation(pnode->elementary_name),
6909 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6914 if (pgai) rmi.instructions.push_back(std::move(pgai));
6916 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6920 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6921 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6922 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6923 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6925 GMM_ASSERT1(!function_case,
"internal error");
6926 const mesh_fem *mf = workspace.associated_mf(pnode->name);
6927 const std::string &intn = pnode->interpolate_name;
6928 auto &sdi = rmi.secondary_domain_infos;
6930 fem_interpolation_context *pctx = &(sdi.ctx);
6931 papprox_integration pai = sdi.pai;
6932 psecondary_domain psd = workspace.secondary_domain(intn);
6934 GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6935 "The finite element of variable " << pnode->name <<
6936 " and the applied integration method have to be"
6937 " defined on the same mesh for secondary domain");
6941 if (sdi.pfps.count(mf) == 0) {
6943 pgai = std::make_shared<ga_instruction_update_pfp>
6944 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6945 rmi.begin_instructions.push_back(std::move(pgai));
6947 }
else if (sdi.pfps.count(mf) == 0 ||
6948 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6949 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6951 pgai = std::make_shared<ga_instruction_update_pfp>
6952 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6953 rmi.instructions.push_back(std::move(pgai));
6957 pgai = pga_instruction();
6958 switch (pnode->node_type) {
6959 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6960 if (sdi.base.count(mf) == 0 ||
6961 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6962 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6963 pgai = std::make_shared<ga_instruction_val_base>
6964 (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6967 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6968 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6969 if (sdi.grad.count(mf) == 0 ||
6970 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6971 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6972 pgai = std::make_shared<ga_instruction_grad_base>
6973 (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6976 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6977 if (sdi.hess.count(mf) == 0 ||
6978 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6979 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6980 pgai = std::make_shared<ga_instruction_hess_base>
6981 (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6984 default : GMM_ASSERT1(
false,
"Internal error");
6986 if (pgai) rmi.instructions.push_back(std::move(pgai));
6989 switch(pnode->node_type) {
6990 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6992 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6993 pnode->t.set_sparsity(1, mf->get_qdim());
6994 tensor_to_clear =
true;
6995 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6996 (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6998 pgai = std::make_shared<ga_instruction_copy_val_base>
6999 (pnode->tensor(), sdi.base[mf], mf->get_qdim());
7002 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
7004 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
7005 pnode->t.set_sparsity(2, mf->get_qdim());
7006 tensor_to_clear =
true;
7007 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
7008 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7010 pgai = std::make_shared<ga_instruction_copy_grad_base>
7011 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7014 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
7016 pgai = std::make_shared<ga_instruction_copy_hess_base>
7017 (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
7018 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
7019 pnode->t.set_sparsity(3, mf->get_qdim());
7021 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
7023 pgai = std::make_shared<ga_instruction_copy_diverg_base>
7024 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7028 if (pgai) rmi.instructions.push_back(std::move(pgai));
7030 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
7034 case GA_NODE_INTERPOLATE_VAL_TEST:
case GA_NODE_INTERPOLATE_GRAD_TEST:
7035 case GA_NODE_INTERPOLATE_HESS_TEST:
case GA_NODE_INTERPOLATE_DIVERG_TEST:
7037 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
7038 const std::string &intn = pnode->interpolate_name;
7039 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
7040 if (workspace.variable_group_exists(pnode->name)) {
7041 ga_instruction_set::variable_group_info &vgi =
7042 rmi.interpolate_infos[intn].groups_info[pnode->name];
7043 mfg = &(vgi.mf); mfn = 0;
7046 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
7048 pgai = std::make_shared<ga_instruction_interpolate_val_base>
7049 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7050 workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
7052 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
7054 pgai = std::make_shared<ga_instruction_interpolate_grad_base>
7055 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7056 workspace.qdim(pnode->name),
7057 rmi.interpolate_infos[intn], gis.fp_pool);
7058 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
7060 pgai = std::make_shared<ga_instruction_interpolate_hess_base>
7061 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7062 workspace.qdim(pnode->name),
7063 rmi.interpolate_infos[intn], gis.fp_pool);
7066 pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
7067 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7068 workspace.qdim(pnode->name),
7069 rmi.interpolate_infos[intn], gis.fp_pool);
7071 rmi.instructions.push_back(std::move(pgai));
7072 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
7077 switch(pnode->op_type) {
7080 if (pnode->tensor().size() == 1) {
7081 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
7082 "Internal error: child0 not scalar");
7083 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
7084 "Internal error: child1 not scalar");
7085 pgai = std::make_shared<ga_instruction_scalar_add>
7086 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7088 pgai = std::make_shared<ga_instruction_add>
7089 (pnode->tensor(), child0->tensor(), child1->tensor());
7091 if (child0->t.sparsity() == child1->t.sparsity()
7092 && child0->t.qdim() == child1->t.qdim())
7093 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7094 rmi.instructions.push_back(std::move(pgai));
7098 if (pnode->tensor().size() == 1) {
7099 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
7100 "Internal error: child0 not scalar");
7101 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
7102 "Internal error: child1 not scalar");
7103 pgai = std::make_shared<ga_instruction_scalar_sub>
7104 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7106 pgai = std::make_shared<ga_instruction_sub>
7107 (pnode->tensor(), child0->tensor(), child1->tensor());
7109 if (child0->t.sparsity() == child1->t.sparsity()
7110 && child0->t.qdim() == child1->t.qdim())
7111 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7112 rmi.instructions.push_back(std::move(pgai));
7115 case GA_UNARY_MINUS:
7116 if (pnode->tensor().size() == 1) {
7117 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
"Internal error");
7118 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7119 (pnode->tensor()[0], child0->tensor()[0], minus);
7121 pgai = std::make_shared<ga_instruction_scalar_mult>
7122 (pnode->tensor(), child0->tensor(), minus);
7124 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7125 rmi.instructions.push_back(std::move(pgai));
7129 case GA_DOT:
case GA_COLON:
case GA_MULT:
7131 size_type tps0 = child0->tensor_proper_size();
7132 size_type tps1 = child1->tensor_proper_size();
7133 size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
7136 pgai = pga_instruction();
7137 if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
7138 (pnode->op_type == GA_COLON && dim1 <= 2) ||
7139 (pnode->op_type == GA_MULT && dim0 == 4) ||
7140 (pnode->op_type == GA_MULT && dim1 <= 1) ||
7141 child0->tensor().size() == 1 || tps1 == 1) {
7143 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7144 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7145 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7147 else if (child0->tensor().size() == 1) {
7148 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7149 pgai = std::make_shared<ga_instruction_scalar_mult>
7150 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7152 else if (child1->tensor().size() == 1) {
7153 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7154 pgai = std::make_shared<ga_instruction_scalar_mult>
7155 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7157 else if (pnode->test_function_type < 3) {
7160 pgai = ga_uniform_instruction_simple_tmult
7161 (pnode->tensor(), child0->tensor(), child1->tensor());
7163 pgai = std::make_shared<ga_instruction_simple_tmult>
7164 (pnode->tensor(), child0->tensor(), child1->tensor());
7168 pgai = ga_uniform_instruction_simple_tmult
7169 (pnode->tensor(), child1->tensor(), child0->tensor());
7171 pgai = std::make_shared<ga_instruction_simple_tmult>
7172 (pnode->tensor(), child1->tensor(), child0->tensor());
7173 }
else if (is_uniform)
7174 pgai = ga_uniform_instruction_contraction_switch
7175 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7177 pgai = ga_instruction_contraction_switch
7178 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7181 if (child1->test_function_type == 1 ||
7182 child1->test_function_type == 3) {
7183 if (child1->test_function_type == 3 ||
7184 child1->tensor_proper_size() <= s2) {
7187 pgai = ga_uniform_instruction_simple_tmult
7188 (pnode->tensor(), child1->tensor(), child0->tensor());
7190 pgai = std::make_shared<ga_instruction_simple_tmult>
7191 (pnode->tensor(), child1->tensor(), child0->tensor());
7192 }
else if (is_uniform)
7193 pgai = ga_uniform_instruction_contraction_switch
7194 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7196 pgai = ga_instruction_contraction_switch
7197 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7199 pgai = std::make_shared<ga_instruction_spec_contraction>
7200 (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
7201 }
else if (child1->test_function_type == 0 ||
7202 (child0->tensor_proper_size() == s2 &&
7203 child1->tensor_proper_size() == s2)) {
7206 pgai = ga_uniform_instruction_simple_tmult
7207 (pnode->tensor(), child0->tensor(), child1->tensor());
7209 pgai = std::make_shared<ga_instruction_simple_tmult>
7210 (pnode->tensor(), child0->tensor(), child1->tensor());
7213 pgai = ga_uniform_instruction_contraction_switch
7214 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7216 pgai = ga_instruction_contraction_switch
7217 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7220 if (child0->tensor_proper_size() == s2)
7221 pgai = ga_uniform_instruction_contraction_switch
7222 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7223 else if (child1->tensor_proper_size() == s2)
7224 pgai = std::make_shared<ga_instruction_spec_contraction>
7225 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7227 pgai = std::make_shared<ga_instruction_spec2_contraction>
7228 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7233 if (pnode->test_function_type < 3) {
7236 pgai = ga_uniform_instruction_simple_tmult
7237 (pnode->tensor(), child0->tensor(), child1->tensor());
7239 pgai = std::make_shared<ga_instruction_simple_tmult>
7240 (pnode->tensor(), child0->tensor(), child1->tensor());
7242 if (child1->test_function_type == 0)
7243 pgai = std::make_shared<ga_instruction_matrix_mult>
7244 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7246 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
7247 (pnode->tensor(), child0->tensor(), child1->tensor(),
7248 s2, tps0/s2, tps1/s2);
7251 if (child0->tensor_proper_size() == 1) {
7252 if (child0->test_function_type == 0 ||
7253 child0->test_function_type == 1) {
7255 pgai = ga_uniform_instruction_simple_tmult
7256 (pnode->tensor(), child0->tensor(), child1->tensor());
7258 pgai = std::make_shared<ga_instruction_simple_tmult>
7259 (pnode->tensor(), child0->tensor(), child1->tensor());
7261 pgai = std::make_shared<ga_instruction_spec_tmult>
7262 (pnode->tensor(), child1->tensor(), child0->tensor(),
7265 if (child1->test_function_type == 0)
7266 pgai = std::make_shared<ga_instruction_matrix_mult>
7267 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7268 else if (child1->test_function_type == 2)
7269 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
7270 (pnode->tensor(), child0->tensor(), child1->tensor(),
7271 s2, tps0/s2, tps1/s2);
7273 pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
7274 (pnode->tensor(), child0->tensor(), child1->tensor(),
7275 s2, tps0/s2, tps1/s2);
7279 rmi.instructions.push_back(std::move(pgai));
7284 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7285 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
7286 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7287 }
else if (child1->tensor().size() == 1) {
7288 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7289 pgai = std::make_shared<ga_instruction_scalar_div>
7290 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7291 }
else GMM_ASSERT1(
false,
"Internal error");
7292 rmi.instructions.push_back(std::move(pgai));
7296 pnode->t.set_to_copy(child0->t);
7297 pgai = std::make_shared<ga_instruction_print_tensor>
7298 (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
7299 rmi.instructions.push_back(std::move(pgai));
7303 if (pnode->tensor_proper_size() > 1) {
7304 size_type n1 = child0->tensor_proper_size(0);
7305 size_type n2 = (child0->tensor_order() > 1) ?
7306 child0->tensor_proper_size(1) : 1;
7308 for (
size_type i = 2; i < child0->tensor_order(); ++i)
7309 nn *= child0->tensor_proper_size(i);
7310 if (child0->nb_test_functions() == 0)
7311 pgai = std::make_shared<ga_instruction_transpose_no_test>
7312 (pnode->tensor(), child0->tensor(), n1, n2, nn);
7314 pgai = std::make_shared<ga_instruction_transpose>
7315 (pnode->tensor(), child0->tensor(), n1, n2, nn);
7316 rmi.instructions.push_back(std::move(pgai));
7318 pnode->t.set_to_copy(child0->t);
7323 if (pnode->tensor_proper_size() != 1) {
7324 pgai = std::make_shared<ga_instruction_sym>
7325 (pnode->tensor(), child0->tensor());
7326 rmi.instructions.push_back(std::move(pgai));
7328 pnode->t.set_to_copy(child0->t);
7334 pgai = std::make_shared<ga_instruction_skew>
7335 (pnode->tensor(), child0->tensor());
7336 rmi.instructions.push_back(std::move(pgai));
7342 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
7344 pnode->t.set_to_copy(child0->t);
7346 pgai = std::make_shared<ga_instruction_trace>
7347 (pnode->tensor(), child0->tensor(), N);
7348 rmi.instructions.push_back(std::move(pgai));
7355 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
7356 pgai = std::make_shared<ga_instruction_deviator>
7357 (pnode->tensor(), child0->tensor(), N);
7358 rmi.instructions.push_back(std::move(pgai));
7364 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7365 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7366 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7367 }
else if (child0->tensor().size() == 1) {
7368 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7369 pgai = std::make_shared<ga_instruction_scalar_mult>
7370 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7372 else if (child1->tensor().size() == 1) {
7373 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7374 pgai = std::make_shared<ga_instruction_scalar_mult>
7375 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7377 else if (child1->test_function_type == 0)
7378 pgai = std::make_shared<ga_instruction_dotmult>
7379 (pnode->tensor(), child0->tensor(), child1->tensor());
7380 else if (child0->test_function_type == 0)
7381 pgai = std::make_shared<ga_instruction_dotmult>
7382 (pnode->tensor(), child1->tensor(), child0->tensor());
7383 else if (child0->test_function_type == 1)
7384 pgai = std::make_shared<ga_instruction_dotmult_spec>
7385 (pnode->tensor(), child0->tensor(), child1->tensor());
7387 pgai = std::make_shared<ga_instruction_dotmult_spec>
7388 (pnode->tensor(), child1->tensor(), child0->tensor());
7390 rmi.instructions.push_back(std::move(pgai));
7395 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7396 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
7397 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7398 }
else if (child1->tensor().size() == 1) {
7399 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7400 pgai = std::make_shared<ga_instruction_scalar_div>
7401 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7402 }
else if (child1->test_function_type == 0) {
7403 pgai = std::make_shared<ga_instruction_dotdiv>
7404 (pnode->tensor(), child0->tensor(), child1->tensor());
7405 }
else GMM_ASSERT1(
false,
"Internal error");
7406 rmi.instructions.push_back(std::move(pgai));
7411 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7412 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7413 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7414 }
else if (child0->tensor().size() == 1) {
7415 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7416 pgai = std::make_shared<ga_instruction_scalar_mult>
7417 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7419 else if (child1->tensor().size() == 1) {
7420 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7421 pgai = std::make_shared<ga_instruction_scalar_mult>
7422 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7424 else if (child1->test_function_type == 0) {
7426 pgai = ga_uniform_instruction_simple_tmult
7427 (pnode->tensor(), child0->tensor(), child1->tensor());
7429 pgai = std::make_shared<ga_instruction_simple_tmult>
7430 (pnode->tensor(), child0->tensor(), child1->tensor());
7431 }
else if (child1->tensor_proper_size() == 1)
7432 pgai = std::make_shared<ga_instruction_spec2_tmult>
7433 (pnode->tensor(), child0->tensor(), child1->tensor());
7435 pgai = std::make_shared<ga_instruction_spec_tmult>
7436 (pnode->tensor(), child0->tensor(), child1->tensor(),
7437 child0->tensor_proper_size(),
7438 child1->tensor_proper_size());
7440 rmi.instructions.push_back(std::move(pgai));
7443 default:GMM_ASSERT1(
false,
"Unexpected operation. Internal error.");
7447 case GA_NODE_C_MATRIX:
7449 if (pnode->test_function_type) {
7450 std::vector<const base_tensor *> components(pnode->children.size());
7451 for (
size_type i = 0; i < pnode->children.size(); ++i)
7452 components[i] = &(pnode->children[i]->tensor());
7453 pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
7454 (pnode->tensor(), components);
7456 std::vector<scalar_type *> components(pnode->children.size());
7457 for (
size_type i = 0; i < pnode->children.size(); ++i)
7458 components[i] = &(pnode->children[i]->tensor()[0]);
7459 pgai = std::make_shared<ga_instruction_simple_c_matrix>
7460 (pnode->tensor(), components);
7462 rmi.instructions.push_back(std::move(pgai));
7466 case GA_NODE_PARAMS:
7467 if (child0->node_type == GA_NODE_RESHAPE) {
7468 pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
7470 rmi.instructions.push_back(std::move(pgai));
7471 }
else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
7472 pga_tree_node child2 = pnode->children[2];
7473 if (child1->test_function_type==2 && child2->test_function_type==1)
7474 pgai = std::make_shared<ga_instruction_cross_product_tf>
7475 (pnode->tensor(), child2->tensor(), child1->tensor(),
true);
7476 else if (child1->test_function_type || child2->test_function_type)
7477 pgai = std::make_shared<ga_instruction_cross_product_tf>
7478 (pnode->tensor(), child1->tensor(), child2->tensor(),
false);
7480 pgai = std::make_shared<ga_instruction_cross_product>
7481 (pnode->tensor(), child1->tensor(), child2->tensor());
7482 rmi.instructions.push_back(std::move(pgai));
7483 }
else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
7485 ind =
size_type(round(pnode->children[2]->tensor()[0])-1);
7487 for (
size_type i = 0; i < child1->tensor_order(); ++i)
7488 if (i>ind) ii2 *= child1->tensor_proper_size(i);
7489 size_type nn = child1->tensor_proper_size(ind);
7490 pgai = std::make_shared<ga_instruction_index_move_last>
7491 (pnode->tensor(), child1->tensor(), nn, ii2);
7492 rmi.instructions.push_back(std::move(pgai));
7493 }
else if (child0->node_type == GA_NODE_SWAP_IND) {
7496 ind[i] =
size_type(round(pnode->children[i]->tensor()[0])-1);
7497 if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
7499 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7500 if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
7501 if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
7503 size_type nn1 = child1->tensor_proper_size(ind[2]);
7504 size_type nn2 = child1->tensor_proper_size(ind[3]);
7506 pgai = std::make_shared<ga_instruction_swap_indices>
7507 (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
7508 rmi.instructions.push_back(std::move(pgai));
7509 }
else if (child0->node_type == GA_NODE_CONTRACT) {
7510 std::vector<size_type> ind(2), indsize(2);
7511 pga_tree_node child2(0);
7512 if (pnode->children.size() == 4)
7513 { ind[0] = 2; ind[1] = 3; }
7514 else if (pnode->children.size() == 5)
7515 { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
7516 else if (pnode->children.size() == 7) {
7517 ind.resize(4); indsize.resize(4);
7518 ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
7519 child2 = pnode->children[4];
7522 for (
size_type i = 1; i < pnode->children.size(); ++i) {
7524 ind[kk] =
size_type(round(pnode->children[i]->tensor()[0])-1);
7525 indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
7530 if (pnode->children.size() == 4) {
7532 if (i1 > i2) std::swap(i1, i2);
7534 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7535 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7536 if (i > i2) ii3 *= child1->tensor_proper_size(i);
7538 pgai = std::make_shared<ga_instruction_contract_1_1>
7539 (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
7541 else if (pnode->children.size() == 5) {
7544 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
7545 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7546 if (i < i1) ii1 *= child1->tensor_proper_size(i);
7547 if (i > i1) ii2 *= child1->tensor_proper_size(i);
7549 for (
size_type i = 0; i < child2->tensor_order(); ++i) {
7550 if (i < i2) ii3 *= child2->tensor_proper_size(i);
7551 if (i > i2) ii4 *= child2->tensor_proper_size(i);
7553 if (child1->test_function_type==1 && child2->test_function_type==2)
7554 pgai = std::make_shared<ga_instruction_contract_2_1_rev>
7555 (pnode->tensor(), child1->tensor(), child2->tensor(),
7556 indsize[0], ii1, ii2, ii3, ii4);
7558 pgai = std::make_shared<ga_instruction_contract_2_1>
7559 (pnode->tensor(), child1->tensor(), child2->tensor(),
7560 indsize[0], ii1, ii2, ii3, ii4);
7562 else if (pnode->children.size() == 7) {
7564 size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7565 size_type nn1 = indsize[0], nn2 = indsize[1];
7566 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
7568 { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7569 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7570 if (i < i1) ii1 *= child1->tensor_proper_size(i);
7571 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7572 if (i > i2) ii3 *= child1->tensor_proper_size(i);
7574 for (
size_type i = 0; i < child2->tensor_order(); ++i) {
7575 if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7576 if ((i > i3 && i < i4) || (i > i4 && i < i3))
7577 ii5 *= child2->tensor_proper_size(i);
7578 if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7580 if (child1->test_function_type==1 && child2->test_function_type==2)
7581 pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7582 (pnode->tensor(), child1->tensor(), child2->tensor(),
7583 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7585 pgai = std::make_shared<ga_instruction_contract_2_2>
7586 (pnode->tensor(), child1->tensor(), child2->tensor(),
7587 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7589 rmi.instructions.push_back(std::move(pgai));
7590 }
else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7592 std::string name = child0->name;
7593 const ga_predef_function_tab &PREDEF_FUNCTIONS
7595 ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7596 const ga_predef_function &F = it->second;
7598 pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
7601 if (child1->tensor().size() == 1) {
7603 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7604 (pnode->tensor()[0], child1->tensor()[0], F.f1());
7606 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7607 (pnode->tensor()[0], child1->tensor()[0], F);
7610 pgai = std::make_shared<ga_instruction_eval_func_1arg>
7611 (pnode->tensor(), child1->tensor(), F.f1());
7613 pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7614 (pnode->tensor(), child1->tensor(), F);
7617 if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7619 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7620 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7623 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7624 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7626 }
else if (child1->tensor().size() == 1) {
7629 std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7630 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7633 std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7634 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7635 }
else if (child2->tensor().size() == 1) {
7638 std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7639 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7642 std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7643 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7646 pgai = std::make_shared<ga_instruction_eval_func_2arg>
7647 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7649 pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7650 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7653 rmi.instructions.push_back(std::move(pgai));
7655 }
else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7657 GMM_ASSERT1(
false,
"Internal error");
7659 }
else if (child0->node_type == GA_NODE_OPERATOR) {
7661 ga_predef_operator_tab &PREDEF_OPERATORS
7663 ga_predef_operator_tab::T::iterator it
7664 = PREDEF_OPERATORS.tab.find(child0->name);
7665 const ga_nonlinear_operator &OP = *(it->second);
7666 ga_nonlinear_operator::arg_list args;
7667 for (
size_type i = 1; i < pnode->children.size(); ++i)
7668 args.push_back(&(pnode->children[i]->tensor()));
7670 if (child0->der1 && child0->der2 == 0) {
7671 pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7672 (pnode->tensor(), OP, args, child0->der1);
7673 }
else if (child0->der1 && child0->der2) {
7674 pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7675 (pnode->tensor(), OP, args, child0->der1, child0->der2);
7677 pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
7680 rmi.instructions.push_back(std::move(pgai));
7683 bgeot::multi_index mi1(size0.size()), indices;
7684 size_type nb_test = pnode->nb_test_functions();
7685 if (pnode->tensor().size() == 1) {
7686 for (
size_type i = 0; i < child0->tensor_order(); ++i)
7687 mi1[i+nb_test] =
size_type(round(pnode->children[i+1]->tensor()[0])-1);
7688 pgai = std::make_shared<ga_instruction_copy_scalar>
7689 (pnode->tensor()[0], child0->tensor()(mi1));
7691 for (
size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7692 for (
size_type i = 0; i < child0->tensor_order(); ++i) {
7693 if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7695 =
size_type(round(pnode->children[i+1]->tensor()[0])- 1);
7697 indices.push_back(i+nb_test);
7699 pgai = std::make_shared<ga_instruction_tensor_slice>
7700 (pnode->tensor(), child0->tensor(), mi1, indices);
7702 rmi.instructions.push_back(std::move(pgai));
7707 default:GMM_ASSERT1(
false,
"Unexpected node type " << pnode->node_type
7708 <<
" in compilation. Internal error.");
7710 if (tensor_to_clear) {
7713 pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7714 rmi.elt_instructions.push_back(std::move(pgai));
7717 rmi.node_list[pnode->hash_value].push_back(pnode);
// Compiles each tree of `workspace` into `gis`. For every tree the expression
// is copied into gis.trees, the root of that copy is handed to
// ga_compile_node, and a ga_instruction_add_to_coeff instruction is appended
// to the instruction list stored under the key built from (td.mim, td.rg, 0).
//   workspace : supplies the trees and owns the assembled tensor.
//   gis       : instruction set being filled (trees, all_instructions, coeff).
//   scalar    : when true, the root tensor must hold exactly one component,
//               otherwise GMM_ASSERT1 fails with the message below.
// NOTE(review): this listing carries the original line numbers as a prefix
// and skips some of them (7724, 7727, 7735, 7742-7745); whatever statements
// or braces sat on those lines are not visible here.
7720 void ga_compile_function(ga_workspace &workspace,
7721 ga_instruction_set &gis,
bool scalar) {
7722 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7723 const ga_workspace::tree_description &td = workspace.tree_info(i);
// Work on a copy of the tree stored in gis; `root` refers to that copy.
7725 gis.trees.push_back(*(td.ptree));
7726 pga_tree_node root = gis.trees.back().root;
7728 GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7729 "The result of the given expression is not a scalar");
// operator[] creates the instruction container for this key on first use;
// the tree's mesh pointer is recorded in it.
7730 ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7731 gis.all_instructions[rm].m = td.m;
// A fresh, empty if-hierarchy is used for every tree.
// NOTE(review): the literal `true` is presumably the function_case flag of
// ga_compile_node -- confirm against its signature.
7732 ga_if_hierarchy if_hierarchy;
7733 ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7734 *(td.m),
true, if_hierarchy);
// The coefficient handed to the add_to_coeff instruction is fixed to 1.
7736 gis.coeff = scalar_type(1);
7737 pga_instruction pgai;
// Assign the root tensor to the assembled tensor before the instruction is
// built (presumably to give it the matching shape -- TODO confirm).
7738 workspace.assembled_tensor() = root->tensor();
7739 pgai = std::make_shared<ga_instruction_add_to_coeff>
7740 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7741 gis.all_instructions[rm].instructions.push_back(std::move(pgai));
// Recursively walks the tree rooted at `pnode` and records the interpolate
// transformations it refers to.
//   interpolates     : gets one entry per transformation name met; the mapped
//                      set receives the node's variable name whenever that
//                      name is a variable group of `workspace`.
//   interpolates_der : gets the transformation name of every
//                      GA_NODE_INTERPOLATE_DERIVATIVE node met.
// The bool result is accumulated in `found` over the children.
// NOTE(review): the declaration of `found`, the lines that set it for the
// current node and the return statement sit on original lines that this
// listing skips (e.g. 7750, 7768-7770, 7780), so the exact meaning of the
// returned flag cannot be confirmed from here.
7746 static bool ga_node_used_interpolates
7747 (
const pga_tree_node pnode,
const ga_workspace &workspace,
7748 std::map<std::string, std::set<std::string> > &interpolates,
7749 std::set<std::string> &interpolates_der) {
// Classify the node: interpolated value/gradient/Hessian/divergence of a
// variable, or of a test function.
7751 bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7752 pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7753 pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7754 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
7755 bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7756 pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7757 pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7758 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7760 if (intrpl || intrpl_test ||
7761 pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7762 pnode->node_type == GA_NODE_INTERPOLATE_X ||
7763 pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
// operator[] is called only for its side effect of creating the (possibly
// empty) entry for this transformation; the size() result is discarded.
7764 interpolates[pnode->interpolate_name].size();
7765 if (intrpl || intrpl_test) {
// Only variable groups are listed under the transformation name.
7766 if (workspace.variable_group_exists(pnode->name))
7767 interpolates[pnode->interpolate_name].insert(pnode->name);
// Derivative nodes register the name held in interpolate_name_der, both in
// the derivative set and as a (possibly empty) entry of `interpolates`.
7771 if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7772 interpolates_der.insert(pnode->interpolate_name_der);
7773 interpolates[pnode->interpolate_name_der].size();
7774 if (workspace.variable_group_exists(pnode->name))
7775 interpolates[pnode->interpolate_name_der].insert(pnode->name);
// Recurse into every child with the same output containers.
7777 for (
size_type i = 0; i < pnode->children.size(); ++i)
7778 found = ga_node_used_interpolates(pnode->children[i], workspace,
7779 interpolates, interpolates_der)
// Appends to rmi.instructions the instructions required by the interpolate
// transformations used in the tree rooted at `pnode`:
//  - one transformation-call instruction for each transformation that is not
//    yet registered in rmi.transformations, or whose derivative is needed
//    now but is not yet registered in rmi.transformations_der;
//  - one ga_instruction_update_group_info for each (transformation, variable
//    group) pair not yet recorded in rmi.transformations.
// Newly handled transformation names are also inserted in
// gis.transformations.
// NOTE(review): `m` is used below but its declaration is not visible; it is
// presumably a parameter declared on an original line that this listing
// skips (7788-7789). Lines 7793-7794, 7810, 7815 and 7817-7818 are skipped
// as well, so the branch structure around the two make_shared calls is only
// partly visible.
7785 static void ga_compile_interpolate_trans
7786 (
const pga_tree_node pnode,
const ga_workspace &workspace,
7787 ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
// Collect the transformation names used in the tree, with the variable
// groups interpolated through each of them.
7790 std::set<std::string> interpolates_der;
7791 std::map<std::string, std::set<std::string> > transformations;
7792 ga_node_used_interpolates(pnode, workspace, transformations,
7795 for (
const auto &transformation : transformations) {
7796 const std::string &transname = transformation.first;
// The derivative is requested when the name was collected in
// interpolates_der.
7797 bool compute_der = (interpolates_der.count(transname) != 0);
7798 if (rmi.transformations.count(transname) == 0 ||
7799 (compute_der && rmi.transformations_der.count(transname) == 0)) {
// operator[] creates the entry for this transformation; the size() result
// is discarded.
7800 rmi.transformations[transname].size();
7801 gis.transformations.insert(transname);
7802 if (compute_der) rmi.transformations_der.insert(transname);
7803 pga_instruction pgai;
// Both spellings of the neighbor transformation name select the dedicated
// neighbor-transformation instruction.
7804 if (transname.compare(
"neighbor_element") == 0 ||
7805 transname.compare(
"neighbour_elt") == 0) {
7806 pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7807 (workspace, rmi.interpolate_infos[transname],
7808 workspace.interpolate_transformation(transname), gis.ctx,
7809 m, gis.ipt, gis.pai, gis.gp_pool, gis.neighbor_corresp);
// Generic transformation call; it receives compute_der.
7811 pgai = std::make_shared<ga_instruction_transformation_call>
7812 (workspace, rmi.interpolate_infos[transname],
7813 workspace.interpolate_transformation(transname), gis.ctx,
7814 gis.Normal, m, compute_der);
7816 if (pgai) rmi.instructions.push_back(std::move(pgai));
// Register each variable group of this transformation once, emitting the
// instruction that updates its group info.
7819 for (
const std::string &nodename : transformation.second) {
7820 if (rmi.transformations[transname].count(nodename) == 0) {
7821 auto&& inin = rmi.interpolate_infos[transname];
7822 pga_instruction pgai =
7823 std::make_shared<ga_instruction_update_group_info>
7824 (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7825 rmi.instructions.push_back(std::move(pgai));
7826 rmi.transformations[transname].insert(nodename);
// Compiles the trees of `workspace` into `gis` for interpolation.
// Clears gis.transformations and gis.all_instructions, then, for each tree
// whose operation is not ASSEMBLY, copies the tree into gis.trees, runs the
// semantic analysis on the copy and compiles it into the instruction list
// keyed by region_mim(td.mim, td.rg, 0).
// NOTE: this listing has gaps (some original lines are missing); conditions
// guarding the later statements may not be visible here.
7832 void ga_compile_interpolation(ga_workspace &workspace,
7833 ga_instruction_set &gis) {
7834 gis.transformations.clear();
7835 gis.all_instructions.clear();
7836 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7837 const ga_workspace::tree_description &td = workspace.tree_info(i);
7838 if (td.operation != ga_workspace::ASSEMBLY) {
7839 gis.trees.push_back(*(td.ptree));
// The tree description must carry a mesh; it is dereferenced just below.
7842 const mesh *m = td.m;
7843 GMM_ASSERT1(m,
"Internal error");
7844 ga_semantic_analysis(gis.trees.back(), workspace, *m,
7845 ref_elt_dim_of_mesh(*m, *(td.rg)),
true,
false);
7846 pga_tree_node root = gis.trees.back().root;
// Third constructor argument 0: no secondary domain for this key.
7849 ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7850 auto &rmi = gis.all_instructions[rm];
// Transformation instructions are emitted before the node instructions.
7854 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7855 ga_compile_node(root, workspace, gis, rmi, *(td.m),
false,
7856 rmi.current_hierarchy);
// The assembled tensor is assigned from the root tensor at compile time and a
// final instruction adds the root tensor to it at execution time.
7859 workspace.assembled_tensor() = root->tensor();
7860 pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7861 (workspace.assembled_tensor(), root->tensor());
7862 rmi.instructions.push_back(std::move(pgai));
// Map from a variable name to a size_type id, with lookup in both directions.
// NOTE: this listing has gaps; most of the body of the name -> id operator[]
// is not visible, only its initial find(name) call.
7869 struct var_set : std::map<std::string,size_type> {
// Name -> id lookup. This overload hides std::map::operator[].
7871 size_type operator[](
const std::string &name) {
7874 auto it = find(name);
// Id -> name lookup: linear scan over all entries, returning the first name
// whose stored id equals `id`, or an empty string when no entry matches.
7881 std::string operator[](
const size_type &
id)
const {
7882 for (
const auto &key_value : *
this)
7883 if (key_value.second ==
id)
7884 return key_value.first;
7885 return std::string(
"");
// Bookkeeping used by ga_compile() for the condensation of internal
// variables, one instance per region_mim key.
// NOTE: this listing has gaps; several member declarations are cut off and
// their remaining declarators are not visible here.
7890 struct condensation_description {
// Name <-> id sets. In ga_compile(), Qvars is indexed with the names of
// internal variables, Ivars with name_test1 and Jvars with name_test2 of the
// terms coupled to them.
7891 var_set Ivars, Jvars, Qvars;
// Qclusters[k] is a set of Qvars ids; Jclusters is resized to the same length
// in ga_compile() and receives the Jvars ids coupled to cluster k.
7894 std::vector<std::set<size_type>> Qclusters, Jclusters;
// cluster_of_Qvar[q] is the index in Qclusters of the cluster containing q
// (filled in ga_compile()).
7900 std::vector<size_type> cluster_of_Qvar;
// Non-owning tensor pointers; ga_compile() stores in them the raw pointers of
// tensors held by gis.condensation_tensors.
7902 gmm::dense_matrix<base_tensor *> KQQ,
7906 std::vector<base_tensor *> RI,
// Builds in `gis` the instruction lists for the trees of `workspace` that
// match `order` (trees with order size_type(-1) are accepted as well).
// Visible stages:
//   1. reset of gis and of the workspace temporary variable intervals;
//   2. when `condensation` is set and order == 2, a pre-pass grouping the
//      internal variables of each region_mim into clusters;
//   3. a loop over the phases PRE_ASSIGNMENT, ASSEMBLY, POST_ASSIGNMENT that
//      compiles each matching tree and appends the final assignment/assembly
//      instruction;
//   4. when `condensation` is set and order == 2, in the ASSEMBLY phase, the
//      instructions performing the condensation and assembling its results.
// NOTE: this listing has gaps (closing braces, `else`, `continue`, some
// declarations and arguments are missing), so the comments below describe
// only the statements that are visible and hedge what is inferred.
7910 void ga_compile(ga_workspace &workspace,
7911 ga_instruction_set &gis,
size_type order,
bool condensation) {
7912 gis.transformations.clear();
7913 gis.all_instructions.clear();
7914 gis.unreduced_terms.clear();
7915 workspace.clear_temporary_variable_intervals();
7917 std::map<const ga_instruction_set::region_mim, condensation_description>
// --- Stage 2: condensation pre-pass (second order terms only) ---
7920 if (condensation && order == 2) {
7921 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7922 const ga_workspace::tree_description &td = workspace.tree_info(i);
// Trees of another order are presumably skipped (the statement following
// this condition is not visible).
7923 if (td.order != 2 && td.order !=
size_type(-1))
// The analysis runs on a local copy of the tree, only to learn which test
// variables the term involves.
7925 ga_tree tree(*(td.ptree));
7926 ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7927 ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7929 pga_tree_node root = tree.root;
7932 v1_is_intern = workspace.is_internal_variable(root->name_test1),
7933 v2_is_intern = workspace.is_internal_variable(root->name_test2);
7934 if (v1_is_intern || v2_is_intern) {
7935 GMM_ASSERT1(tree.secondary_domain.empty(),
7936 "Condensed variable cannot be used in secondary domain");
// An internal variable already registered for the same mim on a different
// region must not be condensed on an intersecting region.
7938 for (
const auto &key_val : condensations) {
7939 const ga_instruction_set::region_mim rm0 = key_val.first;
7940 const condensation_description &CC0 = key_val.second;
7941 if (rm0.mim() == td.mim && rm0.region() != td.rg
7942 && (CC0.Qvars.count(root->name_test1) ||
7943 CC0.Qvars.count(root->name_test2))) {
7945 (*(rm0.region()), *(td.rg));
7946 GMM_ASSERT1(intrsct.is_empty(),
7947 "Cannot condense coupled variables between "
7948 "intersecting regions");
7951 const ga_instruction_set::region_mim rm(td.mim, td.rg,
nullptr);
7953 condensation_description &CC = condensations[rm];
// size_type(-1) marks a test variable that is not internal.
7955 q1 = v1_is_intern ? CC.Qvars[root->name_test1] :
size_type(-1),
7956 q2 = v2_is_intern ? CC.Qvars[root->name_test2] :
size_type(-1);
// Collect the indices of the existing clusters containing q1 or q2.
7958 std::vector<size_type> selected_clusters;
7959 for (
size_type j=0; j < CC.Qclusters.size(); ++j)
7960 if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7961 selected_clusters.push_back(j);
// No existing cluster: start a new one with the internal variable(s).
7963 if (selected_clusters.empty()) {
7964 CC.Qclusters.push_back(std::set<size_type>());
7965 if (q1 !=
size_type(-1)) CC.Qclusters.back().insert(q1);
7966 if (q2 !=
size_type(-1)) CC.Qclusters.back().insert(q2);
// Otherwise the first selected cluster receives q1/q2 and the contents of
// the other selected clusters, which are erased from the back so that the
// remaining indices stay valid.
7968 auto &target = CC.Qclusters[selected_clusters[0]];
7969 if (q1 !=
size_type(-1)) target.insert(q1);
7970 if (q2 !=
size_type(-1)) target.insert(q2);
// NOTE(review): the loop condition is `j > 1`, so selected_clusters[1] is
// never merged into `target` nor erased; since the target is
// selected_clusters[0], this looks like it should be `j > 0` -- confirm.
7971 for (
size_type j=selected_clusters.size()-1; j > 1; --j) {
7972 auto &source = CC.Qclusters[selected_clusters[j]];
7973 target.insert(source.begin(), source.end());
7974 CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
// Size the per-condensation containers now that the clusters are known
// (the definition of Qsize is not visible in this listing).
7981 for (
auto &key_value : condensations) {
7982 condensation_description &CC = key_value.second;
7991 CC.Jclusters.resize(CC.Qclusters.size());
7993 CC.cluster_of_Qvar.resize(Qsize);
7994 for (
size_type i=0; i < CC.Qclusters.size(); ++i)
7995 for (
const size_type &var : CC.Qclusters[i])
7996 CC.cluster_of_Qvar[var] = i;
8001 CC.KQQ.resize(Qsize, Qsize);
8002 CC.RQpr.resize(Qsize);
// One first-order tensor per internal variable, sized by nb_tensor_elem() of
// its im_data; gis.condensation_tensors keeps ownership and RQpr stores the
// raw pointer (the loop over q is not visible).
8004 bgeot::multi_index mi(1);
8005 mi[0] = workspace.associated_im_data(CC.Qvars[q]) ->nb_tensor_elem();
8006 gis.condensation_tensors.push_back
8007 (std::make_shared<base_tensor>(mi));
8008 CC.RQpr[q] = gis.condensation_tensors.back().get();
// --- Stage 3: compile the trees phase by phase ---
8013 std::array<ga_workspace::operation_type,3>
8014 phases{ga_workspace::PRE_ASSIGNMENT,
8015 ga_workspace::ASSEMBLY,
8016 ga_workspace::POST_ASSIGNMENT};
8017 for (
const auto &phase : phases) {
8019 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
8020 const ga_workspace::tree_description &td = workspace.tree_info(i);
// Trees belonging to another phase are presumably skipped (the statement
// following this condition is not visible).
8021 if (td.operation != phase)
8024 if (td.order == order || td.order ==
size_type(-1)) {
// Non-assembly trees are stored in gis.interpolation_trees; the container
// used for the ASSEMBLY phase is on a line missing from this listing.
8025 std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
8027 : gis.interpolation_trees;
8028 trees.push_back(*(td.ptree));
8030 ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
8031 ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
8033 pga_tree_node root = trees.back().root;
// The secondary domain (if the tree names one) is part of the key of the
// instruction list.
8038 psecondary_domain psd(0);
8039 if (trees.back().secondary_domain.size())
8040 psd = workspace.secondary_domain(trees.back().secondary_domain);
8041 ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
8042 auto &rmi = gis.all_instructions[rm];
8046 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
8047 ga_compile_node(root, workspace, gis, rmi, *(td.m),
false,
8048 rmi.current_hierarchy);
// Assignment phases: the root tensor is written into the value vector of the
// variable named by td.varname_interpolation (const is cast away from the
// reference returned by workspace.value()).
8052 if (phase != ga_workspace::ASSEMBLY) {
8053 if (!td.varname_interpolation.empty()) {
8055 = workspace.associated_im_data(td.varname_interpolation);
8056 auto &V =
const_cast<model_real_plain_vector &
>
8057 (workspace.value(td.varname_interpolation));
8058 GMM_ASSERT1(imd,
"Internal error");
8059 auto pgai = std::make_shared<ga_instruction_assignment>
8060 (root->tensor(), V, gis.ctx, imd);
8061 rmi.instructions.push_back(std::move(pgai));
// Assembly phase: the final instruction depends on the order (the dispatch
// statements themselves are not visible in this listing).
8064 pga_instruction pgai;
// Accumulation of the root tensor, weighted by gis.coeff, into
// workspace.assembled_tensor() (presumably the order 0 case -- confirm).
8067 workspace.assembled_tensor() = root->tensor();
8068 pgai = std::make_shared<ga_instruction_add_to_coeff>
8069 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
// Order 1: vector assembly for the test variable name_test1.
8073 GMM_ASSERT1(root->tensor_proper_size() == 1,
8074 "Invalid vector or tensor quantity. An order 1 "
8075 "weak form has to be a scalar quantity");
8076 const mesh_fem *
const
8077 mf = workspace.associated_mf(root->name_test1);
8078 const im_data *
const
8079 imd = workspace.associated_im_data(root->name_test1);
8080 workspace.add_temporary_interval_for_unreduced_variable
8083 base_vector &Vu = workspace.unreduced_vector(),
8084 &Vr = workspace.assembled_vector();
// Context selection: gis.ctx without interpolation, the secondary domain
// context when intn1 names a secondary domain, otherwise the context of the
// interpolate transformation intn1.
8086 const std::string &intn1 = root->interpolate_name_test1;
8087 bool secondary = !intn1.empty() &&
8088 workspace.secondary_domain_exists(intn1);
8089 fem_interpolation_context
8090 &ctx = intn1.empty() ? gis.ctx
8091 : (secondary ? rmi.secondary_domain_infos.ctx
8092 : rmi.interpolate_infos[intn1].ctx);
8094 !(intn1.empty() || intn1 ==
"neighbor_element"
8095 || intn1 ==
"neighbour_elt" || secondary);
// Interpolated test function on a variable group: the target interval and
// mesh_fem come from the group info; every variable of the group is
// registered in gis.unreduced_terms.
8097 if (intn1.size() && !secondary &&
8098 workspace.variable_group_exists(root->name_test1)) {
8099 ga_instruction_set::variable_group_info
8100 &vgi = rmi.interpolate_infos[intn1]
8101 .groups_info[root->name_test1];
8102 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8103 (root->tensor(), Vr, Vu, ctx,
8104 vgi.I, vgi.mf, vgi.reduced_mf,
8105 gis.coeff, gis.nbpt, gis.ipt, interpolate);
8106 for (
const std::string &name
8107 : workspace.variable_group(root->name_test1))
8108 gis.unreduced_terms.emplace(name,
"");
// Single variable on a mesh_fem: a reduced mesh_fem is assembled into the
// unreduced vector over the temporary interval, otherwise into the
// assembled vector over the variable interval.
8110 base_vector &V = mf->is_reduced() ? Vu : Vr;
8111 const gmm::sub_interval
8112 &I = mf->is_reduced()
8113 ? workspace.temporary_interval_of_variable
8115 : workspace.interval_of_variable(root->name_test1);
8116 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8117 (root->tensor(), V, ctx, I, *mf,
8118 gis.coeff, gis.nbpt, gis.ipt, interpolate);
8119 if (mf->is_reduced())
8120 gis.unreduced_terms.emplace(root->name_test1,
"");
// Variable defined on im_data: interpolate transformations are rejected.
8123 GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
8124 "Interpolate transformation on integration "
8126 if (!workspace.is_internal_variable(root->name_test1) ||
8128 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8129 (root->tensor(), Vr, gis.ctx,
8130 workspace.interval_of_variable(root->name_test1),
8131 *imd, gis.coeff, gis.ipt);
// Remaining case: plain vector assembly over the variable interval.
8134 pgai = std::make_shared<ga_instruction_vector_assembly>
8135 (root->tensor(), Vr,
8136 workspace.interval_of_variable(root->name_test1),
// Order 2: matrix assembly for the pair (name_test1, name_test2).
8142 GMM_ASSERT1(root->tensor_proper_size() == 1,
8143 "Invalid vector or tensor quantity. An order 2 "
8144 "weak form has to be a scalar quantity");
8145 const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
8146 *mf2=workspace.associated_mf(root->name_test2);
8148 *imd1 = workspace.associated_im_data(root->name_test1),
8149 *imd2 = workspace.associated_im_data(root->name_test2);
8150 const std::string &intn1 = root->interpolate_name_test1,
8151 &intn2 = root->interpolate_name_test2;
8152 bool secondary1 = intn1.size() &&
8153 workspace.secondary_domain_exists(intn1);
8154 bool secondary2 = intn2.size() &&
8155 workspace.secondary_domain_exists(intn2);
// Same context selection rule as for order 1, applied to each test function.
8156 fem_interpolation_context
8157 &ctx1 = intn1.empty() ? gis.ctx
8158 : (secondary1 ? rmi.secondary_domain_infos.ctx
8159 : rmi.interpolate_infos[intn1].ctx),
8160 &ctx2 = intn2.empty() ? gis.ctx
8161 : (secondary2 ? rmi.secondary_domain_infos.ctx
8162 : rmi.interpolate_infos[intn2].ctx);
8163 bool interpolate = !(intn1.empty() || intn1 ==
"neighbor_element"
8164 || intn1 ==
"neighbour_elt"
8166 !(intn2.empty() || intn2 ==
"neighbor_element"
8167 || intn2 ==
"neighbour_elt"
8170 workspace.add_temporary_interval_for_unreduced_variable
8172 workspace.add_temporary_interval_for_unreduced_variable
8175 bool has_var_group1 = (!intn1.empty() && !secondary1 &&
8176 workspace.variable_group_exists
8177 (root->name_test1));
8178 bool has_var_group2 = (!intn2.empty() && !secondary2 &&
8179 workspace.variable_group_exists
8180 (root->name_test2));
// `simple`: no interpolation, no variable group, and both test variables
// live on non-reduced mesh_fems.
8181 bool simple = !interpolate &&
8182 !has_var_group1 && !has_var_group2 &&
8183 mf1 && !(mf1->is_reduced()) &&
8184 mf2 && !(mf2->is_reduced());
// The four global matrices: r = reduced, u = unreduced (row, then column).
8187 auto &Krr = workspace.assembled_matrix();
8188 auto &Kru = workspace.col_unreduced_matrix();
8189 auto &Kur = workspace.row_unreduced_matrix();
8190 auto &Kuu = workspace.row_col_unreduced_matrix();
// Standard assembly into Krr (presumably the `simple` branch; its `if` is
// not visible). The instruction is picked from the qdim of the mesh_fems and
// from the sparsity/qdim of the root node.
8193 const gmm::sub_interval
8194 &I1 = workspace.interval_of_variable(root->name_test1),
8195 &I2 = workspace.interval_of_variable(root->name_test2);
8197 &alpha1 = workspace.factor_of_variable(root->name_test1),
8198 &alpha2 = workspace.factor_of_variable(root->name_test2);
8199 if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
8200 pgai = std::make_shared
8201 <ga_instruction_matrix_assembly_standard_scalar>
8202 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8203 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8204 else if (root->sparsity() == 10 && root->t.qdim() == 2)
8205 pgai = std::make_shared
8206 <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
8207 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8208 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8209 else if (root->sparsity() == 10 && root->t.qdim() == 3)
8210 pgai = std::make_shared
8211 <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
8212 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8213 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8215 pgai = std::make_shared
8216 <ga_instruction_matrix_assembly_standard_vector>
8217 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8218 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8219 }
// Condensation, both test variables internal: the root tensor is stored in
// CC.KQQ(q1,q2) instead of being assembled globally. The first term for a
// pair allocates the tensor and copies into it, later terms are added.
else if (condensation &&
8220 workspace.is_internal_variable(root->name_test1) &&
8221 workspace.is_internal_variable(root->name_test2)) {
8225 GMM_ASSERT1(imd1 && imd2,
"Internal error");
8226 GMM_ASSERT1(!interpolate,
"Internal error");
8230 condensation_description &CC = condensations[rm];
8231 GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
8232 CC.Qvars.count(root->name_test2) > 0,
8234 size_type q1 = CC.Qvars[root->name_test1],
8235 q2 = CC.Qvars[root->name_test2];
8236 if (!CC.KQQ(q1,q2)) {
8238 gis.condensation_tensors.push_back
8239 (std::make_shared<base_tensor>(s1,s2));
8240 CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
8241 pgai = std::make_shared<ga_instruction_copy_vect>
8242 (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
8245 pgai = std::make_shared<ga_instruction_add_to>
8246 (*CC.KQQ(q1,q2), root->tensor());
8248 rmi.instructions.push_back(std::move(pgai));
8249 }
// Condensation, only name_test1 internal: the term goes to CC.KQJ(q1,j2).
// j2 is recorded in the Jclusters entry of the cluster containing q1, and
// KQJ is grown on demand.
else if (condensation &&
8250 workspace.is_internal_variable(root->name_test1)) {
8254 GMM_ASSERT1(imd1,
"Internal error");
8255 GMM_ASSERT1(!interpolate,
"Internal error");
8258 condensation_description &CC = condensations[rm];
8259 GMM_ASSERT1(CC.Qvars.count(root->name_test1),
8261 size_type q1 = CC.Qvars[root->name_test1],
8262 j2 = CC.Jvars[root->name_test2];
8263 CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
8264 if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
8265 CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
8266 std::max(CC.KQJ.ncols(), j2+1));
8267 if (!CC.KQJ(q1,j2)) {
8271 gis.condensation_tensors.push_back
8272 (std::make_shared<base_tensor>(root->tensor()));
8273 GMM_ASSERT1(root->tensor().size(0) == s1,
"Internal error");
8274 CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
8275 pgai = std::make_shared<ga_instruction_copy_vect>
8276 (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
8280 pgai = std::make_shared<ga_instruction_add_to>
8281 (*CC.KQJ(q1,j2), root->tensor());
8283 rmi.instructions.push_back(std::move(pgai));
8284 }
// Condensation, only name_test2 internal: the term goes to CC.KIQ(i1,q2),
// grown on demand.
else if (condensation &&
8285 workspace.is_internal_variable(root->name_test2)) {
8289 GMM_ASSERT1(imd2,
"Internal error");
8290 GMM_ASSERT1(!interpolate,
"Internal error");
8293 condensation_description &CC = condensations[rm];
8294 GMM_ASSERT1(CC.Qvars.count(root->name_test2),
8296 size_type i1 = CC.Ivars[root->name_test1],
8297 q2 = CC.Qvars[root->name_test2];
8298 if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
8299 CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
8300 std::max(CC.KIQ.ncols(), q2+1));
8301 if (!CC.KIQ(i1,q2)) {
8305 gis.condensation_tensors.push_back
8306 (std::make_shared<base_tensor>(root->tensor()));
8307 GMM_ASSERT1(root->tensor().size(1) == s2,
8309 CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
8310 pgai = std::make_shared<ga_instruction_copy_vect>
8311 (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
8315 pgai = std::make_shared<ga_instruction_add_to>
8316 (*CC.KIQ(i1,q2), root->tensor());
8318 rmi.instructions.push_back(std::move(pgai));
8319 }
// Neither test variable is internal: direct assembly into the global
// matrices.
else if (!workspace.is_internal_variable(root->name_test1) &&
8320 !workspace.is_internal_variable(root->name_test2)) {
8322 if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
8323 || has_var_group1 || has_var_group2)
8324 gis.unreduced_terms.emplace(root->name_test1,
// Target matrices resolved from the reduced state of mf1 (row side, "x" in
// first position) and of mf2 (column side, "x" in second position).
8327 auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8328 auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8329 auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
8330 auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8331 auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8334 &alpha1 = workspace.factor_of_variable(root->name_test1),
8335 &alpha2 = workspace.factor_of_variable(root->name_test2);
// The instruction type depends on whether each side is a variable group, a
// mesh_fem variable or an im_data variable (the conditions between the
// alternatives below are partly missing from this listing).
8337 if (has_var_group1) {
8338 ga_instruction_set::variable_group_info
8339 &vgi1 = rmi.interpolate_infos[intn1]
8340 .groups_info[root->name_test1];
8341 if (has_var_group2) {
8342 ga_instruction_set::variable_group_info
8343 &vgi2 = rmi.interpolate_infos[intn2]
8344 .groups_info[root->name_test2];
8345 pgai = std::make_shared
8346 <ga_instruction_matrix_assembly_mf_mf>
8347 (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
8349 gis.coeff, gis.nbpt, gis.ipt, interpolate);
8351 const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8352 ? workspace.temporary_interval_of_variable
8354 : workspace.interval_of_variable(root->name_test2);
8356 pgai = std::make_shared
8357 <ga_instruction_matrix_assembly_mf_mf>
8358 (root->tensor(), Krx, Kux, ctx1, ctx2,
8359 vgi1, I2, *mf2, alpha2,
8360 gis.coeff, gis.nbpt, gis.ipt, interpolate);
8362 pgai = std::make_shared
8363 <ga_instruction_matrix_assembly_mf_imd>
8364 (root->tensor(), Krr, Kur, ctx1, ctx2,
8365 vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
8368 const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
8369 ? workspace.temporary_interval_of_variable
8371 : workspace.interval_of_variable(root->name_test1);
8372 if (has_var_group2) {
8373 ga_instruction_set::variable_group_info
8374 &vgi2 = rmi.interpolate_infos[intn2]
8375 .groups_info[root->name_test2];
8377 pgai = std::make_shared
8378 <ga_instruction_matrix_assembly_mf_mf>
8379 (root->tensor(), Kxr, Kxu, ctx1, ctx2,
8380 I1, *mf1, alpha1, vgi2,
8381 gis.coeff, gis.nbpt, gis.ipt, interpolate);
8383 pgai = std::make_shared
8384 <ga_instruction_matrix_assembly_imd_mf>
8385 (root->tensor(), Krr, Kru, ctx1, ctx2,
8386 I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
8388 const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8389 ? workspace.temporary_interval_of_variable
8391 : workspace.interval_of_variable(root->name_test2);
8393 pgai = std::make_shared
8394 <ga_instruction_matrix_assembly_mf_mf>
8395 (root->tensor(), Kxx, ctx1, ctx2,
8396 I1, *mf1, alpha1, I2, *mf2, alpha2,
8397 gis.coeff, gis.nbpt, gis.ipt, interpolate);
8399 pgai = std::make_shared
8400 <ga_instruction_matrix_assembly_mf_imd>
8401 (root->tensor(), Kxr, ctx1, ctx2,
8402 I1, *mf1, alpha1, I2, imd2, alpha2,
8403 gis.coeff, gis.ipt);
8405 pgai = std::make_shared
8406 <ga_instruction_matrix_assembly_imd_mf>
8407 (root->tensor(), Krx, ctx1, ctx2,
8408 I1, imd1, alpha1, I2, *mf2, alpha2,
8409 gis.coeff, gis.ipt);
8411 pgai = std::make_shared
8412 <ga_instruction_matrix_assembly_imd_imd>
8413 (root->tensor(), Krr, ctx1, ctx2,
8414 I1, imd1, alpha1, I2, imd2, alpha2,
8415 gis.coeff, gis.ipt);
8423 rmi.instructions.push_back(std::move(pgai));
// --- Stage 4: condensation instructions, appended after the tree
// instructions of the ASSEMBLY phase ---
8429 if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
8431 auto &Krr = workspace.assembled_matrix();
8432 auto &Kru = workspace.col_unreduced_matrix();
8433 auto &Kur = workspace.row_unreduced_matrix();
8434 auto &Kuu = workspace.row_col_unreduced_matrix();
8436 for (
auto &&key_val : condensations) {
8437 const ga_instruction_set::region_mim rm = key_val.first;
8438 condensation_description &CC = key_val.second;
8439 auto &rmi = gis.all_instructions[rm];
// KQJpr mirrors the shape of KQJ; every entry gets its own tensor, created
// here with a 2x2 size.
8441 CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
8442 for (
size_type k=0; k < CC.KQJpr.size(); ++k) {
8443 gis.condensation_tensors.push_back
8444 (std::make_shared<base_tensor>(2,2));
8445 CC.KQJpr[k] = gis.condensation_tensors.back().get();
8448 pga_instruction pgai;
// Per cluster: extract the residual of each internal variable into RQpr,
// run the condensation instruction, then assemble KQJpr and RQpr (the loops
// over q1 and j2 are not visible in this listing).
8451 for (
size_type k=0; k < CC.Qclusters.size(); ++k) {
8455 std::string name_test1 = CC.Qvars[q1];
8456 const im_data *imd1 = workspace.associated_im_data(name_test1);
8457 const gmm::sub_interval
8458 &I1 = workspace.interval_of_variable(name_test1);
8460 std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
8461 (*(CC.RQpr[q1]), workspace.cached_vector(),
8462 gis.ctx, I1, *imd1, gis.ipt);
8463 rmi.instructions.push_back(std::move(pgai));
8469 pgai = std::make_shared<ga_instruction_condensation_sub>
8470 (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff);
8471 rmi.instructions.push_back(std::move(pgai));
8476 std::string name_test1 = CC.Qvars[q1];
8477 const im_data *imd1 = workspace.associated_im_data(name_test1);
8480 const gmm::sub_interval
8481 &I1 = workspace.interval_of_variable(name_test1);
8482 GMM_ASSERT1(imd1,
"Internal error");
8484 std::string name_test2 = CC.Jvars[j2];
8485 const mesh_fem *mf2 = workspace.associated_mf(name_test2);
8486 const im_data *imd2 = workspace.associated_im_data(name_test2);
8493 const gmm::sub_interval
8494 &I2 = mf2 && mf2->is_reduced()
8495 ? workspace.temporary_interval_of_variable(name_test2)
8496 : workspace.interval_of_variable(name_test2);
8497 const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2));
// KQJpr(q1,j2) is assembled into the column-unreduced matrix when mf2 is
// reduced, otherwise into the internal coupling matrix, with unit factors
// (gis.ONE).
8498 model_real_sparse_matrix
8499 &KQJpr = mf2 && mf2->is_reduced()
8500 ? workspace.col_unreduced_matrix()
8501 : workspace.internal_coupling_matrix();
8504 std::make_shared<ga_instruction_matrix_assembly_imd_mf>
8505 (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8506 I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt);
8508 if (mf2->is_reduced())
8509 gis.unreduced_terms.emplace(name_test1, name_test2);
8512 std::make_shared<ga_instruction_matrix_assembly_imd_imd>
8513 (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8514 I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt);
8515 rmi.instructions.push_back(std::move(pgai));
// RQpr[q1] is assembled into the assembled vector, with the `initialize`
// flag of the instruction set to true.
8517 const bool initialize =
true;
8518 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8519 (*(CC.RQpr[q1]), workspace.assembled_vector(),
8520 gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize);
8521 rmi.instructions.push_back(std::move(pgai));
// Per primary row variable i1: build the condensed matrix contributions
// Kij and the condensed residual contribution Ri.
8526 for (
size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
8528 std::string name_test1 = CC.Ivars[i1];
8529 const mesh_fem *mf1 = workspace.associated_mf(name_test1);
8530 const im_data *imd1 = workspace.associated_im_data(name_test1);
8532 &alpha1 = workspace.factor_of_variable(name_test1);
8533 const gmm::sub_interval
8534 &I1 = mf1 && mf1->is_reduced()
8535 ? workspace.temporary_interval_of_variable(name_test1)
8536 : workspace.interval_of_variable(name_test1);
// Q_of_J[j] collects ids q of internal variables (the selection condition
// is not visible in this listing).
8540 std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
8541 for (
size_type q=0; q < CC.Qvars.size(); ++q)
8545 Q_of_J[j].insert(q);
8548 for (
size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
8549 if (Q_of_J[j2].size()) {
8550 std::vector<base_tensor *> Ki1Q, KQj2;
8552 Ki1Q.push_back(CC.KIQ(i1,q));
8553 KQj2.push_back(CC.KQJpr(q,j2));
// Kij is a fresh tensor owned by gis.condensation_tensors; it is the target
// of the ga_instruction_condensation_super_K instruction and then the
// source of the assembly instruction below.
8556 gis.condensation_tensors.push_back
8557 (std::make_shared<base_tensor>());
8558 base_tensor &Kij = *gis.condensation_tensors.back();
8559 pgai = std::make_shared<ga_instruction_condensation_super_K>
8561 rmi.instructions.push_back(std::move(pgai));
8563 std::string name_test2 = CC.Jvars[j2];
8564 const mesh_fem *mf2 = workspace.associated_mf(name_test2);
8565 const im_data *imd2 = workspace.associated_im_data(name_test2);
8570 &alpha2 = workspace.factor_of_variable(name_test2);
8571 const gmm::sub_interval
8572 &I2 = mf2 && mf2->is_reduced()
8573 ? workspace.temporary_interval_of_variable(name_test2)
8574 : workspace.interval_of_variable(name_test2);
8576 auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8577 auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8578 auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8579 auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8581 if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8582 gis.unreduced_terms.emplace(name_test1, name_test2);
8585 pgai = std::make_shared
8586 <ga_instruction_matrix_assembly_mf_mf>
8587 (Kij, Kxx, gis.ctx, gis.ctx,
8588 I1, *mf1, alpha1, I2, *mf2, alpha2,
8589 gis.coeff, gis.nbpt, gis.ipt,
false);
8591 pgai = std::make_shared
8592 <ga_instruction_matrix_assembly_mf_imd>
8593 (Kij, Kxr, gis.ctx, gis.ctx,
8594 I1, *mf1, alpha1, I2, imd2, alpha2,
8595 gis.coeff, gis.ipt);
8597 pgai = std::make_shared
8598 <ga_instruction_matrix_assembly_imd_mf>
8599 (Kij, Krx, gis.ctx, gis.ctx,
8600 I1, imd1, alpha1, I2, *mf2, alpha2,
8601 gis.coeff, gis.ipt);
8603 pgai = std::make_shared
8604 <ga_instruction_matrix_assembly_imd_imd>
8605 (Kij, Krr, gis.ctx, gis.ctx,
8606 I1, imd1, alpha1, I2, imd2, alpha2,
8607 gis.coeff, gis.ipt);
8608 rmi.instructions.push_back(std::move(pgai));
// Residual contribution for row variable i1, built from the KIQ(i1,q)
// tensors and the RQpr tensors.
8613 std::vector<base_tensor *> Ki1Q, RQpr;
8614 for (
size_type q=0; q < CC.Qvars.size(); ++q)
8616 Ki1Q.push_back(CC.KIQ(i1,q));
8617 RQpr.push_back(CC.RQpr[q]);
8619 gis.condensation_tensors.push_back
8620 (std::make_shared<base_tensor>());
8621 base_tensor &Ri = *gis.condensation_tensors.back();
8622 pgai = std::make_shared<ga_instruction_condensation_super_R>
8624 rmi.instructions.push_back(std::move(pgai));
// NOTE(review): mf1 is dereferenced here without a null check, whereas I1
// above is computed with `mf1 && mf1->is_reduced()` and the alternatives
// below use imd1 or no mesh_fem at all, which suggests mf1 can be null --
// confirm and guard if so.
8626 base_vector &R = mf1->is_reduced() ? workspace.unreduced_vector()
8627 : workspace.assembled_vector();
8629 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8630 (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt,
false);
8632 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8633 (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8635 pgai = std::make_shared<ga_instruction_vector_assembly>
8636 (Ri, R, I1, gis.coeff);
8637 rmi.instructions.push_back(std::move(pgai));
// Executes, in order, the `instructions` list of every entry of
// gis.all_instructions.
// The value returned by exec() is added to the loop index, so a non-zero
// return makes the loop skip that many of the following instructions.
// NOTE: this listing has gaps (some original lines are missing).
8652 void ga_function_exec(ga_instruction_set &gis) {
8654 for (
auto &&instr : gis.all_instructions) {
8655 const auto &gil = instr.second.instructions;
8656 for (
size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
// Executes the compiled interpolation instructions of `gis` at the points
// supplied by the interpolation context `gic`.
// Visible steps: init() of every transformation in gis.transformations; for
// each entry of gis.all_instructions, a loop over the points returned by
// gic.ppoints_for_element() that runs the begin, element and regular
// instruction lists and passes workspace.assembled_tensor() to
// gic.store_result(); finalize() of every transformation at the end.
// NOTE: this listing has gaps (loop headers, declarations and closing braces
// are missing), so the comments below describe only the visible statements.
8660 void ga_interpolation_exec(ga_instruction_set &gis,
8661 ga_workspace &workspace,
8662 ga_interpolation_context &gic) {
8664 base_small_vector un, up;
8666 for (
const std::string &t : gis.transformations)
8667 workspace.interpolate_transformation(t)->init(workspace);
8669 for (
auto &&instr : gis.all_instructions) {
8672 const mesh_region ®ion = *(instr.first.region());
// The mesh of the instruction set must be the very same object as the mesh
// of the interpolation context (address comparison).
8674 GMM_ASSERT1(&m == &(gic.linked_mesh()),
8675 "Incompatibility of meshes in interpolation");
8676 const auto &gilb = instr.second.begin_instructions;
8677 const auto &gile = instr.second.elt_instructions;
8678 const auto &gil = instr.second.instructions;
8681 std::vector<size_type> ind;
8682 auto pai_old = papprox_integration{};
8684 if (gic.use_mim()) {
// `ind` receives the indices of the points to process for this element/face.
8691 bgeot::pstored_point_tab pspt
8692 = gic.ppoints_for_element(v.cv(), v.f(), ind);
8694 if (pspt.get() && ind.size() && pspt->size()) {
8695 m.points_of_convex(v.cv(), G);
8697 up.resize(G.nrows());
8698 un.resize(pgt->dim());
// The existing precomputation of the context is reused when the geometric
// transformation and the integration method are unchanged; otherwise the
// context is rebuilt, either directly from the first point or from a
// precomputation obtained from gis.gp_pool.
8700 if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8701 gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8703 if (!(gic.use_pgp(v.cv()))) {
8704 gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8706 gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
8711 if (gis.need_elt_size)
8715 gis.nbpt = pspt->size();
8716 for (
size_type ii = 0; ii < ind.size(); ++ii) {
8718 if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8719 else gis.ctx.set_xref((*pspt)[gis.ipt]);
// Recomputed at the first point, and at every point when the geometric
// transformation is not linear.
8721 if (ii == 0 || !(pgt->is_linear())) {
8724 const base_matrix& B = gis.ctx.B();
8728 gmm::scale(up,1.0/nup);
8731 }
else gis.Normal.resize(0);
// The result tensor is zeroed before the instructions of each point run.
8733 gmm::clear(workspace.assembled_tensor().as_vector());
// exec() return values are added to the loop index, so a non-zero return
// skips that many of the following instructions.
8735 for (
size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8736 for (
size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8738 for (
size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8739 gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
8744 for (
const std::string &t : gis.transformations)
8745 workspace.interpolate_transformation(t)->finalize();
// Executes the compiled assembly instructions of `gis`.
// Visible steps: init() of every transformation in gis.transformations; for
// each entry of gis.all_instructions, a loop over the integration points of
// the region that runs the begin, element and regular instruction lists;
// finalize() of every transformation at the end.
// Two variants of the point loop are visible: one over a single region
// (variables without suffix) and one pairing each point of the region with
// the points of a secondary domain (variables suffixed 1 and 2, with `sdi`
// holding the secondary-domain context). The test choosing between them is
// not visible, presumably it depends on `psd` -- TODO confirm.
// NOTE: this listing has gaps (loop headers, declarations, `else` and
// closing braces are missing), so the comments below describe only the
// visible statements.
8750 void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8752 base_small_vector un;
8753 scalar_type J1(0), J2(0);
8755 for (
const std::string &t : gis.transformations)
8756 workspace.interpolate_transformation(t)->init(workspace);
8758 for (
auto &instr : gis.all_instructions) {
8760 psecondary_domain psd = instr.first.psd();
// The mesh of the instruction set must be the very same object as the mesh
// linked to the integration method (address comparison).
8762 GMM_ASSERT1(&m == &(mim.
linked_mesh()),
"Incompatibility of meshes");
8763 const auto &gilb = instr.second.begin_instructions;
8764 const auto &gile = instr.second.elt_instructions;
8765 const auto &gil = instr.second.instructions;
// --- Single-region variant ---
8779 const mesh_region ®ion = *(instr.first.region());
8784 pintegration_method pim = 0;
8785 papprox_integration pai = 0;
8786 bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8787 bgeot::pgeotrans_precomp pgp = 0;
8788 bool first_gp =
true;
// Per-element data is refreshed only when the convex changes between two
// consecutive entries of the region.
8792 if (v.cv() != old_cv) {
8793 pgt = m.trans_of_convex(v.cv());
8795 m.points_of_convex(v.cv(), G1);
// Elements without integration method are skipped; only approximate
// integration methods are accepted.
8797 if (pim->type() == IM_NONE)
continue;
8798 GMM_ASSERT1(pim->type() == IM_APPROX,
"Sorry, exact methods "
8799 "cannot be used in high level generic assembly");
8800 pai = pim->approx_method();
8801 pspt = pai->pintegration_points();
// The precomputation pgp is reused when the integration method and the
// geometric transformation are unchanged; a method built on the fly gets a
// context without precomputation, otherwise one is taken from gis.gp_pool.
8803 if (pgp && gis.pai == pai && pgt_old == pgt) {
8804 gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8806 if (pai->is_built_on_the_fly()) {
8807 gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8810 pgp = gis.gp_pool(pgt, pspt);
8811 gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8813 pgt_old = pgt; gis.pai = pai;
8815 if (gis.need_elt_size)
8820 if (pim->type() == IM_NONE)
continue;
8821 gis.ctx.set_face_num(v.f());
8823 if (pspt != old_pspt) { first_gp =
true; old_pspt = pspt; }
// Point range: the points of face v.f() starting at first_ind, or all the
// points of the convex (the selecting condition is not visible).
8828 gis.nbpt = pai->nb_points_on_face(v.f());
8829 first_ind = pai->ind_first_point_on_face(v.f());
8831 gis.nbpt = pai->nb_points_on_convex();
8833 for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8834 if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8835 else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
// Recomputed at the first point, and at every point when the geometric
// transformation is not linear.
8836 if (gis.ipt == 0 || !(pgt->is_linear())) {
8840 gis.Normal.resize(G1.nrows());
8841 un.resize(pgt->dim());
8846 gmm::scale(gis.Normal, 1.0/nup);
8848 }
else gis.Normal.resize(0);
// Integration weight of the point: J1 times the quadrature coefficient. A
// point with a zero coefficient is disabled (weight forced to zero) unless
// the workspace asks to include empty integration points.
8850 auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8851 gis.coeff = J1 * ipt_coeff;
8852 bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8853 workspace.include_empty_int_points());
8854 if (!enable_ipt) gis.coeff = scalar_type(0);
// exec() return values are added to the loop index, so a non-zero return
// skips that many of the following instructions.
8856 for (
size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8860 for (
size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
// The regular instructions still run at the first and last points of the
// element even when the point is disabled.
8862 if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8863 for (
size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8870 GA_DEBUG_INFO(
"-----------------------------");
// --- Secondary-domain variant: index 1 is the region of the instruction
// set, index 2 the region given by the secondary domain ---
8874 auto &sdi = instr.second.secondary_domain_infos;
8875 const mesh_region ®ion1 = *(instr.first.region());
8881 pintegration_method pim1 = 0, pim2 = 0;
8882 papprox_integration pai1 = 0, pai2 = 0;
8883 bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8884 bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8885 bool first_gp =
true;
8889 if (v1.cv() != old_cv1) {
8890 pgt1 = m.trans_of_convex(v1.cv());
8892 m.points_of_convex(v1.cv(), G1);
8894 if (pim1->type() == IM_NONE)
continue;
8895 GMM_ASSERT1(pim1->type() == IM_APPROX,
"Sorry, exact methods "
8896 "cannot be used in high level generic assembly");
8897 pai1 = pim1->approx_method();
8898 pspt1 = pai1->pintegration_points();
8899 if (pspt1->size()) {
8900 if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8901 gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8903 if (pai1->is_built_on_the_fly()) {
8904 gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8907 pgp1 = gis.gp_pool(pgt1, pspt1);
8908 gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8910 pgt1_old = pgt1; gis.pai = pai1;
8912 if (gis.need_elt_size)
8917 if (pim1->type() == IM_NONE)
continue;
8918 gis.ctx.set_face_num(v1.f());
8920 if (pspt1 != old_pspt1) { first_gp =
true; old_pspt1 = pspt1; }
8921 if (pspt1->size()) {
8925 nbpt1 = pai1->nb_points_on_face(v1.f());
8926 first_ind1 = pai1->ind_first_point_on_face(v1.f());
8928 nbpt1 = pai1->nb_points_on_convex();
// The secondary domain supplies its own mesh (through its mim) and the
// region to pair with the current element/face of the first region.
8931 const mesh &m2 = psd->mim().linked_mesh();
8932 const mesh_region ®ion2 = psd->give_region(m, v1.cv(), v1.f());
8934 !v2.finished(); ++v2) {
8935 if (v2.cv() != old_cv2) {
8936 pgt2 = m2.trans_of_convex(v2.cv());
8937 pim2 = psd->mim().int_method_of_element(v2.cv());
8938 m2.points_of_convex(v2.cv(), G2);
8940 if (pim2->type() == IM_NONE)
continue;
8941 GMM_ASSERT1(pim2->type() == IM_APPROX,
"Sorry, exact methods "
8942 "cannot be used in high level generic assembly");
8943 pai2 = pim2->approx_method();
8944 pspt2 = pai2->pintegration_points();
8945 if (pspt2->size()) {
8946 if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8947 sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8949 if (pai2->is_built_on_the_fly()) {
8950 sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8953 pgp2 = gis.gp_pool(pgt2, pspt2);
8954 sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8956 pgt2_old = pgt2; sdi.pai = pai2;
8961 if (pim2->type() == IM_NONE)
continue;
8962 sdi.ctx.set_face_num(v2.f());
8964 if (pspt2 != old_pspt2) { first_gp =
true; old_pspt2 = pspt2; }
8965 if (pspt2->size()) {
8969 nbpt2 = pai2->nb_points_on_face(v2.f());
8970 first_ind2 = pai2->ind_first_point_on_face(v2.f());
8972 nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
// The instructions see one flat point index gis.ipt running over all the
// nbpt1 * nbpt2 pairs of points.
8974 gis.nbpt = nbpt1 * nbpt2;
8976 for (
size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8977 for (
size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8979 if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8980 else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8981 if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8982 else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
8984 if (gis.ipt == 0 || !(pgt1->is_linear())) {
8987 gis.Normal.resize(G1.nrows());
8988 un.resize(pgt1->dim());
8993 gmm::scale(gis.Normal, 1.0/nup);
8995 }
else gis.Normal.resize(0);
8998 if (gis.ipt == 0 || !(pgt2->is_linear())) {
9001 sdi.Normal.resize(G2.nrows());
9002 un.resize(pgt2->dim());
9007 gmm::scale(sdi.Normal, 1.0/nup);
9009 }
else sdi.Normal.resize(0);
// Weight of the pair of points: product of both quadrature coefficients,
// times J1 and J2; same zero-coefficient rule as in the single-region
// variant.
9012 auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
9013 * pai2->coeff(first_ind2+ipt2);
9014 gis.coeff = J1 * J2 * ipt_coeff;
9015 bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
9016 workspace.include_empty_int_points());
9017 if (!enable_ipt) gis.coeff = scalar_type(0);
9020 for (
size_type j=0; j < gilb.size(); ++j)
9025 for (
size_type j=0; j < gile.size(); ++j)
9028 if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
9029 for (
size_type j=0; j < gil.size(); ++j)
9040 GA_DEBUG_INFO(
"-----------------------------");
9045 for (
const std::string &t : gis.transformations)
9046 workspace.interpolate_transformation(t)->finalize();
does the inversion of the geometric transformation for a given convex
bool invert(const base_node &n, base_node &n_ref, scalar_type IN_EPS=1e-12, bool project_into_element=false)
given the node on the real element, returns the node on the reference element (even if it is outside ...
The geotrans_precomp_pool object allows allocating a certain number of geotrans_precomp and automatic...
static T & instance()
Instance from the current thread.
Describe an integration method linked to a mesh.
virtual pintegration_method int_method_of_element(size_type cv) const
Return the integration method associated with an element (if no integration is associated,...
const mesh & linked_mesh() const
Give a reference to the linked mesh of type mesh.
const dal::bit_vector & convex_index(void) const
Get the set of convexes where an integration method has been assigned.
"iterator" class for regions.
static mesh_region intersection(const mesh_region &a, const mesh_region &b)
return the intersection of two mesh regions
Describe a mesh (collection of convexes (elements) and points).
virtual scalar_type convex_radius_estimate(size_type ic) const
Return an estimate of the convex largest dimension.
sparse vector built upon std::vector.
Semantic analysis of assembly trees and semantic manipulations.
Compilation and execution operations.
a subclass of mesh_im which is conformal to a number of level sets.
void copy(const L1 &l1, L2 &l2)
Copy the vector or matrix l1 into l2.
number_traits< typename linalg_traits< V >::value_type >::magnitude_type vect_norm2(const V &v)
Euclidean norm of a vector.
number_traits< typename linalg_traits< V >::value_type >::magnitude_type vect_norminf(const V &v)
Infinity norm of a vector.
void clear(L &l)
clear (fill with zeros) a vector or matrix.
void resize(V &v, size_type n)
Resize the vector v to n elements.
void clean(L &l, double threshold)
Clean a vector or matrix (replace near-zero entries with zeroes).
void mult(const L1 &l1, const L2 &l2, L3 &l3)
Compute the product of l1 and l2 and store the result in l3 (l3 = l1 * l2).
void add(const L1 &l1, L2 &l2)
Add l1 to l2 (l2 = l2 + l1) for vectors or matrices.
scalar_type APIDECL convex_radius_estimate(bgeot::pgeometric_trans pgt, const base_matrix &pts)
rough estimate of the radius of the convex using the largest eigenvalue of the jacobian of the geomet...
std::shared_ptr< const getfem::virtual_fem > pfem
type of pointer on a fem description
gmm::uint16_type short_type
used as the common short type integer in the library
base_small_vector compute_normal(const geotrans_interpolation_context &c, size_type face)
norm of returned vector is the ratio between the face surface on the real element and the face surfac...
size_t size_type
used as the common size type in the library
std::shared_ptr< const bgeot::geometric_trans > pgeometric_trans
pointer type for a geometric transformation
size_type alpha(short_type n, short_type d)
Return the value of $\frac{(n+d)!}{n!\,d!}$, which is the number of monomials of a polynomial of $n$ variables and degree $d$.
GEneric Tool for Finite Element Methods.
void slice_vector_on_basic_dof_of_element(const mesh_fem &mf, const VEC1 &vec, size_type cv, VEC2 &coeff, size_type qmult1=size_type(-1), size_type qmult2=size_type(-1))
Given a mesh_fem.