GetFEM  5.5
getfem_generic_assembly_compile_and_exec.cc
1 /*===========================================================================
2 
3  Copyright (C) 2013-2026 Yves Renard
4 
5  This file is a part of GetFEM
6 
7  GetFEM is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program. If not, see https://www.gnu.org/licenses/.
18 
19 ===========================================================================*/
20 
24 #include "getfem/getfem_generic_assembly_compile_and_exec.h"
25 #include "getfem/getfem_generic_assembly_functions_and_operators.h"
26 
// GA_USES_BLAS is defined exactly when GMM_USES_BLAS is.
#ifdef GMM_USES_BLAS
#  define GA_USES_BLAS
#endif

// Tracing hook invoked at the top of every instruction's exec(). It expands
// to nothing here; the commented variant prints the message instead.
// #define GA_DEBUG_INFO(a) { cout << a << endl; }
#define GA_DEBUG_INFO(a)
33 
34 
35 
36 namespace getfem {
37 
38 
// Overwrites v2 with a*v1, element by element, with the main loop unrolled
// four times.
//
// The unrolled loop handles floor(v1.size()/4) groups of four entries; the
// trailing loop then runs until the end of v2. No size check is made here, so
// both ranges are expected to have the same length.
template <class VEC1, class VEC2>
inline void copy_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto src = v1.begin();
  auto dst = v2.begin();
  const auto dst_end = v2.end();

  for (size_type groups = v1.size() >> 2; groups > 0; --groups) {
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
  }

  // Remaining entries (fewer than four when both sizes agree).
  while (dst != dst_end) {
    *dst = (*src) * a;
    ++dst; ++src;
  }
}
53 
// Accumulates a*v1 into v2 (v2 += a*v1), element by element, with the main
// loop unrolled four times.
//
// The unrolled loop handles floor(v1.size()/4) groups of four entries; the
// trailing loop then runs until the end of v2. No size check is made here, so
// both ranges are expected to have the same length.
template <class VEC1, class VEC2>
inline void add_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto src = v1.begin();
  auto dst = v2.begin();
  const auto dst_end = v2.end();

  for (size_type groups = v1.size() >> 2; groups > 0; --groups) {
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
  }

  // Remaining entries (fewer than four when both sizes agree).
  while (dst != dst_end) {
    *dst += (*src) * a;
    ++dst; ++src;
  }
}
68 
// Overwrites v2 with a*v1, element by element, with the main loop unrolled
// eight times.
//
// The unrolled loop handles floor(v1.size()/8) groups of eight entries; the
// trailing loop then runs until the end of v2. No size check is made here, so
// both ranges are expected to have the same length.
template <class VEC1, class VEC2>
inline void copy_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto src = v1.begin();
  auto dst = v2.begin();
  const auto dst_end = v2.end();

  for (size_type groups = v1.size() >> 3; groups > 0; --groups) {
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
    *dst = (*src) * a; ++dst; ++src;
  }

  // Remaining entries (fewer than eight when both sizes agree).
  while (dst != dst_end) {
    *dst = (*src) * a;
    ++dst; ++src;
  }
}
87 
// Accumulates a*v1 into v2 (v2 += a*v1), element by element, with the main
// loop unrolled eight times.
//
// The unrolled loop handles floor(v1.size()/8) groups of eight entries; the
// trailing loop then runs until the end of v2. No size check is made here, so
// both ranges are expected to have the same length.
template <class VEC1, class VEC2>
inline void add_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto src = v1.begin();
  auto dst = v2.begin();
  const auto dst_end = v2.end();

  for (size_type groups = v1.size() >> 3; groups > 0; --groups) {
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
    *dst += (*src) * a; ++dst; ++src;
  }

  // Remaining entries (fewer than eight when both sizes agree).
  while (dst != dst_end) {
    *dst += (*src) * a;
    ++dst; ++src;
  }
}
106 
107  bool operator <(const gauss_pt_corresp &gpc1,
108  const gauss_pt_corresp &gpc2) {
109  if (gpc1.pai != gpc2.pai)
110  return (gpc1.pai < gpc2.pai );
111  if (gpc1.nodes.size() != gpc2.nodes.size())
112  return (gpc1.nodes.size() < gpc2.nodes.size());
113  for (size_type i = 0; i < gpc1.nodes.size(); ++i)
114  if (gpc1.nodes[i] != gpc2.nodes[i])
115  return (gpc1.nodes[i] < gpc2.nodes[i]);
116  if (gpc1.pgt1 != gpc2.pgt1)
117  return (gpc1.pgt1 < gpc2.pgt1);
118  if (gpc1.pgt2 != gpc2.pgt2)
119  return (gpc1.pgt2 < gpc2.pgt2);
120  return false;
121  }
122 
123  bool operator <(const ga_instruction_set::region_mim &rm1,
124  const ga_instruction_set::region_mim &rm2) {
125  if (rm1.mim() != rm2.mim()) return (rm1.mim() < rm2.mim());
126  if (rm1.region() != rm2.region()) return (rm1.region() < rm2.region());
127  return (rm1.psd() < rm2.psd());
128  }
129 
130  //=========================================================================
131  // Instructions for compilation: basic optimized operations on tensors
132  //=========================================================================
133 
134  struct ga_instruction_extract_local_im_data : public ga_instruction {
135  base_tensor &t;
136  const im_data &imd;
137  papprox_integration &pai;
138  const base_vector &U;
139  const fem_interpolation_context &ctx;
140  size_type qdim, cv_old;
141  virtual int exec() {
142  GA_DEBUG_INFO("Instruction: extract local im data");
143  size_type cv = ctx.convex_num();
144  if (cv != cv_old) {
145  cv_old = cv;
146  GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
147  ->approx_method() == pai, "Im data have to be used only "
148  "on their original integration method.");
149  }
150  size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
151  GMM_ASSERT1(ipt != size_type(-1),
152  "Im data with no data on the current integration point.");
153  auto it = U.begin()+ipt*qdim;
154  std::copy(it, it+qdim, t.begin());
155  return 0;
156  }
157  ga_instruction_extract_local_im_data
158  (base_tensor &t_, const im_data &imd_, const base_vector &U_,
159  papprox_integration &pai_, const fem_interpolation_context &ctx_,
160  size_type qdim_)
161  : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
162  cv_old(-1)
163  {}
164  };
165 
166  struct ga_instruction_slice_local_dofs : public ga_instruction {
167  const mesh_fem &mf;
168  const base_vector &U;
169  const fem_interpolation_context &ctx;
170  base_vector &coeff;
171  size_type qmult1, qmult2;
172  virtual int exec() {
173  GA_DEBUG_INFO("Instruction: Slice local dofs");
174  GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
175  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
176  coeff, qmult1, qmult2);
177  return 0;
178  }
179  ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
180  const fem_interpolation_context &ctx_,
181  base_vector &coeff_,
182  size_type qmult1_, size_type qmult2_)
183  : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
184  qmult1(qmult1_), qmult2(qmult2_) {}
185  };
186 
187  struct ga_instruction_update_pfp : public ga_instruction {
188  const mesh_fem &mf;
189  const fem_interpolation_context &ctx;
190  fem_precomp_pool &fp_pool;
191  pfem_precomp &pfp;
192 
193  virtual int exec() {
194  GA_DEBUG_INFO("Instruction: Pfp update");
195  if (ctx.have_pgp()) {
196  size_type cv = ctx.is_convex_num_valid()
197  ? ctx.convex_num() : mf.convex_index().first_true();
198  pfem pf = mf.fem_of_element(cv);
199  if (!pfp || pf != pfp->get_pfem() ||
200  ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
201  pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
202  }
203  } else {
204  pfp = 0;
205  }
206  return 0;
207  }
208 
209  ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
210  const fem_interpolation_context &ctx_,
211  fem_precomp_pool &fp_pool_)
212  : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
213  };
214 
215  struct ga_instruction_first_ind_tensor : public ga_instruction {
216  base_tensor &t;
217  const fem_interpolation_context &ctx;
218  size_type qdim;
219  const mesh_fem *mfn, **mfg;
220 
221  virtual int exec() {
222  GA_DEBUG_INFO("Instruction: adapt first index of tensor");
223  const mesh_fem &mf = *(mfg ? *mfg : mfn);
224  GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
225  size_type cv_1 = ctx.is_convex_num_valid()
226  ? ctx.convex_num() : mf.convex_index().first_true();
227  pfem pf = mf.fem_of_element(cv_1);
228  GMM_ASSERT1(pf, "An element without finite element method defined");
229  size_type Qmult = qdim / pf->target_dim();
230  size_type s = pf->nb_dof(cv_1) * Qmult;
231  if (t.sizes()[0] != s)
232  { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
233  return 0;
234  }
235 
236  ga_instruction_first_ind_tensor(base_tensor &t_,
237  const fem_interpolation_context &ctx_,
238  size_type qdim_, const mesh_fem *mfn_,
239  const mesh_fem **mfg_)
240  : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
241  };
242 
243  struct ga_instruction_second_ind_tensor
244  : public ga_instruction_first_ind_tensor {
245 
246  virtual int exec() {
247  GA_DEBUG_INFO("Instruction: adapt second index of tensor");
248  const mesh_fem &mf = *(mfg ? *mfg : mfn);
249  size_type cv_1 = ctx.is_convex_num_valid()
250  ? ctx.convex_num() : mf.convex_index().first_true();
251  pfem pf = mf.fem_of_element(cv_1);
252  GMM_ASSERT1(pf, "An element without finite element methode defined");
253  size_type Qmult = qdim / pf->target_dim();
254  size_type s = pf->nb_dof(cv_1) * Qmult;
255  if (t.sizes()[1] != s)
256  { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
257  return 0;
258  }
259 
260  ga_instruction_second_ind_tensor(base_tensor &t_,
261  fem_interpolation_context &ctx_,
262  size_type qdim_, const mesh_fem *mfn_,
263  const mesh_fem **mfg_)
264  : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
265 
266  };
267 
268  struct ga_instruction_two_first_ind_tensor : public ga_instruction {
269  base_tensor &t;
270  const fem_interpolation_context &ctx1, &ctx2;
271  size_type qdim1;
272  const mesh_fem *mfn1, **mfg1;
273  size_type qdim2;
274  const mesh_fem *mfn2, **mfg2;
275 
276  virtual int exec() {
277  GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
278  const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
279  const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
280  size_type cv_1 = ctx1.is_convex_num_valid()
281  ? ctx1.convex_num() : mf1.convex_index().first_true();
282  size_type cv_2 = ctx2.is_convex_num_valid()
283  ? ctx2.convex_num() : mf2.convex_index().first_true();
284  pfem pf1 = mf1.fem_of_element(cv_1);
285  GMM_ASSERT1(pf1, "An element without finite element method defined");
286  pfem pf2 = mf2.fem_of_element(cv_2);
287  GMM_ASSERT1(pf2, "An element without finite element method defined");
288  size_type Qmult1 = qdim1 / pf1->target_dim();
289  size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
290  size_type Qmult2 = qdim2 / pf2->target_dim();
291  size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
292  GMM_ASSERT1(s1 > 0 && s2 >0, "Element without degrees of freedom");
293  if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
294  bgeot::multi_index mi = t.sizes();
295  mi[0] = s1; mi[1] = s2;
296  t.adjust_sizes(mi);
297  }
298  return 0;
299  }
300 
301  ga_instruction_two_first_ind_tensor
302  (base_tensor &t_, const fem_interpolation_context &ctx1_,
303  const fem_interpolation_context &ctx2_,
304  size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
305  size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
306  : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
307  mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
308  };
309 
310 
311  struct ga_instruction_X_component : public ga_instruction {
312  scalar_type &t;
313  const fem_interpolation_context &ctx;
314  size_type n;
315 
316  virtual int exec() {
317  GA_DEBUG_INFO("Instruction: X component");
318  t = ctx.xreal()[n];
319  return 0;
320  }
321 
322  ga_instruction_X_component
323  (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
324  : t(t_), ctx(ctx_), n(n_) {}
325  };
326 
327  struct ga_instruction_X : public ga_instruction {
328  base_tensor &t;
329  const fem_interpolation_context &ctx;
330 
331  virtual int exec() {
332  GA_DEBUG_INFO("Instruction: X");
333  GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
334  gmm::copy(ctx.xreal(), t.as_vector());
335  return 0;
336  }
337 
338  ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
339  : t(t_), ctx(ctx_) {}
340  };
341 
342  struct ga_instruction_copy_small_vect : public ga_instruction {
343  base_tensor &t;
344  const base_small_vector &vec;
345 
346  virtual int exec() {
347  GA_DEBUG_INFO("Instruction: copy small vector");
348  GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
349  gmm::copy(vec, t.as_vector());
350  return 0;
351  }
352  ga_instruction_copy_small_vect(base_tensor &t_,
353  const base_small_vector &vec_)
354  : t(t_), vec(vec_) {}
355  };
356 
357  struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {
358 
359  virtual int exec() {
360  GA_DEBUG_INFO("Instruction: unit normal vector");
361  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
362  "vector. Possible reasons: not on boundary or "
363  "transformation failed.");
364  gmm::copy(vec, t.as_vector());
365  return 0;
366  }
367  ga_instruction_copy_Normal(base_tensor &t_,
368  const base_small_vector &Normal_)
369  : ga_instruction_copy_small_vect(t_, Normal_) {}
370  };
371 
372  struct ga_instruction_level_set_normal_vector : public ga_instruction {
373  base_tensor &t;
374  const mesh_im_level_set *mimls;
375  const fem_interpolation_context &ctx;
376  base_small_vector vec;
377 
378  virtual int exec() {
379  GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
380  mimls->compute_normal_vector(ctx, vec);
381  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
382  "vector. Possible reasons: not on boundary or "
383  "transformation failed.");
384  gmm::copy(vec, t.as_vector());
385  return 0;
386  }
387  ga_instruction_level_set_normal_vector
388  (base_tensor &t_, const mesh_im_level_set *mimls_,
389  const fem_interpolation_context &ctx_)
390  : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
391  };
392 
393  struct ga_instruction_element_size : public ga_instruction {
394  base_tensor &t;
395  scalar_type &es;
396 
397  virtual int exec() {
398  GA_DEBUG_INFO("Instruction: element_size");
399  GMM_ASSERT1(t.size() == 1, "Invalid element size.");
400  t[0] = es;
401  return 0;
402  }
403  ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
404  : t(t_), es(es_) {}
405  };
406 
407  struct ga_instruction_element_K : public ga_instruction {
408  base_tensor &t;
409  const fem_interpolation_context &ctx;
410 
411  virtual int exec() {
412  GA_DEBUG_INFO("Instruction: element_K");
413  GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
414  gmm::copy(ctx.K().as_vector(), t.as_vector());
415  return 0;
416  }
417  ga_instruction_element_K(base_tensor &t_,
418  const fem_interpolation_context &ct)
419  : t(t_), ctx(ct) {}
420  };
421 
422  struct ga_instruction_element_B : public ga_instruction {
423  base_tensor &t;
424  const fem_interpolation_context &ctx;
425 
426  virtual int exec() {
427  GA_DEBUG_INFO("Instruction: element_B");
428  GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
429  gmm::copy(ctx.B().as_vector(), t.as_vector());
430  return 0;
431  }
432  ga_instruction_element_B(base_tensor &t_,
433  const fem_interpolation_context &ct)
434  : t(t_), ctx(ct) {}
435  };
436 
437  struct ga_instruction_val_base : public ga_instruction {
438  base_tensor &t;
439  fem_interpolation_context &ctx;
440  const mesh_fem &mf;
441  const pfem_precomp &pfp;
442 
443  virtual int exec() { // --> t(ndof,target_dim)
444  GA_DEBUG_INFO("Instruction: compute value of base functions");
445  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
446  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
447  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
448  // ctx.base_value(t);
449  if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
450  else {
451  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
452  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
453  ctx.base_value(t);
454  }
455  return 0;
456  }
457 
458  ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
459  const mesh_fem &mf_, const pfem_precomp &pfp_)
460  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
461  };
462 
463  struct ga_instruction_xfem_plus_val_base : public ga_instruction {
464  base_tensor &t;
465  fem_interpolation_context &ctx;
466  const mesh_fem &mf;
467  pfem_precomp &pfp;
468 
469  virtual int exec() { // --> t(ndof,target_dim)
470  GA_DEBUG_INFO("Instruction: compute value of base functions");
471  if (ctx.have_pgp()) ctx.set_pfp(pfp);
472  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
473  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
474  int old_xfem_side = ctx.xfem_side();
475  ctx.set_xfem_side(1);
476  ctx.base_value(t);
477  ctx.set_xfem_side(old_xfem_side);
478  return 0;
479  }
480 
481  ga_instruction_xfem_plus_val_base(base_tensor &tt,
482  fem_interpolation_context &ct,
483  const mesh_fem &mf_, pfem_precomp &pfp_)
484  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
485  };
486 
487  struct ga_instruction_xfem_minus_val_base : public ga_instruction {
488  base_tensor &t;
489  fem_interpolation_context &ctx;
490  const mesh_fem &mf;
491  pfem_precomp &pfp;
492 
493  virtual int exec() { // --> t(ndof,target_dim)
494  GA_DEBUG_INFO("Instruction: compute value of base functions");
495  if (ctx.have_pgp()) ctx.set_pfp(pfp);
496  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
497  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
498  int old_xfem_side = ctx.xfem_side();
499  ctx.set_xfem_side(-1);
500  ctx.base_value(t);
501  ctx.set_xfem_side(old_xfem_side);
502  return 0;
503  }
504 
505  ga_instruction_xfem_minus_val_base
506  (base_tensor &tt, fem_interpolation_context &ct,
507  const mesh_fem &mf_, pfem_precomp &pfp_)
508  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
509  };
510 
511  struct ga_instruction_grad_base : public ga_instruction_val_base {
512 
513  virtual int exec() { // --> t(ndof,target_dim,N)
514  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
515  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
516  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
517  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
518  // ctx.grad_base_value(t);
519  if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
520  else {
521  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
522  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
523  ctx.grad_base_value(t);
524  }
525  return 0;
526  }
527 
528  ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
529  const mesh_fem &mf_, pfem_precomp &pfp_)
530  : ga_instruction_val_base(tt, ct, mf_, pfp_)
531  {}
532  };
533 
534  struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {
535 
536  virtual int exec() { // --> t(ndof,target_dim,N)
537  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
538  if (ctx.have_pgp()) ctx.set_pfp(pfp);
539  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
540  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
541  int old_xfem_side = ctx.xfem_side();
542  ctx.set_xfem_side(1);
543  ctx.grad_base_value(t);
544  ctx.set_xfem_side(old_xfem_side);
545  return 0;
546  }
547 
548  ga_instruction_xfem_plus_grad_base
549  (base_tensor &tt, fem_interpolation_context &ct,
550  const mesh_fem &mf_, pfem_precomp &pfp_)
551  : ga_instruction_val_base(tt, ct, mf_, pfp_)
552  {}
553  };
554 
555  struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {
556 
557  virtual int exec() { // --> t(ndof,target_dim,N)
558  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
559  if (ctx.have_pgp()) ctx.set_pfp(pfp);
560  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
561  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
562  int old_xfem_side = ctx.xfem_side();
563  ctx.set_xfem_side(-1);
564  ctx.grad_base_value(t);
565  ctx.set_xfem_side(old_xfem_side);
566  return 0;
567  }
568 
569  ga_instruction_xfem_minus_grad_base
570  (base_tensor &tt, fem_interpolation_context &ct,
571  const mesh_fem &mf_, pfem_precomp &pfp_)
572  : ga_instruction_val_base(tt, ct, mf_, pfp_)
573  {}
574  };
575 
576 
577  struct ga_instruction_hess_base : public ga_instruction_val_base {
578 
579  virtual int exec() { // --> t(ndof,target_dim,N*N)
580  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
581  if (ctx.have_pgp()) ctx.set_pfp(pfp);
582  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
583  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
584  ctx.hess_base_value(t);
585  return 0;
586  }
587 
588  ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
589  const mesh_fem &mf_, pfem_precomp &pfp_)
590  : ga_instruction_val_base(tt, ct, mf_, pfp_)
591  {}
592  };
593 
594  struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {
595 
596  virtual int exec() { // --> t(ndof,target_dim,N*N)
597  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
598  if (ctx.have_pgp()) ctx.set_pfp(pfp);
599  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
600  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
601  int old_xfem_side = ctx.xfem_side();
602  ctx.set_xfem_side(1);
603  ctx.hess_base_value(t);
604  ctx.set_xfem_side(old_xfem_side);
605  return 0;
606  }
607 
608  ga_instruction_xfem_plus_hess_base
609  (base_tensor &tt, fem_interpolation_context &ct,
610  const mesh_fem &mf_, pfem_precomp &pfp_)
611  : ga_instruction_val_base(tt, ct, mf_, pfp_)
612  {}
613  };
614 
615  struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {
616 
617  virtual int exec() { // --> t(ndof,target_dim,N*N)
618  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
619  if (ctx.have_pgp()) ctx.set_pfp(pfp);
620  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
621  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
622  int old_xfem_side = ctx.xfem_side();
623  ctx.set_xfem_side(-1);
624  ctx.hess_base_value(t);
625  ctx.set_xfem_side(old_xfem_side);
626  return 0;
627  }
628 
629  ga_instruction_xfem_minus_hess_base
630  (base_tensor &tt, fem_interpolation_context &ct,
631  const mesh_fem &mf_, pfem_precomp &pfp_)
632  : ga_instruction_val_base(tt, ct, mf_, pfp_)
633  {}
634  };
635 
636  struct ga_instruction_val : public ga_instruction {
637  scalar_type &a;
638  base_tensor &t;
639  const base_tensor &Z;
640  const base_vector &coeff;
641  size_type qdim;
642  // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
643  virtual int exec() {
644  GA_DEBUG_INFO("Instruction: variable value");
645  size_type ndof = Z.sizes()[0];
646  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
647  GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");
648 
649  if (qdim == 1) {
650  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
651  "Wrong size for coeff vector");
652  auto itc = coeff.begin(); auto itZ = Z.begin();
653  a = (*itc++) * (*itZ++);
654  while (itc != coeff.end()) a += (*itc++) * (*itZ++);
655  } else {
656  size_type target_dim = Z.sizes()[1];
657  if (target_dim == 1) {
658  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
659  "Wrong size for coeff vector");
660  auto itc = coeff.begin(); auto itZ = Z.begin();
661  for (auto it = t.begin(); it != t.end(); ++it)
662  *it = (*itc++) * (*itZ);
663  ++itZ;
664  for (size_type j = 1; j < ndof; ++j, ++itZ) {
665  for (auto it = t.begin(); it != t.end(); ++it)
666  *it += (*itc++) * (*itZ);
667  }
668  } else {
669  size_type Qmult = qdim / target_dim;
670  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
671  "Wrong size for coeff vector");
672 
673  gmm::clear(t.as_vector());
674  auto itc = coeff.begin();
675  for (size_type j = 0; j < ndof; ++j) {
676  auto it = t.begin();
677  for (size_type q = 0; q < Qmult; ++q, ++itc) {
678  for (size_type r = 0; r < target_dim; ++r)
679  *it++ += (*itc) * Z[j + r*ndof];
680  }
681  }
682  }
683  }
684  return 0;
685  }
686 
687  ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
688  const base_vector &co, size_type q)
689  : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
690  };
691 
692  struct ga_instruction_grad : public ga_instruction_val {
693  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
694  virtual int exec() {
695  GA_DEBUG_INFO("Instruction: gradient");
696  size_type ndof = Z.sizes()[0];
697  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
698  size_type N = Z.sizes()[2];
699  if (qdim == 1) {
700  GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
701  GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
702  auto itZ = Z.begin();
703  for (auto it = t.begin(); it != t.end(); ++it) {
704  auto itc = coeff.begin();
705  *it = (*itc++) * (*itZ++);
706  while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
707  }
708  } else {
709  size_type target_dim = Z.sizes()[1];
710  if (target_dim == 1) {
711  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
712  GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
713  "Wrong size for coeff vector");
714  for (size_type q = 0; q < qdim; ++q) {
715  auto itZ = Z.begin(); auto it = t.begin() + q;
716  for (size_type k = 0; k < N; ++k) {
717  if (k) it += qdim;
718  auto itc = coeff.begin() + q;
719  *it = (*itc) * (*itZ++);
720  for (size_type j = 1; j < ndof; ++j)
721  { itc += qdim; *it += (*itc) * (*itZ++); }
722  }
723  }
724  } else {
725  size_type Qmult = qdim / target_dim;
726  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
727  GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
728  "Wrong size for coeff vector");
729  gmm::clear(t.as_vector());
730  for (size_type q = 0; q < Qmult; ++q) {
731  auto itZ = Z.begin();
732  for (size_type k = 0; k < N; ++k)
733  for (size_type r = 0; r < target_dim; ++r)
734  for (size_type j = 0; j < ndof; ++j)
735  t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
736  }
737  }
738  }
739  return 0;
740  }
741 
742  ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
743  const base_vector &co, size_type q)
744  : ga_instruction_val(tt, Z_, co, q)
745  {}
746 
747  };
748 
749  struct ga_instruction_hess : public ga_instruction_val {
750  // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
751  virtual int exec() {
752  GA_DEBUG_INFO("Instruction: Hessian");
753  size_type ndof = Z.sizes()[0];
754  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
755  size_type NN = gmm::sqr(t.sizes().back());
756  GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
757  if (qdim == 1) {
758  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
759  "Wrong size for coeff vector");
760  auto it = Z.begin(); auto itt = t.begin();
761  for (size_type kl = 0; kl < NN; ++kl, ++itt) {
762  *itt = scalar_type(0);
763  for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
764  *itt += (*itc) * (*it);
765  }
766  GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
767  } else {
768  size_type target_dim = Z.sizes()[1];
769  if (target_dim == 1) {
770  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
771  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
772  "Wrong size for coeff vector");
773  gmm::clear(t.as_vector());
774  for (size_type q = 0; q < qdim; ++q) {
775  base_tensor::const_iterator it = Z.begin();
776  for (size_type kl = 0; kl < NN; ++kl)
777  for (size_type j = 0; j < ndof; ++j, ++it)
778  t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
779  }
780  } else {
781  size_type Qmult = qdim / target_dim;
782  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
783  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
784  "Wrong size for coeff vector");
785  gmm::clear(t.as_vector());
786  for (size_type q = 0; q < Qmult; ++q) {
787  base_tensor::const_iterator it = Z.begin();
788  for (size_type kl = 0; kl < NN; ++kl)
789  for (size_type r = 0; r < target_dim; ++r)
790  for (size_type j = 0; j < ndof; ++j, ++it)
791  t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
792  }
793  }
794  }
795  return 0;
796  }
797 
798  ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
799  const base_vector &co, size_type q)
800  : ga_instruction_val(tt, Z_, co, q)
801  {}
802  };
803 
804  struct ga_instruction_diverg : public ga_instruction_val {
805  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
806  virtual int exec() {
807  GA_DEBUG_INFO("Instruction: divergence");
808  size_type ndof = Z.sizes()[0];
809  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
810  size_type target_dim = Z.sizes()[1];
811  size_type N = Z.sizes()[2];
812  size_type Qmult = qdim / target_dim;
813  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
814  "Dimensions mismatch for divergence operator");
815  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
816  "Wrong size for coeff vector");
817 
818  t[0] = scalar_type(0);
819  base_tensor::const_iterator it = Z.begin();
820  if (Qmult == 1)
821  for (size_type k = 0; k < N; ++k) {
822  if (k) it += (N*ndof + 1);
823  for (size_type j = 0; j < ndof; ++j) {
824  if (j) ++it;
825  t[0] += coeff[j] * (*it);
826  }
827  }
828  else // if (target_dim() == 1)
829  for (size_type k = 0; k < N; ++k) {
830  if (k) ++it;
831  for (size_type j = 0; j < ndof; ++j) {
832  if (j) ++it;
833  t[0] += coeff[j*N+k] * (*it);
834  }
835  }
836  return 0;
837  }
838 
839  ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
840  const base_vector &co, size_type q)
841  : ga_instruction_val(tt, Z_, co, q)
842  {}
843  };
844 
845  struct ga_instruction_copy_val_base : public ga_instruction {
846  base_tensor &t;
847  const base_tensor &Z;
848  size_type qdim;
849  // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
850  virtual int exec() {
851  GA_DEBUG_INFO("Instruction: value of test functions");
852  if (qdim == 1) {
853  GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
854  std::copy(Z.begin(), Z.end(), t.begin());
855  } else {
856  size_type target_dim = Z.sizes()[1];
857  size_type Qmult = qdim / target_dim;
858  if (Qmult == 1) {
859  std::copy(Z.begin(), Z.end(), t.begin());
860  } else {
861  if (target_dim == 1) {
862  size_type ndof = Z.sizes()[0];
863  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
864  "Wrong size for base vector");
865  std::fill(t.begin(), t.end(), scalar_type(0));
866  auto itZ = Z.begin();
867  size_type s = t.sizes()[0], sss = s+1;
868 
869  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
870  auto it = t.begin();
871  for (size_type i = 0; i < ndof; ++i, ++itZ) {
872  if (i) it += Qmult;
873  auto it2 = it;
874  *it2 = *itZ;
875  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
876  }
877  } else {
878  size_type ndof = Z.sizes()[0];
879  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
880  "Wrong size for base vector");
881  std::fill(t.begin(), t.end(), scalar_type(0));
882  auto itZ = Z.begin();
883  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
884 
885  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
886  for (size_type k = 0; k < target_dim; ++k) {
887  auto it = t.begin() + (ss * k);
888  for (size_type i = 0; i < ndof; ++i, ++itZ) {
889  if (i) it += Qmult;
890  auto it2 = it;
891  *it2 = *itZ;
892  for (size_type j = 1; j < Qmult; ++j)
893  { it2 += sss; *it2 = *itZ; }
894  }
895  }
896  }
897  }
898  }
899  return 0;
900  }
901 
902  ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
903  size_type q) : t(tt), Z(Z_), qdim(q) {}
904  };
905 
906  struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
907  // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
908  virtual int exec() {
909  GA_DEBUG_INFO("Instruction: gradient of test functions");
910  if (qdim == 1) {
911  std::copy(Z.begin(), Z.end(), t.begin());
912  } else {
913  size_type target_dim = Z.sizes()[1];
914  size_type Qmult = qdim / target_dim;
915  if (Qmult == 1) {
916  std::copy(Z.begin(), Z.end(), t.begin());
917  } else {
918  if (target_dim == 1) {
919  size_type ndof = Z.sizes()[0];
920  size_type N = Z.sizes()[2];
921  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
922  "Wrong size for gradient vector");
923  std::fill(t.begin(), t.end(), scalar_type(0));
924  base_tensor::const_iterator itZ = Z.begin();
925  size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;
926 
927  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
928  for (size_type l = 0; l < N; ++l) {
929  base_tensor::iterator it = t.begin() + (ssss*l);
930  for (size_type i = 0; i < ndof; ++i, ++itZ) {
931  if (i) it += Qmult;
932  base_tensor::iterator it2 = it;
933  *it2 = *itZ;
934  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
935  }
936  }
937  } else {
938  size_type ndof = Z.sizes()[0];
939  size_type N = Z.sizes()[2];
940  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
941  "Wrong size for gradient vector");
942  std::fill(t.begin(), t.end(), scalar_type(0));
943  base_tensor::const_iterator itZ = Z.begin();
944  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
945  size_type ssss = ss*target_dim;
946 
947  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
948  for (size_type l = 0; l < N; ++l)
949  for (size_type k = 0; k < target_dim; ++k) {
950  base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
951  for (size_type i = 0; i < ndof; ++i, ++itZ) {
952  if (i) it += Qmult;
953  base_tensor::iterator it2 = it;
954  *it2 = *itZ;
955  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
956  }
957  }
958  }
959  }
960  }
961  return 0;
962  }
963 
964  ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
965  size_type q)
966  : ga_instruction_copy_val_base(tt,Z_,q) {}
967  };
968 
969  struct ga_instruction_copy_vect_val_base : public ga_instruction {
970  base_tensor &t;
971  const base_tensor &Z;
972  size_type qdim;
973  // Z(ndof) --> t(qdim*ndof,qdim*target_dim)
974  virtual int exec() {
975  GA_DEBUG_INFO("Instruction: vectorized value of test functions");
976 
977  size_type ndof = Z.sizes()[0];
978  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
979  "Wrong size for base vector");
980  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
981  auto itZ = Z.begin();
982  size_type s = t.sizes()[0], sss = s+1;
983 
984  // Performs t(i*qdim+j, k*qdim + j) = Z(i,k);
985  auto it = t.begin();
986  for (size_type i = 0; i < ndof; ++i, ++itZ) {
987  if (i) it += qdim;
988  auto it2 = it;
989  *it2 = *itZ;
990  for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
991  }
992  return 0;
993  }
994 
995  ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
996  size_type q) : t(tt), Z(Z_), qdim(q) {}
997  };
998 
999  struct ga_instruction_copy_vect_grad_base
1000  : public ga_instruction_copy_vect_val_base {
1001  // Z(ndof,N) --> t(qdim*ndof,qdim,N)
1002  virtual int exec() {
1003  GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
1004  size_type ndof = Z.sizes()[0];
1005  size_type N = Z.sizes()[2];
1006  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
1007  "Wrong size for gradient vector");
1008  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
1009  base_tensor::const_iterator itZ = Z.begin();
1010  size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;
1011 
1012  // Performs t(i*qdim+j, k*qdim + j, l) = Z(i,k,l);
1013  for (size_type l = 0; l < N; ++l) {
1014  base_tensor::iterator it = t.begin() + (ssss*l);
1015  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1016  if (i) it += qdim;
1017  base_tensor::iterator it2 = it;
1018  *it2 = *itZ;
1019  for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
1020  }
1021  }
1022  return 0;
1023  }
1024 
1025  ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
1026  size_type q)
1027  : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
1028  };
1029 
1030  struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
1031  // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1032  virtual int exec() {
1033  GA_DEBUG_INFO("Instruction: Hessian of test functions");
1034  size_type target_dim = Z.sizes()[1];
1035  size_type Qmult = qdim / target_dim;
1036  if (Qmult == 1) {
1037  gmm::copy(Z.as_vector(), t.as_vector());
1038  } else {
1039  size_type ndof = Z.sizes()[0];
1040  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
1041  "Wrong size for Hessian vector");
1042  gmm::clear(t.as_vector());
1043  base_tensor::const_iterator itZ = Z.begin();
1044  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
1045 
1046  // Performs t(i*Qmult+j, k*Qmult + j, l, m) = Z(i,k,l*N+m)
1047  size_type NNdim = Z.sizes()[2]*target_dim;
1048  for (size_type klm = 0; klm < NNdim; ++klm) {
1049  base_tensor::iterator it = t.begin() + (ss * klm);
1050  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1051  if (i) it += Qmult;
1052  base_tensor::iterator it2 = it;
1053  *it2 = *itZ;
1054  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
1055  }
1056  }
1057  }
1058  return 0;
1059  }
1060 
1061  ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
1062  size_type q)
1063  : ga_instruction_copy_val_base(tt, Z_, q) {}
1064  };
1065 
1066  struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
1067  // Z(ndof,target_dim,N) --> t(Qmult*ndof)
1068  virtual int exec() {
1069  GA_DEBUG_INFO("Instruction: divergence of test functions");
1070  size_type ndof = Z.sizes()[0];
1071  size_type target_dim = Z.sizes()[1];
1072  size_type N = Z.sizes()[2];
1073  size_type Qmult = qdim / target_dim;
1074  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
1075  "Dimensions mismatch for divergence operator");
1076  GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
1077  "Wrong size for divergence vector");
1078  gmm::clear(t.as_vector());
1079  base_tensor::const_iterator itZ = Z.begin();
1080  if (Qmult == 1) { // target_dim == N
1081  // Performs t(i) = Trace(Z(i,:,:))
1082  for (size_type l = 0; l < N; ++l) {
1083  base_tensor::iterator it = t.begin();
1084  if (l) itZ += target_dim*ndof+1;
1085  for (size_type i = 0; i < ndof; ++i) {
1086  if (i) { ++it; ++itZ; }
1087  *it += *itZ;
1088  }
1089  }
1090  } else { // Qmult == N
1091  // Performs t(i*Qmult+j) = Z(i,1,j)
1092  for (size_type j = 0; j < N; ++j) {
1093  base_tensor::iterator it = t.begin() + j;
1094  if (j) ++itZ;
1095  for (size_type i = 0; i < ndof; ++i) {
1096  if (i) { it += Qmult; ++itZ; }
1097  *it += *itZ;
1098  }
1099  }
1100  }
1101  return 0;
1102  }
1103 
1104  ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
1105  size_type q)
1106  : ga_instruction_copy_val_base(tt, Z_, q) {}
1107  };
1108 
1109  struct ga_instruction_elementary_trans {
1110  const base_vector &coeff_in;
1111  base_vector coeff_out;
1112  pelementary_transformation elemtrans;
1113  const mesh_fem &mf1, &mf2;
1114  const fem_interpolation_context &ctx;
1115  base_matrix &M;
1116  size_type &icv;
1117 
1118  void do_transformation(size_type n, size_type m) {
1119  if (icv != ctx.convex_num() || M.size() == 0) {
1120  M.base_resize(m, n);
1121  icv = ctx.convex_num();
1122  elemtrans->give_transformation(mf1, mf2, icv, M);
1123  }
1124  coeff_out.resize(gmm::mat_nrows(M));
1125  gmm::mult(M, coeff_in, coeff_out); // remember: coeff == coeff_out
1126  }
1127 
1128  ga_instruction_elementary_trans
1129  (const base_vector &co, pelementary_transformation e,
1130  const mesh_fem &mf1_, const mesh_fem &mf2_,
1131  const fem_interpolation_context &ctx_, base_matrix &M_,
1132  size_type &icv_)
1133  : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1134  M(M_), icv(icv_) {}
1135  ~ga_instruction_elementary_trans() {};
1136  };
1137 
1138  struct ga_instruction_elementary_trans_val
1139  : public ga_instruction_val, ga_instruction_elementary_trans {
1140  // Z(ndof,target_dim), coeff_in(Qmult,ndof) --> t(target_dim*Qmult)
1141  virtual int exec() {
1142  GA_DEBUG_INFO("Instruction: variable value with elementary "
1143  "transformation");
1144  size_type ndof = Z.sizes()[0];
1145  size_type Qmult = qdim / Z.sizes()[1];
1146  do_transformation(coeff_in.size(), ndof*Qmult);
1147  return ga_instruction_val::exec();
1148  }
1149 
1150  ga_instruction_elementary_trans_val
1151  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1152  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1153  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1154  : ga_instruction_val(tt, Z_, coeff_out, q),
1155  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1156  };
1157 
1158  struct ga_instruction_elementary_trans_grad
1159  : public ga_instruction_grad, ga_instruction_elementary_trans {
1160  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N)
1161  virtual int exec() {
1162  GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
1163  size_type ndof = Z.sizes()[0];
1164  size_type Qmult = qdim / Z.sizes()[1];
1165  do_transformation(coeff_in.size(), ndof*Qmult);
1166  return ga_instruction_grad::exec();
1167  }
1168 
1169  ga_instruction_elementary_trans_grad
1170  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1171  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1172  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1173  : ga_instruction_grad(tt, Z_, coeff_out, q),
1174  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1175  };
1176 
1177  struct ga_instruction_elementary_trans_hess
1178  : public ga_instruction_hess, ga_instruction_elementary_trans {
1179  // Z(ndof,target_dim,N,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N,N)
1180  virtual int exec() {
1181  GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
1182  size_type ndof = Z.sizes()[0];
1183  size_type Qmult = qdim / Z.sizes()[1];
1184  do_transformation(coeff_in.size(), ndof*Qmult);
1185  return ga_instruction_hess::exec();
1186  }
1187 
1188  ga_instruction_elementary_trans_hess
1189  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1190  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1191  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1192  : ga_instruction_hess(tt, Z_, coeff_out, q),
1193  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1194  };
1195 
1196  struct ga_instruction_elementary_trans_diverg
1197  : public ga_instruction_diverg, ga_instruction_elementary_trans {
1198  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(1)
1199  virtual int exec() {
1200  GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
1201  size_type ndof = Z.sizes()[0];
1202  size_type Qmult = qdim / Z.sizes()[1];
1203  do_transformation(coeff_in.size(), ndof*Qmult);
1204  return ga_instruction_diverg::exec();
1205  }
1206 
1207  ga_instruction_elementary_trans_diverg
1208  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1209  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1210  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1211  : ga_instruction_diverg(tt, Z_, coeff_out, q),
1212  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1213  };
1214 
1215  struct ga_instruction_update_group_info : public ga_instruction {
1216  const ga_workspace &workspace;
1217  const ga_instruction_set &gis;
1218  const ga_instruction_set::interpolate_info &inin;
1219  const std::string gname;
1220  ga_instruction_set::variable_group_info &vgi;
1221 
1222  virtual int exec() {
1223  GA_DEBUG_INFO("Instruction: Update group info for "+gname);
1224  if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
1225  return 0;
1226 
1227  vgi.cached_mesh = inin.m;
1228  const std::string &varname
1229  = inin.m ? workspace.variable_in_group(gname, *(inin.m))
1230  : workspace.first_variable_of_group(gname);
1231  vgi.varname = &varname;
1232  vgi.mf = workspace.associated_mf(varname);
1233  GA_DEBUG_ASSERT(vgi.mf, "Group variable should always have a mesh_fem");
1234  vgi.reduced_mf = vgi.mf->is_reduced();
1235  if (vgi.reduced_mf) {
1236  const auto it = gis.really_extended_vars.find(varname);
1237  GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
1238  "Variable " << varname << " not in extended variables");
1239  vgi.U = &(it->second);
1240  vgi.I = &(workspace.temporary_interval_of_variable(varname));
1241  } else {
1242  vgi.U = &(workspace.value(varname));
1243  vgi.I = &(workspace.interval_of_variable(varname));
1244  }
1245  vgi.alpha = workspace.factor_of_variable(varname);
1246  return 0;
1247  }
1248 
1249  ga_instruction_update_group_info
1250  (const ga_workspace &workspace_, const ga_instruction_set &gis_,
1251  const ga_instruction_set::interpolate_info &inin_,
1252  const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
1253  : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
1254  {}
1255  };
1256 
1257  struct ga_instruction_interpolate_filter : public ga_instruction {
1258  base_tensor &t;
1259  const ga_instruction_set::interpolate_info &inin;
1260  const size_type pt_type;
1261  const int nb;
1262 
1263  virtual int exec() {
1264  GA_DEBUG_INFO("Instruction: interpolated filter");
1265  if ((pt_type == size_type(-1) && inin.pt_type) ||
1266  (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
1267  GA_DEBUG_INFO("Instruction: interpolated filter: pass");
1268  return 0;
1269  }
1270  else {
1271  GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
1272  gmm::clear(t.as_vector());
1273  return nb;
1274  }
1275  return 0;
1276  }
1277 
1278  ga_instruction_interpolate_filter
1279  (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
1280  size_type ind_, int nb_)
1281  : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
1282  };
1283 
1284  struct ga_instruction_copy_interpolated_small_vect : public ga_instruction {
1285  base_tensor &t;
1286  const base_small_vector &vec;
1287  const ga_instruction_set::interpolate_info &inin;
1288 
1289  virtual int exec() {
1290  GA_DEBUG_INFO("Instruction: copy small vector");
1291  GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
1292  "Invalid element, probably transformation failed");
1293  GMM_ASSERT1(t.size() == vec.size(),
1294  "Invalid vector size: " << t.size() << "!=" << vec.size());
1295  gmm::copy(vec, t.as_vector());
1296  return 0;
1297  }
1298  ga_instruction_copy_interpolated_small_vect
1299  (base_tensor &t_, const base_small_vector &vec_,
1300  const ga_instruction_set::interpolate_info &inin_)
1301  : t(t_), vec(vec_), inin(inin_) {}
1302  };
1303 
1304  struct ga_instruction_interpolate : public ga_instruction {
1305  base_tensor &t;
1306  const mesh **m;
1307  const mesh_fem *mfn, **mfg;
1308  const base_vector *Un, **Ug;
1309  fem_interpolation_context &ctx;
1310  base_vector coeff;
1311  size_type qdim;
1312  const size_type &ipt;
1313  fem_precomp_pool &fp_pool;
1314  ga_instruction_set::interpolate_info &inin;
1315 
1316  virtual int exec() {
1317  GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
1318  "transformation. Probably transformation failed");
1319  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1320  const base_vector &U = *(Ug ? *Ug : Un);
1321  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1322  "on another mesh than the one it is defined on");
1323  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
1324  pfem pf = mf.fem_of_element(ctx.convex_num());
1325  GMM_ASSERT1(pf, "Undefined finite element method");
1326  if (ctx.have_pgp()) {
1327  if (ipt == 0)
1328  inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
1329  ctx.set_pfp(inin.pfps[&mf]);
1330  } else {
1331  ctx.set_pf(pf);
1332  }
1333  return 0;
1334  }
1335 
1336  ga_instruction_interpolate
1337  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1338  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1339  fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
1340  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1341  : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
1342  ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
1343  };
1344 
1345  struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
1346  // --> t(target_dim*Qmult)
1347  virtual int exec() {
1348  GA_DEBUG_INFO("Instruction: interpolated variable value");
1349  ga_instruction_interpolate::exec();
1350  ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
1351  // cout << "interpolate " << &U << " result : " << t.as_vector() << endl;
1352  return 0;
1353  }
1354 
1355  ga_instruction_interpolate_val
1356  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1357  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1358  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1359  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1360  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_,ctx_, q, ipt_,
1361  fp_pool_, inin_)
1362  {}
1363  };
1364 
1365  struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
1366  // --> t(target_dim*Qmult,N)
1367  virtual int exec() {
1368  GA_DEBUG_INFO("Instruction: interpolated variable grad");
1369  ga_instruction_interpolate::exec();
1370  base_matrix v(qdim, ctx.N());
1371  ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
1372  gmm::copy(v.as_vector(), t.as_vector());
1373  return 0;
1374  }
1375 
1376  ga_instruction_interpolate_grad
1377  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1378  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1379  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1380  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1381  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1382  fp_pool_, inin_)
1383  {}
1384  };
1385 
1386  struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
1387  // --> t(target_dim*Qmult,N,N)
1388  virtual int exec() {
1389  GA_DEBUG_INFO("Instruction: interpolated variable hessian");
1390  ga_instruction_interpolate::exec();
1391  base_matrix v(qdim, ctx.N()*ctx.N()); // To be optimized
1392  ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
1393  gmm::copy(v.as_vector(), t.as_vector());
1394  return 0;
1395  }
1396 
1397  ga_instruction_interpolate_hess
1398  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1399  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1400  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1401  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1402  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1403  fp_pool_, inin_)
1404  {}
1405  };
1406 
1407  struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
1408  // --> t(1)
1409  virtual int exec() {
1410  GA_DEBUG_INFO("Instruction: interpolated variable divergence");
1411  ga_instruction_interpolate::exec();
1412  ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
1413  return 0;
1414  }
1415 
1416  ga_instruction_interpolate_diverg
1417  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1418  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1419  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1420  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1421  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1422  fp_pool_, inin_)
1423  {}
1424  };
1425 
1426  struct ga_instruction_interpolate_base {
1427  base_tensor ZZ;
1428  const mesh **m;
1429  const mesh_fem *mfn, **mfg;
1430  const size_type &ipt;
1431  ga_instruction_set::interpolate_info &inin;
1432  fem_precomp_pool &fp_pool;
1433 
1434  virtual int exec() {
1435  GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
1436  "the transformation. Probably transformation failed");
1437  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1438  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1439  "on another mesh than the one it is defined on");
1440 
1441  pfem pf = mf.fem_of_element(inin.ctx.convex_num());
1442  GMM_ASSERT1(pf, "Undefined finite element method");
1443 
1444  if (inin.ctx.have_pgp()) {
1445  if (ipt == 0)
1446  inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
1447  inin.ctx.set_pfp(inin.pfps[&mf]);
1448  } else {
1449  inin.ctx.set_pf(pf);
1450  }
1451  return 0;
1452  }
1453 
1454  ga_instruction_interpolate_base
1455  (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
1456  const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
1457  fem_precomp_pool &fp_pool_)
1458  : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
1459  fp_pool(fp_pool_) {}
1460  };
1461 
1462  struct ga_instruction_interpolate_val_base
1463  : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
1464  // ctx --> Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
1465  virtual int exec() {
1466  GA_DEBUG_INFO("Instruction: interpolated base value");
1467  ga_instruction_interpolate_base::exec();
1468  inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember Z == ZZ
1469  return ga_instruction_copy_val_base::exec();
1470  }
1471 
1472  ga_instruction_interpolate_val_base
1473  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1474  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1475  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1476  : ga_instruction_copy_val_base(t_, ZZ, q),
1477  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1478  inin_, fp_pool_) {}
1479  };
1480 
1481  struct ga_instruction_interpolate_grad_base
1482  : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
1483  // ctx --> Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
1484  virtual int exec() {
1485  GA_DEBUG_INFO("Instruction: interpolated base grad");
1486  ga_instruction_interpolate_base::exec();
1487  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1488  return ga_instruction_copy_grad_base::exec();
1489  }
1490 
1491  ga_instruction_interpolate_grad_base
1492  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1493  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1494  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1495  : ga_instruction_copy_grad_base(t_, ZZ, q),
1496  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1497  inin_, fp_pool_) {}
1498  };
1499 
1500  struct ga_instruction_interpolate_hess_base
1501  : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
1502  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1503  virtual int exec() {
1504  GA_DEBUG_INFO("Instruction: interpolated base hessian");
1505  ga_instruction_interpolate_base::exec();
1506  inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // remember Z == ZZ
1507  return ga_instruction_copy_hess_base::exec();
1508  }
1509 
1510  ga_instruction_interpolate_hess_base
1511  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1512  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1513  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1514  : ga_instruction_copy_hess_base(t_, ZZ, q),
1515  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1516  inin_, fp_pool_) {}
1517  };
1518 
1519  struct ga_instruction_interpolate_diverg_base
1520  : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
1521  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof)
1522  virtual int exec() {
1523  GA_DEBUG_INFO("Instruction: interpolated base divergence");
1524  ga_instruction_interpolate_base::exec();
1525  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1526  return ga_instruction_copy_diverg_base::exec();
1527  }
1528 
1529  ga_instruction_interpolate_diverg_base
1530  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1531  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1532  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1533  : ga_instruction_copy_diverg_base(t_, ZZ, q),
1534  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1535  inin_, fp_pool_) {}
1536  };
1537 
1538 
1539  struct ga_instruction_elementary_trans_base {
1540  base_tensor t_in;
1541  base_tensor &t_out;
1542  pelementary_transformation elemtrans;
1543  const mesh_fem &mf1, &mf2;
1544  const fem_interpolation_context &ctx;
1545  base_matrix &M;
1546  size_type &icv;
1547 
1548  void do_transformation(size_type n, size_type m) {
1549  if (icv != ctx.convex_num() || M.size() == 0) {
1550  M.base_resize(m, n);
1551  icv = ctx.convex_num();
1552  elemtrans->give_transformation(mf1, mf2, icv, M);
1553  }
1554  t_out.mat_reduction(t_in, M, 0);
1555  }
1556 
1557  ga_instruction_elementary_trans_base
1558  (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf1_,
1559  const mesh_fem &mf2_,
1560  const fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1561  : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1562  M(M_), icv(icv_) {}
1563  };
1564 
1565  struct ga_instruction_elementary_trans_val_base
1566  : public ga_instruction_copy_val_base,
1567  ga_instruction_elementary_trans_base {
1568  // Z(ndof,target_dim) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim)
1569  virtual int exec() {
1570  GA_DEBUG_INFO("Instruction: value of test functions with elementary "
1571  "transformation");
1572  size_type ndof = Z.sizes()[0];
1573  size_type Qmult = qdim / Z.sizes()[1];
1574  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
1575  ga_instruction_copy_val_base::exec();
1576  do_transformation(t_out.sizes()[0], ndof*Qmult);
1577  return 0;
1578  }
1579 
1580  ga_instruction_elementary_trans_val_base
1581  (base_tensor &t_, const base_tensor &Z_, size_type q,
1582  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1583  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1584  : ga_instruction_copy_val_base(t_in, Z_, q),
1585  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1586  M_, icv_) {}
1587  };
1588 
1589  struct ga_instruction_elementary_trans_grad_base
1590  : public ga_instruction_copy_grad_base,
1591  ga_instruction_elementary_trans_base {
1592  // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim,N)
1593  virtual int exec() {
1594  GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
1595  "transformation");
1596  size_type ndof = Z.sizes()[0];
1597  size_type Qmult = qdim / Z.sizes()[1];
1598  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1599  ga_instruction_copy_grad_base::exec();
1600  do_transformation(t_out.sizes()[0], ndof*Qmult);
1601  return 0;
1602  }
1603 
1604  ga_instruction_elementary_trans_grad_base
1605  (base_tensor &t_, const base_tensor &Z_, size_type q,
1606  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1607  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1608  : ga_instruction_copy_grad_base(t_in, Z_, q),
1609  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1610  M_, icv_) {}
1611  };
1612 
1613  struct ga_instruction_elementary_trans_hess_base
1614  : public ga_instruction_copy_hess_base,
1615  ga_instruction_elementary_trans_base {
1616  // Z(ndof,target_dim,N*N) --> t_out(Qmult*ndof,Qmult*target_dim,N,N)
1617  virtual int exec() {
1618  GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
1619  "transformation");
1620  size_type ndof = Z.sizes()[0];
1621  size_type Qmult = qdim / Z.sizes()[1];
1622  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1623  ga_instruction_copy_hess_base::exec();
1624  do_transformation(t_out.sizes()[0], ndof*Qmult);
1625  return 0;
1626  }
1627 
1628  ga_instruction_elementary_trans_hess_base
1629  (base_tensor &t_, const base_tensor &Z_, size_type q,
1630  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1631  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1632  : ga_instruction_copy_hess_base(t_in, Z_, q),
1633  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1634  M_, icv_) {}
1635  };
1636 
1637  struct ga_instruction_elementary_trans_diverg_base
1638  : public ga_instruction_copy_diverg_base,
1639  ga_instruction_elementary_trans_base {
1640  // Z(ndof,target_dim,N) --> t_out(Qmult*ndof)
1641  virtual int exec() {
1642  GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
1643  "transformation");
1644  size_type ndof = Z.sizes()[0];
1645  size_type Qmult = qdim / Z.sizes()[1];
1646  t_in.adjust_sizes(Qmult*ndof);
1647  ga_instruction_copy_diverg_base::exec();
1648  do_transformation(t_out.sizes()[0], ndof*Qmult);
1649  return 0;
1650  }
1651 
1652  ga_instruction_elementary_trans_diverg_base
1653  (base_tensor &t_, const base_tensor &Z_, size_type q,
1654  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1655  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1656  : ga_instruction_copy_diverg_base(t_in, Z_, q),
1657  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1658  M_, icv_) {}
1659  };
1660 
1661 
1662  struct ga_instruction_add : public ga_instruction {
1663  base_tensor &t;
1664  const base_tensor &tc1, &tc2;
1665  virtual int exec() {
1666  GA_DEBUG_INFO("Instruction: addition");
1667  GA_DEBUG_ASSERT(t.size() == tc1.size(),
1668  "internal error " << t.size() << " != " << tc1.size());
1669  GA_DEBUG_ASSERT(t.size() == tc2.size(),
1670  "internal error " << t.size() << " != " << tc2.size());
1671  gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
1672  return 0;
1673  }
1674  ga_instruction_add(base_tensor &t_,
1675  const base_tensor &tc1_, const base_tensor &tc2_)
1676  : t(t_), tc1(tc1_), tc2(tc2_) {}
1677  };
1678 
1679  struct ga_instruction_add_to : public ga_instruction {
1680  base_tensor &t;
1681  const base_tensor &tc1;
1682  virtual int exec() {
1683  GA_DEBUG_INFO("Instruction: addition");
1684  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1685  << " incompatible with " << tc1.size());
1686  gmm::add(tc1.as_vector(), t.as_vector());
1687  return 0;
1688  }
1689  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
1690  : t(t_), tc1(tc1_) {}
1691  };
1692 
1693  struct ga_instruction_add_to_coeff : public ga_instruction {
1694  base_tensor &t;
1695  const base_tensor &tc1;
1696  scalar_type &coeff;
1697  virtual int exec() {
1698  GA_DEBUG_INFO("Instruction: addition with scale");
1699  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1700  << " incompatible with " << tc1.size());
1701  gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
1702  return 0;
1703  }
1704  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
1705  scalar_type &coeff_)
1706  : t(t_), tc1(tc1_), coeff(coeff_) {}
1707  };
1708 
1709  struct ga_instruction_sub : public ga_instruction {
1710  base_tensor &t;
1711  const base_tensor &tc1, &tc2;
1712  virtual int exec() {
1713  GA_DEBUG_INFO("Instruction: subtraction");
1714  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1715  "internal error");
1716  gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
1717  t.as_vector());
1718  return 0;
1719  }
1720  ga_instruction_sub(base_tensor &t_,
1721  const base_tensor &tc1_, const base_tensor &tc2_)
1722  : t(t_), tc1(tc1_), tc2(tc2_) {}
1723  };
1724 
1725  struct ga_instruction_opposite : public ga_instruction {
1726  base_tensor &t;
1727  virtual int exec() {
1728  GA_DEBUG_INFO("Instruction: multiplication with -1");
1729  gmm::scale(t.as_vector(), scalar_type(-1));
1730  return 0;
1731  }
1732  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
1733  };
1734 
1735  struct ga_instruction_print_tensor : public ga_instruction {
1736  base_tensor &t;
1737  pga_tree_node pnode;
1738  const fem_interpolation_context &ctx;
1739  size_type &nbpt, &ipt;
1740  virtual int exec() {
1741  GA_DEBUG_INFO("Instruction: tensor print");
1742  cout << "Print term "; ga_print_node(pnode, cout);
1743  cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
1744  << ctx.convex_num() << ": " << t << endl;
1745  return 0;
1746  }
1747  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1748  const fem_interpolation_context &ctx_,
1749  size_type &nbpt_, size_type &ipt_)
1750  : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
1751  };
1752 
1753  struct ga_instruction_copy_tensor : public ga_instruction {
1754  base_tensor &t;
1755  const base_tensor &tc1;
1756  virtual int exec() {
1757  GA_DEBUG_INFO("Instruction: tensor copy");
1758  std::copy(tc1.begin(), tc1.end(), t.begin());
1759  // gmm::copy(tc1.as_vector(), t.as_vector());
1760  return 0;
1761  }
1762  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
1763  : t(t_), tc1(tc1_) {}
1764  };
1765 
1766  struct ga_instruction_clear_tensor : public ga_instruction {
1767  base_tensor &t;
1768  virtual int exec() {
1769  GA_DEBUG_INFO("Instruction: clear tensor");
1770  std::fill(t.begin(), t.end(), scalar_type(0));
1771  return 0;
1772  }
1773  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
1774  };
1775 
1776  struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
1777  base_tensor &t;
1778  const base_tensor &tc1;
1779  virtual int exec() {
1780  GA_DEBUG_INFO("Instruction: tensor copy possibly void");
1781  if (tc1.size())
1782  gmm::copy(tc1.as_vector(), t.as_vector());
1783  else
1784  gmm::clear(t.as_vector());
1785  return 0;
1786  }
1787  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1788  const base_tensor &tc1_)
1789  : t(t_), tc1(tc1_) {}
1790  };
1791 
1792  struct ga_instruction_copy_scalar : public ga_instruction {
1793  scalar_type &t; const scalar_type &t1;
1794  virtual int exec() {
1795  GA_DEBUG_INFO("Instruction: scalar copy");
1796  t = t1;
1797  return 0;
1798  }
1799  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
1800  : t(t_), t1(t1_) {}
1801  };
1802 
1803  struct ga_instruction_copy_vect : public ga_instruction {
1804  base_vector &t;
1805  const base_vector &t1;
1806  virtual int exec() {
1807  GA_DEBUG_INFO("Instruction: fixed size tensor copy");
1808  gmm::copy(t1, t);
1809  return 0;
1810  }
1811  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
1812  : t(t_), t1(t1_) {}
1813  };
1814 
1815  struct ga_instruction_trace : public ga_instruction {
1816  base_tensor &t;
1817  const base_tensor &tc1;
1818  size_type n;
1819  // tc1(:,:,...,n,n) --> t(:,:,...)
1820  virtual int exec() {
1821  GA_DEBUG_INFO("Instruction: Trace");
1822  GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
1823  size_type s = t.size() * (n+1);
1824  auto it = t.begin();
1825  auto it1 = tc1.begin();
1826  for (; it != t.end(); ++it, ++it1) {
1827  auto it2 = it1;
1828  *it = *it2;
1829  for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
1830  }
1831  return 0;
1832  }
1833 
1834  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_, size_type n_)
1835  : t(t_), tc1(tc1_), n(n_) {}
1836  };
1837 
1838  struct ga_instruction_deviator : public ga_instruction {
1839  base_tensor &t;
1840  const base_tensor &tc1;
1841  size_type n;
1842  // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
1843  virtual int exec() {
1844  GA_DEBUG_INFO("Instruction: Deviator");
1845  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1846 
1847  gmm::copy(tc1.as_vector(), t.as_vector());
1848 
1849  size_type nb = t.size()/(n*n);
1850  size_type s = nb * (n+1), j = 0;
1851  base_tensor::iterator it = t.begin();
1852  base_tensor::const_iterator it1 = tc1.begin();
1853  for (; j < nb; ++it, ++it1, ++j) {
1854  scalar_type tr(0);
1855  base_tensor::const_iterator it2 = it1;
1856  tr += *it2;
1857  for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1858  tr /= scalar_type(n);
1859 
1860  base_tensor::iterator it3 = it;
1861  *it3 -= tr;
1862  for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
1863  }
1864  return 0;
1865  }
1866 
1867  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
1868  size_type n_)
1869  : t(t_), tc1(tc1_), n(n_) {}
1870  };
1871 
1872  struct ga_instruction_transpose : public ga_instruction { // To be optimized
1873  base_tensor &t;
1874  const base_tensor &tc1;
1875  size_type J, K, I;
1876  virtual int exec() {
1877  GA_DEBUG_INFO("Instruction: transpose");
1878  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1879 
1880  size_type L = tc1.size() / (J*K*I);
1881  auto it = t.begin();
1882  for (size_type i = 0; i < I; ++i) {
1883  size_type s1 = i*J*K*L;
1884  for (size_type j = 0; j < J; ++j) {
1885  size_type s2 = s1 + j*L;
1886  for (size_type k = 0; k < K; ++k) {
1887  size_type s3 = s2 + k*J*L;
1888  for (size_type l = 0; l < L; ++l, ++it)
1889  *it = tc1[s3+l];
1890  }
1891  }
1892  }
1893  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1894  return 0;
1895  }
1896  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
1897  size_type J_, size_type K_, size_type I_)
1898  : t(t_), tc1(tc1_), J(J_), K(K_), I(I_) {}
1899  };
1900 
1901  struct ga_instruction_swap_indices : public ga_instruction {// To be optimized
1902  base_tensor &t;
1903  const base_tensor &tc1;
1904  size_type nn1, nn2, ii2, ii3;
1905  virtual int exec() {
1906  GA_DEBUG_INFO("Instruction: swap indices");
1907  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1908  size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1909 
1910  auto it = t.begin();
1911  for (size_type i = 0; i < ii3; ++i)
1912  for (size_type j = 0; j < nn1; ++j)
1913  for (size_type k = 0; k < ii2; ++k)
1914  for (size_type l = 0; l < nn2; ++l) {
1915  size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1916  for (size_type m = 0; m < ii1; ++m, ++it)
1917  *it = tc1[m+ind];
1918  }
1919  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1920  return 0;
1921  }
1922  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
1923  size_type n1_, size_type n2_,
1924  size_type i2_, size_type i3_)
1925  : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
1926  };
1927 
1928  struct ga_instruction_index_move_last : public ga_instruction {// To be optimized
1929  base_tensor &t;
1930  const base_tensor &tc1;
1931  size_type nn, ii2;
1932  virtual int exec() {
1933  GA_DEBUG_INFO("Instruction: swap indices");
1934  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1935  size_type ii1 = t.size() / (nn*ii2);
1936 
1937  auto it = t.begin();
1938  for (size_type i = 0; i < nn; ++i)
1939  for (size_type j = 0; j < ii2; ++j) {
1940  size_type ind = i*ii1+j*ii1*nn;
1941  for (size_type k = 0; k < ii1; ++k, ++it)
1942  *it = tc1[k+ind];
1943  }
1944  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1945  return 0;
1946  }
1947  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
1948  size_type n_, size_type i2_)
1949  : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
1950  };
1951 
1952  struct ga_instruction_transpose_no_test : public ga_instruction {
1953  base_tensor &t;
1954  const base_tensor &tc1;
1955  size_type n1, n2, nn;
1956  virtual int exec() {
1957  GA_DEBUG_INFO("Instruction: transpose");
1958  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1959 
1960  auto it = t.begin();
1961  for (size_type i = 0; i < nn; ++i) {
1962  size_type s1 = i*n1*n2;
1963  for (size_type j = 0; j < n1; ++j) {
1964  size_type s2 = s1 + j;
1965  for (size_type k = 0; k < n2; ++k, ++it)
1966  *it = tc1[s2 + k*n1];
1967  }
1968  }
1969  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1970  return 0;
1971  }
1972  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
1973  size_type n1_, size_type n2_,
1974  size_type nn_)
1975  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1976  };
1977 
1978  struct ga_instruction_transpose_test : public ga_instruction {
1979  base_tensor &t;
1980  const base_tensor &tc1;
1981  virtual int exec() {
1982  GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
1983  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1984  GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");
1985 
1986  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1987  size_type s = t.size() / s3;
1988  base_tensor::iterator it = t.begin();
1989  for (size_type k = 0; k < s; ++k)
1990  for (size_type j = 0; j < s2; ++j)
1991  for (size_type i = 0; i < s1; ++i, ++it)
1992  *it = tc1[j+s2*i+k*s3];
1993  return 0;
1994  }
1995  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
1996  : t(t_), tc1(tc1_) {}
1997  };
1998 
1999  struct ga_instruction_sym : public ga_instruction {
2000  base_tensor &t;
2001  const base_tensor &tc1;
2002  virtual int exec() {
2003  GA_DEBUG_INFO("Instruction: symmetric part");
2004  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2005  size_type order = t.sizes().size();
2006  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2007  size_type s = t.size() / (s1*s2);
2008  for (size_type i = 0; i < s1; ++i)
2009  for (size_type j = 0; j < s2; ++j) {
2010  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2011  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2012  it1T = tc1.begin() + s*(j + s2*i);
2013  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
2014  }
2015  return 0;
2016  }
2017  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
2018  : t(t_), tc1(tc1_) {}
2019  };
2020 
2021  struct ga_instruction_skew : public ga_instruction {
2022  base_tensor &t;
2023  const base_tensor &tc1;
2024  virtual int exec() {
2025  GA_DEBUG_INFO("Instruction: skew-symmetric part");
2026  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2027  size_type order = t.sizes().size();
2028  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2029  size_type s = t.size() / (s1*s2);
2030  for (size_type i = 0; i < s1; ++i)
2031  for (size_type j = 0; j < s2; ++j) {
2032  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2033  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2034  it1T = tc1.begin() + s*(j + s2*i);
2035  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
2036  }
2037  return 0;
2038  }
2039  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
2040  : t(t_), tc1(tc1_) {}
2041  };
2042 
2043  struct ga_instruction_scalar_add : public ga_instruction {
2044  scalar_type &t;
2045  const scalar_type &c, &d;
2046  virtual int exec() {
2047  GA_DEBUG_INFO("Instruction: scalar addition");
2048  t = c + d;
2049  return 0;
2050  }
2051  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
2052  const scalar_type &d_)
2053  : t(t_), c(c_), d(d_) {}
2054  };
2055 
2056  struct ga_instruction_scalar_sub : public ga_instruction {
2057  scalar_type &t;
2058  const scalar_type &c, &d;
2059  virtual int exec() {
2060  GA_DEBUG_INFO("Instruction: scalar subtraction");
2061  t = c - d;
2062  return 0;
2063  }
2064  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
2065  const scalar_type &d_)
2066  : t(t_), c(c_), d(d_) {}
2067  };
2068 
2069  struct ga_instruction_scalar_scalar_mult : public ga_instruction {
2070  scalar_type &t;
2071  const scalar_type &c, &d;
2072  virtual int exec() {
2073  GA_DEBUG_INFO("Instruction: scalar multiplication");
2074  t = c * d;
2075  return 0;
2076  }
2077  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
2078  const scalar_type &d_)
2079  : t(t_), c(c_), d(d_) {}
2080  };
2081 
2082  struct ga_instruction_scalar_scalar_div : public ga_instruction {
2083  scalar_type &t;
2084  const scalar_type &c, &d;
2085  virtual int exec() {
2086  GA_DEBUG_INFO("Instruction: scalar division");
2087  t = c / d;
2088  return 0;
2089  }
2090  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
2091  const scalar_type &d_)
2092  : t(t_), c(c_), d(d_) {}
2093  };
2094 
2095  template<int I> inline void dax__(base_tensor::iterator &it,
2096  base_tensor::const_iterator &itx,
2097  const scalar_type &a) {
2098  constexpr int I1 = I/8;
2099  constexpr int I2 = I - I1*8;
2100  for (int i=0; i < I1; ++i)
2101  dax__<8>(it, itx , a);
2102  dax__<I2>(it, itx , a);
2103  }
2104  template<> inline void dax__<8>(base_tensor::iterator &it,
2105  base_tensor::const_iterator &itx,
2106  const scalar_type &a) {
2107  *it++ = *itx++ * a;
2108  *it++ = *itx++ * a;
2109  *it++ = *itx++ * a;
2110  *it++ = *itx++ * a;
2111  *it++ = *itx++ * a;
2112  *it++ = *itx++ * a;
2113  *it++ = *itx++ * a;
2114  *it++ = *itx++ * a;
2115  }
2116  template<> inline void dax__<7>(base_tensor::iterator &it,
2117  base_tensor::const_iterator &itx,
2118  const scalar_type &a) {
2119  *it++ = *itx++ * a;
2120  *it++ = *itx++ * a;
2121  *it++ = *itx++ * a;
2122  *it++ = *itx++ * a;
2123  *it++ = *itx++ * a;
2124  *it++ = *itx++ * a;
2125  *it++ = *itx++ * a;
2126  }
2127  template<> inline void dax__<6>(base_tensor::iterator &it,
2128  base_tensor::const_iterator &itx,
2129  const scalar_type &a) {
2130  *it++ = *itx++ * a;
2131  *it++ = *itx++ * a;
2132  *it++ = *itx++ * a;
2133  *it++ = *itx++ * a;
2134  *it++ = *itx++ * a;
2135  *it++ = *itx++ * a;
2136  }
2137  template<> inline void dax__<5>(base_tensor::iterator &it,
2138  base_tensor::const_iterator &itx,
2139  const scalar_type &a) {
2140  *it++ = *itx++ * a;
2141  *it++ = *itx++ * a;
2142  *it++ = *itx++ * a;
2143  *it++ = *itx++ * a;
2144  *it++ = *itx++ * a;
2145  }
2146  template<> inline void dax__<4>(base_tensor::iterator &it,
2147  base_tensor::const_iterator &itx,
2148  const scalar_type &a) {
2149  *it++ = *itx++ * a;
2150  *it++ = *itx++ * a;
2151  *it++ = *itx++ * a;
2152  *it++ = *itx++ * a;
2153  }
2154  template<> inline void dax__<3>(base_tensor::iterator &it,
2155  base_tensor::const_iterator &itx,
2156  const scalar_type &a) {
2157  *it++ = *itx++ * a;
2158  *it++ = *itx++ * a;
2159  *it++ = *itx++ * a;
2160  }
2161  template<> inline void dax__<2>(base_tensor::iterator &it,
2162  base_tensor::const_iterator &itx,
2163  const scalar_type &a) {
2164  *it++ = *itx++ * a;
2165  *it++ = *itx++ * a;
2166  }
2167  template<> inline void dax__<1>(base_tensor::iterator &it,
2168  base_tensor::const_iterator &itx,
2169  const scalar_type &a) {
2170  *it++ = *itx++ * a;
2171  }
2172  template<> inline void dax__<0>(base_tensor::iterator &,
2173  base_tensor::const_iterator &,
2174  const scalar_type &) {}
2175 
2176 
2177  template<int I> inline
2178  void reduc_elem_unrolled__(base_tensor::iterator &it,
2179  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2180  const size_type s1, const size_type s2) {
2181  *it = it1[0] * it2[0];
2182  for (int i=1; i < I; ++i)
2183  *it += it1[i*s1] * it2[i*s2];
2184  }
2185  template<> inline
2186  void reduc_elem_unrolled__<9>(base_tensor::iterator &it,
2187  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2188  const size_type s1, const size_type s2) {
2189  *it = it1[0] * it2[0] // (*it1) * (*it2)
2190  + it1[s1] * it2[s2] // (*(it1+s1)) * (*(it2+s2))
2191  + it1[2*s1] * it2[2*s2] // (*(it1+2*s1)) * (*(it2+2*s2))
2192  + it1[3*s1] * it2[3*s2] // (*(it1+3*s1)) * (*(it2+3*s2))
2193  + it1[4*s1] * it2[4*s2] // (*(it1+4*s1)) * (*(it2+4*s2))
2194  + it1[5*s1] * it2[5*s2] // (*(it1+5*s1)) * (*(it2+5*s2))
2195  + it1[6*s1] * it2[6*s2] // (*(it1+6*s1)) * (*(it2+6*s2))
2196  + it1[7*s1] * it2[7*s2] // (*(it1+7*s1)) * (*(it2+7*s2))
2197  + it1[8*s1] * it2[8*s2]; // (*(it1+8*s1)) * (*(it2+8*s2));
2198  }
2199  template<> inline
2200  void reduc_elem_unrolled__<8>(base_tensor::iterator &it,
2201  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2202  const size_type s1, const size_type s2) {
2203  *it = it1[0] * it2[0]
2204  + it1[s1] * it2[s2]
2205  + it1[2*s1] * it2[2*s2]
2206  + it1[3*s1] * it2[3*s2]
2207  + it1[4*s1] * it2[4*s2]
2208  + it1[5*s1] * it2[5*s2]
2209  + it1[6*s1] * it2[6*s2]
2210  + it1[7*s1] * it2[7*s2];
2211  }
2212  template<> inline
2213  void reduc_elem_unrolled__<7>(base_tensor::iterator &it,
2214  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2215  const size_type s1, const size_type s2) {
2216  *it = it1[0] * it2[0]
2217  + it1[s1] * it2[s2]
2218  + it1[2*s1] * it2[2*s2]
2219  + it1[3*s1] * it2[3*s2]
2220  + it1[4*s1] * it2[4*s2]
2221  + it1[5*s1] * it2[5*s2]
2222  + it1[6*s1] * it2[6*s2];
2223  }
2224  template<> inline
2225  void reduc_elem_unrolled__<6>(base_tensor::iterator &it,
2226  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2227  const size_type s1, const size_type s2) {
2228  *it = it1[0] * it2[0]
2229  + it1[s1] * it2[s2]
2230  + it1[2*s1] * it2[2*s2]
2231  + it1[3*s1] * it2[3*s2]
2232  + it1[4*s1] * it2[4*s2]
2233  + it1[5*s1] * it2[5*s2];
2234  }
2235  template<> inline
2236  void reduc_elem_unrolled__<5>(base_tensor::iterator &it,
2237  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2238  const size_type s1, const size_type s2) {
2239  *it = it1[0] * it2[0]
2240  + it1[s1] * it2[s2]
2241  + it1[2*s1] * it2[2*s2]
2242  + it1[3*s1] * it2[3*s2]
2243  + it1[4*s1] * it2[4*s2];
2244  }
2245  template<> inline
2246  void reduc_elem_unrolled__<4>(base_tensor::iterator &it,
2247  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2248  const size_type s1, const size_type s2) {
2249  *it = it1[0] * it2[0]
2250  + it1[s1] * it2[s2]
2251  + it1[2*s1] * it2[2*s2]
2252  + it1[3*s1] * it2[3*s2];
2253  }
2254  template<> inline
2255  void reduc_elem_unrolled__<3>(base_tensor::iterator &it,
2256  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2257  const size_type s1, const size_type s2) {
2258  *it = it1[0] * it2[0]
2259  + it1[s1] * it2[s2]
2260  + it1[2*s1] * it2[2*s2];
2261  }
2262  template<> inline
2263  void reduc_elem_unrolled__<2>(base_tensor::iterator &it,
2264  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2265  const size_type s1, const size_type s2) {
2266  *it = it1[0] * it2[0]
2267  + it1[s1] * it2[s2];
2268  }
2269  template<> inline
2270  void reduc_elem_unrolled__<1>(base_tensor::iterator &it,
2271  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2272  const size_type /*s1*/, const size_type /*s2*/)
2273  { *it = it1[0] * it2[0]; }
2274 
2275 
2276  struct ga_instruction_scalar_mult : public ga_instruction {
2277  base_tensor &t;
2278  const base_tensor &tc1;
2279  const scalar_type &c;
2280  virtual int exec() {
2281  GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
2282  gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
2283  return 0;
2284  }
2285  ga_instruction_scalar_mult(base_tensor &t_,
2286  const base_tensor &tc1_, const scalar_type &c_)
2287  : t(t_), tc1(tc1_), c(c_) {}
2288  };
2289 
2290  struct ga_instruction_scalar_div : public ga_instruction {
2291  base_tensor &t;
2292  const base_tensor &tc1;
2293  const scalar_type &c;
2294  virtual int exec() {
2295  GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
2296  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2297  base_tensor::iterator it = t.begin();
2298  base_tensor::const_iterator it1 = tc1.cbegin();
2299  for (; it != t.end(); ++it, ++it1) *it = *it1/c;
2300  return 0;
2301  }
2302  ga_instruction_scalar_div(base_tensor &t_,
2303  const base_tensor &tc1_, const scalar_type &c_)
2304  : t(t_), tc1(tc1_), c(c_) {}
2305  };
2306 
2307  // Performs Cross product in the presence of test functions
2308  struct ga_instruction_cross_product_tf : public ga_instruction {
2309  base_tensor &t;
2310  const base_tensor &tc1, &tc2;
2311  bool inv;
2312  virtual int exec() {
2313  GA_DEBUG_INFO("Instruction: Cross product with test functions");
2314 
2315  size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
2316  GA_DEBUG_ASSERT(t.size() == nn*3, "Bad tensor size for cross product");
2317  size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
2318  base_tensor::iterator it = t.begin();
2319  base_tensor::const_iterator it2 = tc2.cbegin();
2320  if (inv) {
2321  for (size_type i = 0; i < n2; ++i, ++it2) {
2322  base_tensor::const_iterator it1 = tc1.cbegin();
2323  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2324  *it = - it1[n1] *it2[n2_2] + it1[n1_2]*it2[n2];
2325  it[nn] = - it1[n1_2]*it2[0] + it1[0] *it2[n2_2];
2326  it[mm] = - it1[0] *it2[n2] + it1[n1] *it2[0];
2327  }
2328  }
2329  } else {
2330  for (size_type i = 0; i < n2; ++i, ++it2) {
2331  base_tensor::const_iterator it1 = tc1.cbegin();
2332  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2333  *it = it1[n1] *it2[n2_2] - it1[n1_2]*it2[n2];
2334  it[nn] = it1[n1_2]*it2[0] - it1[0] *it2[n2_2];
2335  it[mm] = it1[0] *it2[n2] - it1[n1] *it2[0];
2336  }
2337  }
2338  }
2339  return 0;
2340  }
2341  ga_instruction_cross_product_tf(base_tensor &t_,
2342  const base_tensor &tc1_,
2343  const base_tensor &tc2_, bool inv_)
2344  : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
2345  };
2346 
2347  // Performs Cross product in the absence of test functions
2348  struct ga_instruction_cross_product : public ga_instruction {
2349  base_tensor &t;
2350  const base_tensor &tc1, &tc2;
2351  virtual int exec() {
2352  GA_DEBUG_INFO("Instruction: Cross product with test functions");
2353  GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
2354  "Bad tensor size for cross product");
2355  t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
2356  t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
2357  t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
2358  return 0;
2359  }
2360  ga_instruction_cross_product(base_tensor &t_,
2361  const base_tensor &tc1_, const base_tensor &tc2_)
2362  : t(t_), tc1(tc1_), tc2(tc2_) {}
2363  };
2364 
2365 
2366 
2367 
2368  struct ga_instruction_dotmult : public ga_instruction {
2369  base_tensor &t;
2370  const base_tensor &tc1, &tc2;
2371  virtual int exec() {
2372  GA_DEBUG_INFO("Instruction: componentwise multiplication");
2373  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2374  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2375 
2376  base_tensor::iterator it = t.begin();
2377  for (size_type i = 0; i < s2; ++i)
2378  for (size_type m = 0; m < s1_1; ++m, ++it)
2379  *it = tc1[m+s1_1*i] * tc2[i];
2380  return 0;
2381  }
2382  ga_instruction_dotmult(base_tensor &t_,
2383  const base_tensor &tc1_, const base_tensor &tc2_)
2384  : t(t_), tc1(tc1_), tc2(tc2_) {}
2385  };
2386 
2387  struct ga_instruction_dotdiv : public ga_instruction {
2388  base_tensor &t;
2389  const base_tensor &tc1, &tc2;
2390  virtual int exec() {
2391  GA_DEBUG_INFO("Instruction: componentwise division");
2392  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2393  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2394 
2395  base_tensor::iterator it = t.begin();
2396  for (size_type i = 0; i < s2; ++i)
2397  for (size_type m = 0; m < s1_1; ++m, ++it)
2398  *it = tc1[m+s1_1*i] / tc2[i];
2399  return 0;
2400  }
2401  ga_instruction_dotdiv(base_tensor &t_,
2402  const base_tensor &tc1_, const base_tensor &tc2_)
2403  : t(t_), tc1(tc1_), tc2(tc2_) {}
2404  };
2405 
2406  // Performs Ami Bni -> Cmni
2407  struct ga_instruction_dotmult_spec : public ga_instruction {
2408  base_tensor &t;
2409  const base_tensor &tc1, &tc2;
2410  virtual int exec() {
2411  GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
2412  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2413  size_type s1_1 = tc1.size() / s2_2;
2414 
2415  base_tensor::iterator it = t.begin();
2416  for (size_type i = 0; i < s2_2; ++i)
2417  for (size_type n = 0; n < s2_1; ++n)
2418  for (size_type m = 0; m < s1_1; ++m, ++it)
2419  *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2420  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2421  return 0;
2422  }
2423  ga_instruction_dotmult_spec(base_tensor &t_,
2424  const base_tensor &tc1_, const base_tensor &tc2_)
2425  : t(t_), tc1(tc1_), tc2(tc2_) {}
2426  };
2427 
2428  // Performs Amijik -> Cmjk. To be optimized
2429  struct ga_instruction_contract_1_1 : public ga_instruction {
2430  base_tensor &t;
2431  const base_tensor &tc1;
2432  size_type nn, ii2, ii3;
2433  virtual int exec() {
2434  GA_DEBUG_INFO("Instruction: single contraction on a single tensor");
2435 
2436  size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2437 
2438  base_tensor::iterator it = t.begin();
2439  for (size_type i = 0; i < ii3; ++i)
2440  for (size_type j = 0; j < ii2; ++j)
2441  for (size_type k = 0; k < ii1; ++k, ++it) {
2442  *it = scalar_type(0);
2443  size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2444  for (size_type n = 0; n < nn; ++n)
2445  *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2446  }
2447 
2448  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2449  return 0;
2450  }
2451  ga_instruction_contract_1_1(base_tensor &t_, const base_tensor &tc1_,
2452  size_type n_, size_type i2_, size_type i3_)
2453  : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
2454  };
2455 
2456  // Performs Amijk Bnljp -> Cmniklp. To be optimized
2457  struct ga_instruction_contract_2_1 : public ga_instruction {
2458  base_tensor &t;
2459  const base_tensor &tc1, &tc2;
2460  size_type nn, ii1, ii2, ii3, ii4;
2461  virtual int exec() {
2462  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2463 
2464  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2465  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2466 
2467  base_tensor::iterator it = t.begin();
2468  for (size_type i = 0; i < ii4; ++i)
2469  for (size_type j = 0; j < ii3; ++j)
2470  for (size_type k = 0; k < ii2; ++k)
2471  for (size_type l = 0; l < ii1; ++l)
2472  for (size_type p = 0; p < ift2; ++p)
2473  for (size_type q = 0; q < ift1; ++q, ++it) {
2474  *it = scalar_type(0);
2475  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2476  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2477  for (size_type n = 0; n < nn; ++n)
2478  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2479  }
2480 
2481  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2482  return 0;
2483  }
2484  ga_instruction_contract_2_1(base_tensor &t_,
2485  const base_tensor &tc1_, const base_tensor &tc2_,
2486  size_type n_, size_type i1_, size_type i2_,
2487  size_type i3_, size_type i4_)
2488  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2489  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2490  };
2491 
2492  // Performs Amijk Bnljp -> Cnmiklp. To be optimized
2493  struct ga_instruction_contract_2_1_rev : public ga_instruction {
2494  base_tensor &t;
2495  const base_tensor &tc1, &tc2;
2496  size_type nn, ii1, ii2, ii3, ii4;
2497  virtual int exec() {
2498  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2499 
2500  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2501  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2502 
2503  base_tensor::iterator it = t.begin();
2504  for (size_type i = 0; i < ii4; ++i)
2505  for (size_type j = 0; j < ii3; ++j)
2506  for (size_type k = 0; k < ii2; ++k)
2507  for (size_type l = 0; l < ii1; ++l)
2508  for (size_type q = 0; q < ift1; ++q)
2509  for (size_type p = 0; p < ift2; ++p, ++it) {
2510  *it = scalar_type(0);
2511  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2512  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2513  for (size_type n = 0; n < nn; ++n)
2514  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2515  }
2516 
2517  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2518  return 0;
2519  }
2520  ga_instruction_contract_2_1_rev(base_tensor &t_,
2521  const base_tensor &tc1_, const base_tensor &tc2_,
2522  size_type n_, size_type i1_, size_type i2_,
2523  size_type i3_, size_type i4_)
2524  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2525  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2526  };
2527 
2528  // Performs Amijklp Bnqjrls -> Cmnikpqrs. To be optimized
2529  struct ga_instruction_contract_2_2 : public ga_instruction {
2530  base_tensor &t;
2531  const base_tensor &tc1, &tc2;
2532  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2533  bool inv_tc2;
2534  virtual int exec() {
2535  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2536 
2537  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2538  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2539 
2540  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2541  if (inv_tc2) std::swap(sn1, sn2);
2542 
2543  base_tensor::iterator it = t.begin();
2544  for (size_type i = 0; i < ii6; ++i)
2545  for (size_type j = 0; j < ii5; ++j)
2546  for (size_type k = 0; k < ii4; ++k)
2547  for (size_type l = 0; l < ii3; ++l)
2548  for (size_type p = 0; p < ii2; ++p)
2549  for (size_type q = 0; q < ii1; ++q)
2550  for (size_type r = 0; r < ift2; ++r)
2551  for (size_type s = 0; s < ift1; ++s, ++it) {
2552  *it = scalar_type(0);
2553  size_type ind1
2554  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2555  size_type ind2
2556  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2557  for (size_type n1 = 0; n1 < nn1; ++n1)
2558  for (size_type n2 = 0; n2 < nn2; ++n2)
2559  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2560  * tc2[ind2+n1*sn1+n2*sn2];
2561  }
2562 
2563  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2564  return 0;
2565  }
2566  ga_instruction_contract_2_2(base_tensor &t_,
2567  const base_tensor &tc1_, const base_tensor &tc2_,
2568  size_type n1_, size_type n2_,
2569  size_type i1_, size_type i2_, size_type i3_,
2570  size_type i4_, size_type i5_, size_type i6_,
2571  bool intc2)
2572  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2573  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2574  inv_tc2(intc2) {}
2575  };
2576 
2577  // Performs Amijklp Bnqjrls -> Cnmikpqrs. To be optimized
2578  struct ga_instruction_contract_2_2_rev : public ga_instruction {
2579  base_tensor &t;
2580  const base_tensor &tc1, &tc2;
2581  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2582  bool inv_tc2;
2583  virtual int exec() {
2584  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2585 
2586  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2587  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2588 
2589  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2590  if (inv_tc2) std::swap(sn1, sn2);
2591 
2592  base_tensor::iterator it = t.begin();
2593  for (size_type i = 0; i < ii6; ++i)
2594  for (size_type j = 0; j < ii5; ++j)
2595  for (size_type k = 0; k < ii4; ++k)
2596  for (size_type l = 0; l < ii3; ++l)
2597  for (size_type p = 0; p < ii2; ++p)
2598  for (size_type q = 0; q < ii1; ++q)
2599  for (size_type s = 0; s < ift1; ++s)
2600  for (size_type r = 0; r < ift2; ++r, ++it) {
2601  *it = scalar_type(0);
2602  size_type ind1
2603  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2604  size_type ind2
2605  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2606  for (size_type n1 = 0; n1 < nn1; ++n1)
2607  for (size_type n2 = 0; n2 < nn2; ++n2)
2608  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2609  * tc2[ind2+n1*sn1+n2*sn2];
2610  }
2611 
2612  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2613  return 0;
2614  }
2615  ga_instruction_contract_2_2_rev(base_tensor &t_,
2616  const base_tensor &tc1_, const base_tensor &tc2_,
2617  size_type n1_, size_type n2_,
2618  size_type i1_, size_type i2_, size_type i3_,
2619  size_type i4_, size_type i5_, size_type i6_,
2620  bool intc2)
2621  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2622  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2623  inv_tc2(intc2) {}
2624  };
2625 
2626 
2627  // Performs Amj Bjk -> Cmk. To be optimized
2628  struct ga_instruction_matrix_mult : public ga_instruction {
2629  base_tensor &t;
2630  const base_tensor &tc1, &tc2;
2631  const size_type J;
2632  virtual int exec() {
2633  GA_DEBUG_INFO("Instruction: order one contraction "
2634  "(dot product or matrix multiplication)");
2635  size_type M = tc1.size() / J,
2636  K = tc2.size() / J;
2637 #if defined(GA_USES_BLAS)
2638  if (M*J*K > 27) {
2639  const BLAS_INT M_=BLAS_INT(M), J_=BLAS_INT(J), K_=BLAS_INT(K);
2640  constexpr char notrans = 'N';
2641  constexpr scalar_type one(1), zero(0);
2642  gmm::dgemm_(&notrans, &notrans, &M_, &K_, &J_, &one,
2643  &(tc1[0]), &M_, &(tc2[0]), &J_, &zero, &(t[0]), &M_);
2644  } else
2645 #endif
2646  {
2647  auto it = t.begin();
2648  if (M==2 && J==2 && K == 2) {
2649  *it++ = tc1[0]*tc2[0] + tc1[2]*tc2[1]; // k=0,m=0
2650  *it++ = tc1[1]*tc2[0] + tc1[3]*tc2[1]; // k=0,m=1
2651  *it++ = tc1[0]*tc2[2] + tc1[2]*tc2[3]; // k=1,m=0
2652  *it++ = tc1[1]*tc2[2] + tc1[3]*tc2[3]; // k=1,m=1
2653  } else if (M==3 && J==3 && K == 3) {
2654  *it++ = tc1[0]*tc2[0] + tc1[3]*tc2[1] + tc1[6]*tc2[2]; // k=0,m=0
2655  *it++ = tc1[1]*tc2[0] + tc1[4]*tc2[1] + tc1[7]*tc2[2]; // k=0,m=1
2656  *it++ = tc1[2]*tc2[0] + tc1[5]*tc2[1] + tc1[8]*tc2[2]; // k=0,m=2
2657  *it++ = tc1[0]*tc2[3] + tc1[3]*tc2[4] + tc1[6]*tc2[5]; // k=1,m=0
2658  *it++ = tc1[1]*tc2[3] + tc1[4]*tc2[4] + tc1[7]*tc2[5]; // k=1,m=1
2659  *it++ = tc1[2]*tc2[3] + tc1[5]*tc2[4] + tc1[8]*tc2[5]; // k=1,m=2
2660  *it++ = tc1[0]*tc2[6] + tc1[3]*tc2[7] + tc1[6]*tc2[8]; // k=2,m=0
2661  *it++ = tc1[1]*tc2[6] + tc1[4]*tc2[7] + tc1[7]*tc2[8]; // k=2,m=1
2662  *it++ = tc1[2]*tc2[6] + tc1[5]*tc2[7] + tc1[8]*tc2[8]; // k=2,m=2
2663  } else {
2664  for (size_type k = 0; k < K; ++k)
2665  for (size_type m = 0; m < M; ++m, ++it) {
2666  *it = scalar_type(0);
2667  for (size_type j = 0; j < J; ++j)
2668  *it += tc1[m+M*j] * tc2[j+J*k];
2669  }
2670  }
2671  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2672  }
2673  return 0;
2674  }
2675  ga_instruction_matrix_mult(base_tensor &t_,
2676  const base_tensor &tc1_,
2677  const base_tensor &tc2_, size_type J_)
2678  : t(t_), tc1(tc1_), tc2(tc2_), J(J_) {}
2679  };
2680 
2681  // Performs Amij Bnjk -> Cmnik. To be optimized
2682  struct ga_instruction_matrix_mult_spec : public ga_instruction {
2683  base_tensor &t;
2684  const base_tensor &tc1, &tc2;
2685  size_type J, I, K; // tc1 of size M*I*J, tc2 of size N*J*K
2686  // t of size M*N*I*K
2687  virtual int exec() {
2688  GA_DEBUG_INFO("Instruction: specific order one contraction "
2689  "(dot product or matrix multiplication)");
2690  const size_type MI = tc1.size() / J, M = MI / I,
2691  NJ = tc2.size() / K, N = NJ / J;
2692 #if defined(GA_USES_BLAS)
2693  const BLAS_INT J_ = BLAS_INT(J), M_ = BLAS_INT(M), N_ = BLAS_INT(N),
2694  MI_ = BLAS_INT(MI);
2695  constexpr char notrans = 'N', trans = 'T';
2696  constexpr scalar_type one(1), zero(0);
2697  size_type MN = M*N;
2698  auto it = t.begin();
2699  for (size_type k = 0; k < K; ++k)
2700  for (size_type i = 0; i < I; ++i, it += MN) // => t[M*N*(i+I*k)]
2701  gmm::dgemm_(&notrans, &trans, &M_, &N_, &J_, &one,
2702  &(tc1[M*i]), &MI_, &(tc2[NJ*k]), &N_, &zero,
2703  &(*it), &M_);
2704  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2705 #else
2706  auto it = t.begin();
2707  for (size_type k = 0; k < K; ++k)
2708  for (size_type i = 0; i < I; ++i)
2709  for (size_type n = 0; n < N; ++n)
2710  for (size_type m = 0; m < M; ++m, ++it) {
2711  *it = scalar_type(0);
2712  for (size_type j = 0; j < J; ++j)
2713  *it += tc1[m+M*i+MI*j] * tc2[n+N*j+NJ*k];
2714  }
2715  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2716 #endif
2717  return 0;
2718  }
2719  ga_instruction_matrix_mult_spec(base_tensor &t_,
2720  const base_tensor &tc1_,
2721  const base_tensor &tc2_,
2722  size_type J_, size_type I_, size_type K_)
2723  : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
2724  };
2725 
2726  // Performs Amij Bnjk -> Cnmik. To be optimized
2727  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
2728  base_tensor &t;
2729  const base_tensor &tc1, &tc2;
2730  size_type J, I, K; // tc1 of size M*I*J, tc2 of size N*J*K
2731  // t of size N*M*I*K
2732  virtual int exec() {
2733  GA_DEBUG_INFO("Instruction: specific order one contraction "
2734  "(dot product or matrix multiplication)");
2735  const size_type MI = tc1.size() / J,
2736  NJ = tc2.size() / K, N = NJ / J;
2737 #if defined(GA_USES_BLAS)
2738  const BLAS_INT J_ = BLAS_INT(J), MI_ = BLAS_INT(MI), N_ = BLAS_INT(N);
2739  constexpr char notrans = 'N', trans = 'T';
2740  constexpr scalar_type one(1), zero(0);
2741  size_type NMI = N*MI;
2742  auto it = t.begin();
2743  for (size_type k = 0; k < K; ++k, it += NMI) // => it[N*M*I*k]
2744  gmm::dgemm_(&notrans, &trans, &N_, &MI_, &J_, &one,
2745  &(tc2[NJ*k]), &N_, &(tc1[0]), &MI_, &zero,
2746  &(*it), &N_);
2747  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2748 #else
2749  auto it = t.begin();
2750  for (size_type k = 0; k < K; ++k)
2751  for (size_type mi = 0; mi < MI; ++mi)
2752  for (size_type n = 0; n < N; ++n, ++it) {
2753  *it = scalar_type(0);
2754  for (size_type j = 0; j < J; ++j)
2755  *it += tc1[mi+MI*j] * tc2[n+N*j+NJ*k];
2756  }
2757  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2758 #endif
2759  return 0;
2760  }
2761  ga_instruction_matrix_mult_spec2(base_tensor &t_,
2762  const base_tensor &tc1_,
2763  const base_tensor &tc2_,
2764  size_type J_, size_type I_, size_type K_)
2765  : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
2766  };
2767 
2768  // Performs Ani Bmi -> Cmn
2769  struct ga_instruction_contraction : public ga_instruction {
2770  base_tensor &t;
2771  const base_tensor &tc1, &tc2;
2772  const size_type I;
2773  virtual int exec() {
2774  GA_DEBUG_INFO("Instruction: contraction operation of size " << I);
2775  size_type N = tc1.size()/I,
2776  M = tc2.size()/I;
2777  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error");
2778 #if defined(GA_USES_BLAS)
2779  if (M*N*I > 27) {
2780  BLAS_INT N_ = BLAS_INT(N), I_ = BLAS_INT(I), M_ = BLAS_INT(M);
2781  char notrans = 'N', trans = 'T';
2782  static const scalar_type one(1), zero(0);
2783  gmm::dgemm_(&notrans, &trans, &M_, &N_, &I_, &one,
2784  &(tc2[0]), &M_, &(tc1[0]), &N_, &zero, &(t[0]), &M_);
2785  } else
2786 #endif
2787  {
2788  auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
2789  if (I==7) {
2790  for (auto it = t.begin(); it != t.end(); ++it) {
2791  reduc_elem_unrolled__<7>(it, it1, it2, N, M);
2792  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2793  }
2794  } else if (I==8) {
2795  for (auto it = t.begin(); it != t.end(); ++it) {
2796  reduc_elem_unrolled__<8>(it, it1, it2, N, M);
2797  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2798  }
2799  } else if (I==9) {
2800  for (auto it = t.begin(); it != t.end(); ++it) {
2801  reduc_elem_unrolled__<9>(it, it1, it2, N, M);
2802  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2803  }
2804  } else if (I==10) {
2805  for (auto it = t.begin(); it != t.end(); ++it) {
2806  reduc_elem_unrolled__<10>(it, it1, it2, N, M);
2807  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2808  }
2809  } else {
2810  for (auto it = t.begin(); it != t.end(); ++it) {
2811  auto it11 = it1, it22 = it2;
2812  scalar_type a = (*it11) * (*it22);
2813  for (size_type i = 1; i < I; ++i)
2814  { it11 += N; it22 += M; a += (*it11) * (*it22); }
2815  *it = a;
2816  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2817  }
2818  }
2819  }
2820  // auto it = t.begin(); // Unoptimized version.
2821  // for (size_type n = 0; n < N; ++n)
2822  // for (size_type m = 0; m < M; ++m, ++it) {
2823  // *it = scalar_type(0);
2824  // for (size_type i = 0; i < I; ++i)
2825  // *it += tc1[n+N*i] * tc2[m+M*i];
2826  // }
2827  return 0;
2828  }
2829  ga_instruction_contraction(base_tensor &t_,
2830  const base_tensor &tc1_,
2831  const base_tensor &tc2_, size_type I_)
2832  : t(t_), tc1(tc1_), tc2(tc2_), I(I_) {}
2833  };
2834 
2835  // Performs Ani Bmi -> Cmn
2836  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
2837  base_tensor &t;
2838  const base_tensor &tc1, &tc2;
2839  size_type n, q;
2840  virtual int exec() {
2841  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2842  " optimized for vectorized second tensor of type 2");
2843  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2844  size_type s1_qq = s1*q, s2_qq = s2*q;
2845  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2846 
2847  auto it = t.begin();
2848  auto it1 = tc1.cbegin();
2849  for (size_type i = 0; i < s1; ++i, ++it1) {
2850  auto it2 = tc2.cbegin();
2851  for (size_type j = 0; j < s2_q; ++j) {
2852  if (j) it2+=q;
2853  auto itt1 = it1;
2854  for (size_type l = 0; l < q; ++l, ++it) {
2855  if (l) itt1 += s1;
2856  auto ittt1 = itt1, ittt2 = it2;
2857  *it = *ittt1 * (*ittt2);
2858  for (size_type m = 1; m < n; ++m) {
2859  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2860  }
2861  }
2862  }
2863  }
2864  // base_tensor u = t;
2865  // ga_instruction_contraction toto(t, tc1, tc2, n*q);
2866  // toto.exec();
2867  // GMM_ASSERT1(gmm::vect_dist2(t.as_vector(), u.as_vector()) < 1E-9, "Erroneous");
2868  return 0;
2869  }
2870  ga_instruction_contraction_opt0_2(base_tensor &t_,
2871  const base_tensor &tc1_,
2872  const base_tensor &tc2_,
2873  size_type n_, size_type q_)
2874  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
2875  };
2876 
2877  // Performs Ani Bmi -> Cmn
2878  template <int N>
2879  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
2880  base_tensor &t;
2881  const base_tensor &tc1, &tc2;
2882  size_type q;
2883  virtual int exec() {
2884  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q <<
2885  " optimized for vectorized second tensor of type 2");
2886  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2887  size_type s1_qq = s1*q, s2_qq = s2*q;
2888  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2889 
2890  auto it = t.begin();
2891  auto it1 = tc1.cbegin();
2892  for (size_type i = 0; i < s1; ++i, ++it1) {
2893  auto it2 = tc2.cbegin();
2894  for (size_type j = 0; j < s2_q; ++j) {
2895  if (j) it2+=q;
2896  auto itt1 = it1;
2897  for (size_type l = 0; l < q; ++l, ++it) {
2898  if (l) itt1 += s1;
2899  auto ittt1 = itt1, ittt2 = it2;
2900  *it = *ittt1 * (*ittt2);
2901  for (size_type m = 1; m < N; ++m) {
2902  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2903  }
2904  }
2905  }
2906  }
2907  return 0;
2908  }
2909  ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_,
2910  const base_tensor &tc1_,
2911  const base_tensor &tc2_,
2912  size_type q_)
2913  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2914  };
2915 
2916  // Performs Ani Bmi -> Cmn
2917  template <int N, int Q>
2918  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
2919  base_tensor &t;
2920  const base_tensor &tc1, &tc2;
2921  virtual int exec() {
2922  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
2923  << " optimized for vectorized second tensor of type 2");
2924  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2925  size_type s1_qq = s1*Q, s2_qq = s2*Q;
2926  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2927 
2928  auto it = t.begin();
2929  auto it1 = tc1.cbegin();
2930  for (size_type i = 0; i < s1; ++i, ++it1) {
2931  auto it2 = tc2.cbegin();
2932  for (size_type j = 0; j < s2_q; ++j) {
2933  if (j) it2+=Q;
2934  auto itt1 = it1;
2935  for (size_type l = 0; l < Q; ++l, ++it) {
2936  if (l) itt1 += s1;
2937  auto ittt1 = itt1, ittt2 = it2;
2938  *it = *ittt1 * (*ittt2);
2939  for (size_type m = 1; m < N; ++m) {
2940  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2941  }
2942  }
2943  }
2944  }
2945  return 0;
2946  }
2947  ga_instruction_contraction_opt0_2_dunrolled(base_tensor &t_,
2948  const base_tensor &tc1_,
2949  const base_tensor &tc2_)
2950  : t(t_), tc1(tc1_), tc2(tc2_) {}
2951  };
2952 
2953  // Performs Ani Bmi -> Cmn
2954  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
2955  base_tensor &t;
2956  const base_tensor &tc1, &tc2;
2957  size_type n, q;
2958  virtual int exec() {
2959  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2960  " optimized for vectorized second tensor of type 2");
2961  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2962  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2963  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2964 
2965  auto it = t.begin();
2966  for (size_type i = 0; i < s1_q; ++i) {
2967  auto it1 = tc1.cbegin() + i*q;
2968  for (size_type l = 0; l < q; ++l) {
2969  auto it2 = tc2.cbegin() + l*s2;
2970  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2971  auto itt1 = it1, itt2 = it2;
2972  *it = *itt1 * (*itt2);
2973  for (size_type m = 1; m < n; ++m) {
2974  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2975  }
2976  }
2977  }
2978  }
2979  return 0;
2980  }
2981  ga_instruction_contraction_opt2_0(base_tensor &t_,
2982  const base_tensor &tc1_,
2983  const base_tensor &tc2_,
2984  size_type n_, size_type q_)
2985  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
2986  };
2987 
2988  // Performs Ani Bmi -> Cmn
2989  template <int N>
2990  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
2991  base_tensor &t;
2992  const base_tensor &tc1, &tc2;
2993  size_type q;
2994  virtual int exec() {
2995  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q
2996  << " optimized for vectorized second tensor of type 2");
2997  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2998  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2999  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
3000 
3001  auto it = t.begin();
3002  auto it1 = tc1.cbegin();
3003  for (size_type i = 0; i < s1_q; ++i, it1 += q) {
3004  for (size_type l = 0; l < q; ++l) {
3005  auto it2 = tc2.cbegin() + l*s2;
3006  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
3007  auto itt1 = it1, itt2 = it2;
3008  *it = *itt1 * (*itt2);
3009  for (size_type m = 1; m < N; ++m) {
3010  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
3011  }
3012  }
3013  }
3014  }
3015  return 0;
3016  }
3017  ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_,
3018  const base_tensor &tc1_,
3019  const base_tensor &tc2_,
3020  size_type q_)
3021  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
3022  };
3023 
3024  // Performs Ani Bmi -> Cmn
3025  template <int N, int Q>
3026  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
3027  base_tensor &t;
3028  const base_tensor &tc1, &tc2;
3029  virtual int exec() {
3030  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
3031  << " optimized for vectorized second tensor of type 2");
3032  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
3033  size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
3034  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
3035 
3036  auto it = t.begin();
3037  auto it1 = tc1.cbegin();
3038  for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
3039  for (size_type l = 0; l < Q; ++l) {
3040  auto it2 = tc2.cbegin() + l*s2;
3041  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
3042  auto itt1 = it1, itt2 = it2;
3043  *it = *itt1 * (*itt2);
3044  for (size_type m = 1; m < N; ++m) {
3045  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
3046  }
3047  }
3048  }
3049  }
3050  return 0;
3051  }
3052  ga_instruction_contraction_opt2_0_dunrolled(base_tensor &t_,
3053  const base_tensor &tc1_,
3054  const base_tensor &tc2_)
3055  : t(t_), tc1(tc1_), tc2(tc2_) {}
3056  };
3057 
3058  // Performs Ani Bmi -> Cmn
3059  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
3060  base_tensor &t;
3061  const base_tensor &tc1, &tc2;
3062  size_type nn;
3063  virtual int exec() {
3064  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
3065  " optimized for vectorized second tensor of type 1");
3066  size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
3067 
3068  auto it = t.begin();
3069  auto it1 = tc1.cbegin();
3070  for (size_type i = 0; i < s1; ++i, ++it1) {
3071  auto it2 = tc2.cbegin();
3072  for (size_type j = 0; j < s2_n; ++j) {
3073  if (j) it2 += nn;
3074  auto itt1 = it1;
3075  *it++ = (*itt1) * (*it2);
3076  for (size_type k = 1; k < nn; ++k)
3077  { itt1 += s1; *it++ = (*itt1) * (*it2); }
3078  }
3079  }
3080  return 0;
3081  }
3082  ga_instruction_contraction_opt0_1(base_tensor &t_,
3083  const base_tensor &tc1_,
3084  const base_tensor &tc2_,
3085  size_type n_)
3086  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3087  };
3088 
3089  template<int N> inline void reduc_elem_unrolled_opt1_
3090  (const base_vector::iterator &it, const base_vector::const_iterator &it1,
3091  scalar_type a, size_type s1) {
3092  it[N-1] = it1[(N-1)*s1] * a;
3093  reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
3094  }
3095  template<> inline void reduc_elem_unrolled_opt1_<1>
3096  (const base_vector::iterator &it, const base_vector::const_iterator &it1,
3097  scalar_type a, size_type /* s1 */)
3098  { *it = (*it1) * a; }
3099 
3100  // Performs Ani Bmi -> Cmn
3101  template <int N>
3102  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
3103  base_tensor &t;
3104  const base_tensor &tc1, &tc2;
3105  virtual int exec() {
3106  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
3107  << " optimized for vectorized second tensor of type 1");
3108  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
3109  auto it = t.begin();
3110  auto it1 = tc1.cbegin();
3111  for (size_type i = 0; i < s1; ++i, ++it1) {
3112  auto it2 = tc2.cbegin(), it2e = it2 + s2;
3113  for (; it2 != it2e; it2 += N, it += N)
3114  reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
3115  }
3116  return 0;
3117  }
3118  ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_,
3119  const base_tensor &tc1_,
3120  const base_tensor &tc2_)
3121  : t(t_), tc1(tc1_), tc2(tc2_) {}
3122  };
3123 
3124  // Performs Ani Bmi -> Cmn
3125  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
3126  base_tensor &t;
3127  const base_tensor &tc1, &tc2;
3128  size_type nn;
3129  virtual int exec() {
3130  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
3131  " optimized for both vectorized tensor of type 1");
3132  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
3133  GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
3134  size_type ss1 = s1/nn, ss2 = s2/nn;
3135 
3136  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
3137  auto it2 = tc2.cbegin();
3138  for (size_type j = 0; j < ss2; ++j) {
3139  if (j) it2 += nn;
3140  auto it1 = tc1.cbegin();
3141  auto it = t.begin() + j*nn;
3142  for (size_type i = 0; i < ss1; ++i) {
3143  if (i) { it1 += nn, it += s2*nn; }
3144  scalar_type a = (*it1) * (*it2);
3145  auto itt = it;
3146  *itt = a; itt += s2_1; *itt = a;
3147  for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
3148  }
3149  }
3150  return 0;
3151  }
3152  ga_instruction_contraction_opt1_1(base_tensor &t_,
3153  const base_tensor &tc1_,
3154  const base_tensor &tc2_, size_type n_)
3155  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3156  };
3157 
3158 
3159 
3160  // Performs Ani Bmi -> Cmn. Unrolled operation.
3161  template<int I>
3162  struct ga_instruction_contraction_unrolled
3163  : public ga_instruction {
3164  base_tensor &t;
3165  const base_tensor &tc1, &tc2;
3166  virtual int exec() {
3167  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << I);
3168  size_type N = tc1.size()/I, M = tc2.size()/I;
3169  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
3170  << " != " << N << "*" << M);
3171  auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
3172  for (auto it = t.begin(); it != t.end(); ++it) {
3173  reduc_elem_unrolled__<I>(it, it1, it2, N, M);
3174  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
3175  }
3176  return 0;
3177  }
3178  ga_instruction_contraction_unrolled(base_tensor &t_,
3179  const base_tensor &tc1_,
3180  const base_tensor &tc2_)
3181  : t(t_), tc1(tc1_), tc2(tc2_) {}
3182  };
3183 
3184  // Performs An Bm -> Cmn. Unrolled operation.
3185  template<>
3186  struct ga_instruction_contraction_unrolled<1> : public ga_instruction {
3187  base_tensor &t;
3188  const base_tensor &tc1, &tc2;
3189  virtual int exec() {
3190  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size 1");
3191  size_type N = tc1.size(), M = tc2.size();
3192  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
3193  << " != " << N << "*" << M);
3194 
3195  base_tensor::iterator it = t.begin();
3196  base_tensor::const_iterator it1 = tc1.cbegin();
3197  switch(M) {
3198  case(1):
3199  for (size_type n = 0; n < N; ++n, ++it1)
3200  *it++ = tc2[0] * (*it1);
3201  break;
3202  case(2):
3203  for (size_type n = 0; n < N; ++n, ++it1) {
3204  base_tensor::const_iterator it2 = tc2.cbegin();
3205  dax__<2>(it, it2, *it1);
3206  }
3207  break;
3208  case(3):
3209  for (size_type n = 0; n < N; ++n, ++it1) {
3210  base_tensor::const_iterator it2 = tc2.cbegin();
3211  dax__<4>(it, it2, *it1);
3212  }
3213  break;
3214  case(4):
3215  for (size_type n = 0; n < N; ++n, ++it1) {
3216  base_tensor::const_iterator it2 = tc2.cbegin();
3217  dax__<4>(it, it2, *it1);
3218  }
3219  break;
3220  default:
3221  const int M1 = int(M)/4;
3222  const int M2 = int(M) - M1*4;
3223  for (size_type n = 0; n < N; ++n, ++it1) {
3224  base_tensor::const_iterator it2 = tc2.cbegin();
3225  for (int mm=0; mm < M1; ++mm)
3226  dax__<4>(it, it2, *it1);
3227  for (int mm=0; mm < M2; ++mm)
3228  *it++ = (*it2++) * (*it1);
3229  }
3230  }
3231  return 0;
3232  }
3233  ga_instruction_contraction_unrolled(base_tensor &t_,
3234  const base_tensor &tc1_,
3235  const base_tensor &tc2_)
3236  : t(t_), tc1(tc1_), tc2(tc2_) {}
3237  };
3238 
3239  template<int N, int S2>
3240  inline void reduc_elem_d_unrolled__(base_tensor::iterator &it,
3241  base_tensor::const_iterator &it1,
3242  base_tensor::const_iterator &it2,
3243  size_type s1, size_type s2) {
3244  reduc_elem_unrolled__<N>(it, it1, it2, s1, s2);
3245  reduc_elem_d_unrolled__<N, S2-1>(++it, it1, ++it2, s1, s2);
3246  }
3247  // A Repeated definition is following because partial specialization
3248  // of functions is not allowed in C++ for the moment.
3249  // The gain in assembly time is small compared to the simply unrolled version
3250  template<> inline void reduc_elem_d_unrolled__<1, 0>
3251  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3252  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3253  template<> inline void reduc_elem_d_unrolled__<2, 0>
3254  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3255  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3256  template<> inline void reduc_elem_d_unrolled__<3, 0>
3257  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3258  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3259  template<> inline void reduc_elem_d_unrolled__<4, 0>
3260  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3261  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3262  template<> inline void reduc_elem_d_unrolled__<5, 0>
3263  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3264  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3265  template<> inline void reduc_elem_d_unrolled__<6, 0>
3266  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3267  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3268  template<> inline void reduc_elem_d_unrolled__<7, 0>
3269  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3270  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3271  template<> inline void reduc_elem_d_unrolled__<8, 0>
3272  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3273  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3274  template<> inline void reduc_elem_d_unrolled__<9, 0>
3275  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3276  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3277  template<> inline void reduc_elem_d_unrolled__<10, 0>
3278  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3279  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3280  template<> inline void reduc_elem_d_unrolled__<11, 0>
3281  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3282  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3283  template<> inline void reduc_elem_d_unrolled__<12, 0>
3284  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3285  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3286  template<> inline void reduc_elem_d_unrolled__<13, 0>
3287  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3288  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3289  template<> inline void reduc_elem_d_unrolled__<14, 0>
3290  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3291  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3292  template<> inline void reduc_elem_d_unrolled__<15, 0>
3293  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3294  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3295  template<> inline void reduc_elem_d_unrolled__<16, 0>
3296  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3297  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3298 
3299  // Performs Ani Bmi -> Cmn. Automatically doubly unrolled operation
3300  // (for uniform meshes).
3301  template<int I, int M>
3302  struct ga_ins_red_d_unrolled : public ga_instruction {
3303  base_tensor &t;
3304  const base_tensor &tc1, &tc2;
3305  virtual int exec() {
3306  GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of size "
3307  << M << "x" << I);
3308  size_type N = tc1.size()/I, M_ = tc2.size()/I;
3309  GA_DEBUG_ASSERT(M_ == M, "Internal error");
3310  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
3311  << " != " << N << "*" << M);
3312  auto it = t.begin();
3313  auto it1 = tc1.cbegin();
3314  for (size_type n = 0; n < N; ++n, ++it1) {
3315  auto it2 = tc2.cbegin();
3316  reduc_elem_d_unrolled__<I, M>(it, it1, it2, N, M); // M argument is known at compile time it can be optimized
3317  }
3318  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3319  return 0;
3320  }
3321  ga_ins_red_d_unrolled(base_tensor &t_,
3322  const base_tensor &tc1_, const base_tensor &tc2_)
3323  : t(t_), tc1(tc1_), tc2(tc2_) {}
3324  };
3325 
3326 
3327  pga_instruction ga_instruction_contraction_switch
3328  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3329  size_type n, bool &to_clear) {
3330  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3331 
3332  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3333  tc1_.qdim() == n && tc2_.qdim() == n) {
3334  to_clear = true;
3335  t_.set_sparsity(10, tc1_.qdim());
3336  return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
3337  }
3338 
3339  if (tc2_.sparsity() == 1) {
3340  switch(n) {
3341  case 2:
3342  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3343  (t, tc1, tc2);
3344  case 3:
3345  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3346  (t, tc1, tc2);
3347  case 4:
3348  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3349  (t, tc1, tc2);
3350  case 5:
3351  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3352  (t, tc1, tc2);
3353  default:
3354  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2,n);
3355  }
3356  }
3357  if (tc2_.sparsity() == 2) {
3358  size_type q2 = tc2.sizes()[1];
3359  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3360  if (n2*q2 == n) {
3361  switch (n2) {
3362  case 1:
3363  switch (q2) {
3364  case 2:
3365  return
3366  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3367  (t, tc1, tc2);
3368  case 3:
3369  return
3370  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3371  (t, tc1, tc2);
3372  case 4:
3373  return
3374  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3375  (t, tc1, tc2);
3376  default :
3377  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3378  (t, tc1, tc2, q2);
3379  }
3380  case 2:
3381  switch (q2) {
3382  case 2:
3383  return
3384  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3385  (t, tc1, tc2);
3386  case 3:
3387  return
3388  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3389  (t, tc1, tc2);
3390  case 4:
3391  return
3392  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3393  (t, tc1, tc2);
3394  default :
3395  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3396  (t, tc1, tc2, q2);
3397  }
3398  case 3:
3399  switch (q2) {
3400  case 2:
3401  return
3402  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3403  (t, tc1, tc2);
3404  case 3:
3405  return
3406  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3407  (t, tc1, tc2);
3408  case 4:
3409  return
3410  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3411  (t, tc1, tc2);
3412  default :
3413  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3414  (t, tc1, tc2, q2);
3415  }
3416  case 4:
3417  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3418  (t, tc1, tc2, q2);
3419  case 5:
3420  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3421  (t, tc1, tc2, q2);
3422  default:
3423  return std::make_shared<ga_instruction_contraction_opt0_2>
3424  (t,tc1,tc2,n2,q2);
3425  }
3426  }
3427  }
3428  if (tc1_.sparsity() == 2) {
3429  size_type q1 = tc1.sizes()[1];
3430  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3431  if (n1*q1 == n) {
3432  switch (n1) {
3433  case 1:
3434  switch (q1) {
3435  case 2:
3436  return
3437  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3438  (t, tc1, tc2);
3439  case 3:
3440  return
3441  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3442  (t, tc1, tc2);
3443  case 4:
3444  return
3445  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3446  (t, tc1, tc2);
3447  default :
3448  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3449  (t, tc1, tc2, q1);
3450  }
3451  case 2:
3452  switch (q1) {
3453  case 2:
3454  return
3455  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3456  (t, tc1, tc2);
3457  case 3:
3458  return
3459  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3460  (t, tc1, tc2);
3461  case 4:
3462  return
3463  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3464  (t, tc1, tc2);
3465  default :
3466  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3467  (t, tc1, tc2, q1);
3468  }
3469  case 3:
3470  switch (q1) {
3471  case 2:
3472  return
3473  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3474  (t, tc1, tc2);
3475  case 3:
3476  return
3477  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3478  (t, tc1, tc2);
3479  case 4:
3480  return
3481  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3482  (t, tc1, tc2);
3483  default :
3484  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3485  (t, tc1, tc2, q1);
3486  }
3487  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3488  (t, tc1, tc2, q1);
3489  case 4:
3490  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3491  (t, tc1, tc2, q1);
3492  case 5:
3493  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3494  (t, tc1, tc2, q1);
3495  default:
3496  return std::make_shared<ga_instruction_contraction_opt2_0>
3497  (t,tc1,tc2, n1, q1);
3498  }
3499  }
3500  }
3501 
3502  switch(n) {
3503  case 1 : return std::make_shared<ga_instruction_contraction_unrolled< 1>>
3504  (t, tc1, tc2);
3505  case 2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
3506  (t, tc1, tc2);
3507  case 3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
3508  (t, tc1, tc2);
3509  case 4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
3510  (t, tc1, tc2);
3511  case 5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
3512  (t, tc1, tc2);
3513  case 6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
3514  (t, tc1, tc2);
3515  // above 6 it is decided inside ga_instruction_contraction::exec() whether
3516  // an unrolled loop or dgemm is used
3517  default : return std::make_shared<ga_instruction_contraction>
3518  (t, tc1, tc2, n);
3519  }
3520  }
3521 
3522  pga_instruction ga_uniform_instruction_contraction_switch
3523  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3524  size_type n, bool &to_clear) {
3525  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3526 
3527  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3528  tc1_.qdim() == n && tc2_.qdim() == n) {
3529  to_clear = true;
3530  t_.set_sparsity(10, tc1_.qdim());
3531  return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
3532  }
3533  if (tc2_.sparsity() == 1) {
3534  switch(n) {
3535  case 2:
3536  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3537  (t, tc1, tc2);
3538  case 3:
3539  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3540  (t, tc1, tc2);
3541  case 4:
3542  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3543  (t, tc1, tc2);
3544  case 5:
3545  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3546  (t, tc1, tc2);
3547  default:
3548  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
3549  }
3550  }
3551  if (tc2_.sparsity() == 2) {
3552  size_type q2 = tc2.sizes()[1];
3553  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3554  if (n2*q2 == n) {
3555  switch (n2) {
3556  case 1:
3557  switch (q2) {
3558  case 2:
3559  return
3560  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3561  (t, tc1, tc2);
3562  case 3:
3563  return
3564  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3565  (t, tc1, tc2);
3566  case 4:
3567  return
3568  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3569  (t, tc1, tc2);
3570  default :
3571  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3572  (t, tc1, tc2, q2);
3573  }
3574  case 2:
3575  switch (q2) {
3576  case 2:
3577  return
3578  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3579  (t, tc1, tc2);
3580  case 3:
3581  return
3582  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3583  (t, tc1, tc2);
3584  case 4:
3585  return
3586  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3587  (t, tc1, tc2);
3588  default :
3589  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3590  (t, tc1, tc2, q2);
3591  }
3592  case 3:
3593  switch (q2) {
3594  case 2:
3595  return
3596  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3597  (t, tc1, tc2);
3598  case 3:
3599  return
3600  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3601  (t, tc1, tc2);
3602  case 4:
3603  return
3604  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3605  (t, tc1, tc2);
3606  default :
3607  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3608  (t, tc1, tc2, q2);
3609  }
3610  case 4:
3611  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3612  (t, tc1, tc2, q2);
3613  case 5:
3614  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3615  (t, tc1, tc2, q2);
3616  default:
3617  return std::make_shared<ga_instruction_contraction_opt0_2>
3618  (t,tc1,tc2,n2,q2);
3619  }
3620  }
3621  }
3622  if (tc1_.sparsity() == 2) {
3623  size_type q1 = tc1.sizes()[1];
3624  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3625  if (n1*q1 == n) {
3626  switch (n1) {
3627  case 1:
3628  switch (q1) {
3629  case 2:
3630  return
3631  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3632  (t, tc1, tc2);
3633  case 3:
3634  return
3635  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3636  (t, tc1, tc2);
3637  case 4:
3638  return
3639  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3640  (t, tc1, tc2);
3641  default :
3642  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3643  (t, tc1, tc2, q1);
3644  }
3645  case 2:
3646  switch (q1) {
3647  case 2:
3648  return
3649  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3650  (t, tc1, tc2);
3651  case 3:
3652  return
3653  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3654  (t, tc1, tc2);
3655  case 4:
3656  return
3657  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3658  (t, tc1, tc2);
3659  default :
3660  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3661  (t, tc1, tc2, q1);
3662  }
3663  case 3:
3664  switch (q1) {
3665  case 2:
3666  return
3667  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3668  (t, tc1, tc2);
3669  case 3:
3670  return
3671  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3672  (t, tc1, tc2);
3673  case 4:
3674  return
3675  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3676  (t, tc1, tc2);
3677  default :
3678  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3679  (t, tc1, tc2, q1);
3680  }
3681  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3682  (t, tc1, tc2, q1);
3683  case 4:
3684  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3685  (t, tc1, tc2, q1);
3686  case 5:
3687  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3688  (t, tc1, tc2, q1);
3689  default:
3690  return std::make_shared<ga_instruction_contraction_opt2_0>
3691  (t,tc1,tc2, n1, q1);
3692  }
3693  }
3694  }
3695 
3696  // Only specialized for certain values
3697  size_type s2 = tc2.size()/n;
3698  switch(s2) {
3699  case 1 :
3700  switch(n) {
3701  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3702  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3703  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3704  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3705  }
3706  case 2 :
3707  switch(n) {
3708  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3709  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3710  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3711  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3712  }
3713  case 3 :
3714  switch(n) {
3715  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3716  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3717  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3718  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3719  }
3720  case 4 :
3721  switch(n) {
3722  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3723  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3724  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3725  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3726  }
3727  case 5 :
3728  switch(n) {
3729  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3730  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3731  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3732  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3733  }
3734  case 6 :
3735  switch(n) {
3736  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3737  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3738  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3739  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3740  }
3741  case 7 :
3742  switch(n) {
3743  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3744  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3745  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3746  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3747  }
3748  case 8 :
3749  switch(n) {
3750  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3751  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3752  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3753  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3754  }
3755  case 9 :
3756  switch(n) {
3757  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3758  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3759  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3760  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3761  }
3762  case 10:
3763  switch(n) {
3764  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3765  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3766  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3767  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3768  }
3769  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3770  }
3771  }
3772 
3773 
3774  // Performs Amij Bnj -> Cmni. To be optimized.
3775  struct ga_instruction_spec_contraction : public ga_instruction {
3776  base_tensor &t;
3777  const base_tensor &tc1, &tc2;
3778  size_type nn;
3779  virtual int exec() {
3780  GA_DEBUG_INFO("Instruction: specific contraction operation of "
3781  "size " << nn);
3782  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3783  size_type s2 = tc2.sizes()[0];
3784  base_tensor::iterator it = t.begin();
3785  for (size_type i = 0; i < s11; ++i)
3786  for (size_type n = 0; n < s2; ++n)
3787  for (size_type m = 0; m < s1; ++m, ++it) {
3788  *it = scalar_type(0);
3789  for (size_type j = 0; j < nn; ++j)
3790  *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3791  }
3792  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3793  return 0;
3794  }
3795  ga_instruction_spec_contraction(base_tensor &t_,
3796  const base_tensor &tc1_,
3797  const base_tensor &tc2_, size_type n_)
3798  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3799  };
3800 
3801  // Performs Amik Bnjk -> Cmnij. To be optimized.
3802  struct ga_instruction_spec2_contraction : public ga_instruction {
3803  base_tensor &t;
3804  const base_tensor &tc1, &tc2;
3805  size_type nn;
3806  virtual int exec() {
3807  GA_DEBUG_INFO("Instruction: second specific contraction operation of "
3808  "size " << nn);
3809  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3810  size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
3811  base_tensor::iterator it = t.begin();
3812  for (size_type j = 0; j < s22; ++j)
3813  for (size_type i = 0; i < s11; ++i)
3814  for (size_type m = 0; m < s1; ++m)
3815  for (size_type n = 0; n < s2; ++n, ++it) {
3816  *it = scalar_type(0);
3817  for (size_type k = 0; k < nn; ++k)
3818  *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
3819  }
3820  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3821  return 0;
3822  }
3823  ga_instruction_spec2_contraction(base_tensor &t_,
3824  const base_tensor &tc1_,
3825  const base_tensor &tc2_, size_type n_)
3826  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3827  };
3828 
3829  // Performs Aij Bkl -> Cijkl
3830  struct ga_instruction_simple_tmult : public ga_instruction {
3831  base_tensor &t;
3832  const base_tensor &tc1, &tc2;
3833  virtual int exec() {
3834  GA_DEBUG_INFO("Instruction: simple tensor product");
3835  size_type s1 = tc1.size();
3836  GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
3837  base_tensor::const_iterator it2=tc2.cbegin(), it1=tc1.cbegin(), it1end=it1 + s1;
3838  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3839  *it = *(it2) * (*it1);
3840  if (++it1 == it1end) { it1 = tc1.cbegin(), ++it2; }
3841  }
3842  return 0;
3843  }
3844  ga_instruction_simple_tmult(base_tensor &t_,
3845  const base_tensor &tc1_, const base_tensor &tc2_)
3846  : t(t_), tc1(tc1_), tc2(tc2_) {}
3847  };
3848 
3849 
3850  // Performs Aij Bkl -> Cijkl, partially unrolled version
3851  template<int IJ> struct ga_instruction_simple_tmult_unrolled
3852  : public ga_instruction {
3853  base_tensor &t;
3854  const base_tensor &tc1, &tc2;
3855  virtual int exec() {
3856  size_type KL = tc2.size();
3857  GA_DEBUG_ASSERT(tc1.size() == IJ,
3858  "Wrong sizes " << tc1.size() << " != " << IJ);
3859  GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
3860  << IJ << " operations");
3861  GA_DEBUG_ASSERT(t.size() == IJ * KL,
3862  "Wrong sizes " << t.size() << " != " << IJ << "*" << KL);
3863 #if 0 // too slow, how can this be? that's what dger should be good at. (it is slower even without the std::fill overhead)
3864  const BLAS_INT IJ_=BLAS_INT(IJ), KL_=BLAS_INT(KL), INC(1);
3865  const scalar_type one(1);
3866  std::fill(t.begin(), t.end(), scalar_type(0));
3867  gmm::dger_(&IJ_, &KL_, &one, &tc1[0], &INC, &tc2[0], &INC, &(t[0]), &IJ_);
3868 #else
3869  base_tensor::iterator it = t.begin();
3870  base_tensor::const_iterator it2 = tc2.cbegin();
3871  for (size_type kl = 0; kl < KL; ++kl, ++it2) {
3872  base_tensor::const_iterator it1 = tc1.cbegin();
3873  dax__<IJ>(it, it1, *it2);
3874  }
3875  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3876 #endif
3877  return 0;
3878  }
3879  ga_instruction_simple_tmult_unrolled(base_tensor &t_,
3880  const base_tensor &tc1_,
3881  const base_tensor &tc2_)
3882  : t(t_), tc1(tc1_), tc2(tc2_) {}
3883  };
3884 
3885  pga_instruction ga_uniform_instruction_simple_tmult
3886  (base_tensor &t, const base_tensor &tc1, const base_tensor &tc2) {
3887  switch(tc1.size()) {
3888  case 1 : GMM_ASSERT1(false, "size 1 should not happen");
3889  case 2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3890  (t, tc1, tc2);
3891  case 3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3892  (t, tc1, tc2);
3893  case 4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3894  (t, tc1, tc2);
3895  case 5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3896  (t, tc1, tc2);
3897  case 6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3898  (t, tc1, tc2);
3899  case 7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3900  (t, tc1, tc2);
3901  case 8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3902  (t, tc1, tc2);
3903  case 9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3904  (t, tc1, tc2);
3905  case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3906  (t, tc1, tc2);
3907  case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3908  (t, tc1, tc2);
3909  case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3910  (t, tc1, tc2);
3911  case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3912  (t, tc1, tc2);
3913  case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3914  (t, tc1, tc2);
3915  case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3916  (t, tc1, tc2);
3917  case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
3918  (t, tc1, tc2);
3919  default : return std::make_shared<ga_instruction_simple_tmult>
3920  (t, tc1, tc2);
3921  }
3922  }
3923 
3924 
3925  // Performs Ami Bnj -> Cmnij. To be optimized.
3926  struct ga_instruction_spec_tmult : public ga_instruction {
3927  base_tensor &t;
3928  const base_tensor &tc1, &tc2;
3929  const size_type I, J;
3930  virtual int exec() {
3931  GA_DEBUG_INFO("Instruction: specific tensor product");
3932  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3933  const size_type M = tc1.size() / I,
3934  N = tc2.size() / J;
3935  auto it = t.begin();
3936 #if 1 // there could be a smarter way to implement this, but this hardcoded version is fast and robust
3937  switch(M) {
3938  case 1:
3939  for (size_type j = 0; j < J; ++j)
3940  for (auto it1 = tc1.cbegin(); it1 != tc1.end(); ++it1)
3941  for (size_type n = 0; n < N; ++n)
3942  *it++ = (*it1) * tc2[n+N*j];
3943  break;
3944  case 2:
3945  for (size_type j = 0; j < J; ++j)
3946  for (size_type i = 0; i < I; ++i)
3947  for (size_type n = 0; n < N; ++n) {
3948  auto it1 = tc1.cbegin() + M*i;
3949  dax__<2>(it, it1, tc2[n+N*j]);
3950  }
3951  break;
3952  case 3:
3953  for (size_type j = 0; j < J; ++j)
3954  for (size_type i = 0; i < I; ++i)
3955  for (size_type n = 0; n < N; ++n) {
3956  auto it1 = tc1.cbegin() + M*i;
3957  dax__<3>(it, it1, tc2[n+N*j]);
3958  }
3959  break;
3960  case 4:
3961  for (size_type j = 0; j < J; ++j)
3962  for (size_type i = 0; i < I; ++i)
3963  for (size_type n = 0; n < N; ++n) {
3964  auto it1 = tc1.cbegin() + M*i;
3965  dax__<4>(it, it1, tc2[n+N*j]);
3966  }
3967  break;
3968  case 5:
3969  for (size_type j = 0; j < J; ++j)
3970  for (size_type i = 0; i < I; ++i)
3971  for (size_type n = 0; n < N; ++n) {
3972  auto it1 = tc1.cbegin() + M*i;
3973  dax__<5>(it, it1, tc2[n+N*j]);
3974  }
3975  break;
3976  case 6:
3977  for (size_type j = 0; j < J; ++j)
3978  for (size_type i = 0; i < I; ++i)
3979  for (size_type n = 0; n < N; ++n) {
3980  auto it1 = tc1.cbegin() + M*i;
3981  dax__<6>(it, it1, tc2[n+N*j]);
3982  }
3983  break;
3984  case 7:
3985  for (size_type j = 0; j < J; ++j)
3986  for (size_type i = 0; i < I; ++i)
3987  for (size_type n = 0; n < N; ++n) {
3988  auto it1 = tc1.cbegin() + M*i;
3989  dax__<7>(it, it1, tc2[n+N*j]);
3990  }
3991  break;
3992  case 8:
3993  for (size_type j = 0; j < J; ++j)
3994  for (size_type i = 0; i < I; ++i)
3995  for (size_type n = 0; n < N; ++n) {
3996  auto it1 = tc1.cbegin() + M*i;
3997  dax__<8>(it, it1, tc2[n+N*j]);
3998  }
3999  break;
4000  default:
4001  const int M1 = int(M)/8;
4002  const int M2 = int(M) - M1*8;
4003  switch(M2) {
4004  case 0:
4005  for (size_type j = 0; j < J; ++j)
4006  for (size_type i = 0; i < I; ++i)
4007  for (size_type n = 0; n < N; ++n) {
4008  auto it1 = tc1.cbegin() + M*i;
4009  for (int mm=0; mm < M1; ++mm)
4010  dax__<8>(it, it1, tc2[n+N*j]);
4011  }
4012  break;
4013  case 1:
4014  for (size_type j = 0; j < J; ++j)
4015  for (size_type i = 0; i < I; ++i)
4016  for (size_type n = 0; n < N; ++n) {
4017  auto it1 = tc1.cbegin() + M*i;
4018  for (int mm=0; mm < M1; ++mm)
4019  dax__<8>(it, it1, tc2[n+N*j]);
4020  dax__<1>(it, it1, tc2[n+N*j]);
4021  }
4022  break;
4023  case 2:
4024  for (size_type j = 0; j < J; ++j)
4025  for (size_type i = 0; i < I; ++i)
4026  for (size_type n = 0; n < N; ++n) {
4027  auto it1 = tc1.cbegin() + M*i;
4028  for (int mm=0; mm < M1; ++mm)
4029  dax__<8>(it, it1, tc2[n+N*j]);
4030  dax__<2>(it, it1, tc2[n+N*j]);
4031  }
4032  break;
4033  case 3:
4034  for (size_type j = 0; j < J; ++j)
4035  for (size_type i = 0; i < I; ++i)
4036  for (size_type n = 0; n < N; ++n) {
4037  auto it1 = tc1.cbegin() + M*i;
4038  for (int mm=0; mm < M1; ++mm)
4039  dax__<8>(it, it1, tc2[n+N*j]);
4040  dax__<3>(it, it1, tc2[n+N*j]);
4041  }
4042  break;
4043  case 4:
4044  for (size_type j = 0; j < J; ++j)
4045  for (size_type i = 0; i < I; ++i)
4046  for (size_type n = 0; n < N; ++n) {
4047  auto it1 = tc1.cbegin() + M*i;
4048  for (int mm=0; mm < M1; ++mm)
4049  dax__<8>(it, it1, tc2[n+N*j]);
4050  dax__<4>(it, it1, tc2[n+N*j]);
4051  }
4052  break;
4053  case 5:
4054  for (size_type j = 0; j < J; ++j)
4055  for (size_type i = 0; i < I; ++i)
4056  for (size_type n = 0; n < N; ++n) {
4057  auto it1 = tc1.cbegin() + M*i;
4058  for (int mm=0; mm < M1; ++mm)
4059  dax__<8>(it, it1, tc2[n+N*j]);
4060  dax__<5>(it, it1, tc2[n+N*j]);
4061  }
4062  break;
4063  case 6:
4064  for (size_type j = 0; j < J; ++j)
4065  for (size_type i = 0; i < I; ++i)
4066  for (size_type n = 0; n < N; ++n) {
4067  auto it1 = tc1.cbegin() + M*i;
4068  for (int mm=0; mm < M1; ++mm)
4069  dax__<8>(it, it1, tc2[n+N*j]);
4070  dax__<6>(it, it1, tc2[n+N*j]);
4071  }
4072  break;
4073  case 7:
4074  for (size_type j = 0; j < J; ++j)
4075  for (size_type i = 0; i < I; ++i)
4076  for (size_type n = 0; n < N; ++n) {
4077  auto it1 = tc1.cbegin() + M*i;
4078  for (int mm=0; mm < M1; ++mm)
4079  dax__<8>(it, it1, tc2[n+N*j]);
4080  dax__<7>(it, it1, tc2[n+N*j]);
4081  }
4082  break;
4083  default:
4084  GMM_ASSERT1(false, "should not happen");
4085  }
4086  }
4087  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
4088 #else // runtime performance of this implementation often affected by totally unrelated changes
4089  // even if it actually compiles to the same assembly instructions
4090  for (size_type j = 0; j < J; ++j)
4091  for (size_type i = 0; i < I; ++i)
4092  for (size_type n = 0; n < N; ++n) {
4093  scalar_type val = tc2[n+N*j];
4094  for (size_type m = 0; m < M; ++m, ++it)
4095  *it = tc1[m+M*i] * val;
4096  }
4097  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
4098 #endif
4099  return 0;
4100  }
4101  ga_instruction_spec_tmult(base_tensor &t_,
4102  const base_tensor &tc1_,
4103  const base_tensor &tc2_,
4104  size_type I_, size_type J_)
4105  : t(t_), tc1(tc1_), tc2(tc2_), I(I_), J(J_) {}
4106  };
4107 
4108  // Performs Ai Bmj -> Cmij. To be optimized.
4109  struct ga_instruction_spec2_tmult : public ga_instruction {
4110  base_tensor &t;
4111  const base_tensor &tc1, &tc2;
4112  virtual int exec() {
4113  GA_DEBUG_INFO("Instruction: second specific tensor product");
4114  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
4115  size_type I = tc1.size();
4116  size_type M = tc2.sizes()[0], J = tc2.size() / M;
4117 
4118  base_tensor::iterator it = t.begin();
4119  for (size_type j = 0; j < J; ++j)
4120  for (size_type i = 0; i < I; ++i)
4121  for (size_type m = 0; m < M; ++m, ++it)
4122  *it = tc1[i] * tc2[m+M*j];
4123  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
4124  return 0;
4125  }
4126  ga_instruction_spec2_tmult(base_tensor &t_,
4127  const base_tensor &tc1_, const base_tensor &tc2_)
4128  : t(t_), tc1(tc1_), tc2(tc2_) {}
4129  };
4130 
4131 
4132 
4133  struct ga_instruction_simple_c_matrix : public ga_instruction {
4134  base_tensor &t;
4135  std::vector<scalar_type *> components;
4136  virtual int exec() {
4137  GA_DEBUG_INFO("Instruction: gathering components for explicit "
4138  "matrix");
4139  GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
4140  for (size_type i = 0; i < components.size(); ++i)
4141  t[i] = *(components[i]);
4142  return 0;
4143  }
4144  ga_instruction_simple_c_matrix(base_tensor &t_,
4145  std::vector<scalar_type *> &components_)
4146  : t(t_), components(components_) {}
4147  };
4148 
4149  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
4150  base_tensor &t;
4151  const std::vector<const base_tensor *> components;
4152  virtual int exec() {
4153  GA_DEBUG_INFO("Instruction: gathering components for explicit "
4154  "matrix with tests functions");
4155  size_type s = t.size() / components.size();
4156  GA_DEBUG_ASSERT(s, "Wrong sizes");
4157  base_tensor::iterator it = t.begin();
4158  for (size_type i = 0; i < components.size(); ++i) {
4159  const base_tensor &t1 = *(components[i]);
4160  if (t1.size() > 1) {
4161  GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
4162  << " != " << s);
4163  for (size_type j = 0; j < s; ++j) *it++ = t1[j];
4164  } else {
4165  for (size_type j = 0; j < s; ++j) *it++ = t1[0];
4166  }
4167  }
4168  return 0;
4169  }
4170  ga_instruction_c_matrix_with_tests
4171  (base_tensor &t_, const std::vector<const base_tensor *> &components_)
4172  : t(t_), components(components_) {}
4173  };
4174 
4175  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
4176  scalar_type &t;
4177  const scalar_type &c;
4178  pscalar_func_onearg f1;
4179  virtual int exec() {
4180  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4181  "predefined function on a scalar");
4182  t = (*f1)(c);
4183  return 0;
4184  }
4185  ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
4186  pscalar_func_onearg f1_)
4187  : t(t_), c(c_), f1(f1_) {}
4188  };
4189 
4190  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
4191  scalar_type &t;
4192  const scalar_type &c;
4193  const ga_predef_function &F;
4194  virtual int exec() {
4195  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4196  "predefined function on a scalar");
4197  t = F(c);
4198  return 0;
4199  }
4200  ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
4201  const scalar_type &c_,
4202  const ga_predef_function &F_)
4203  : t(t_), c(c_), F(F_) {}
4204  };
4205 
4206  struct ga_instruction_eval_func_1arg : public ga_instruction {
4207  base_tensor &t;
4208  const base_tensor &tc1;
4209  pscalar_func_onearg f1;
4210  virtual int exec() {
4211  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4212  "predefined function on tensor");
4213  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4214  for (size_type i = 0; i < t.size(); ++i)
4215  t[i] = (*f1)(tc1[i]);
4216  return 0;
4217  }
4218  ga_instruction_eval_func_1arg(base_tensor &t_,
4219  const base_tensor &c_, pscalar_func_onearg f1_)
4220  : t(t_), tc1(c_), f1(f1_) {}
4221  };
4222 
4223  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
4224  base_tensor &t;
4225  const base_tensor &tc1;
4226  const ga_predef_function &F;
4227  virtual int exec() {
4228  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4229  "predefined function on tensor");
4230  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4231  for (size_type i = 0; i < t.size(); ++i)
4232  t[i] = F(tc1[i]);
4233  return 0;
4234  }
4235  ga_instruction_eval_func_1arg_expr(base_tensor &t_,
4236  const base_tensor &c_,
4237  const ga_predef_function &F_)
4238  : t(t_), tc1(c_), F(F_) {}
4239  };
4240 
4241  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
4242  scalar_type &t;
4243  const scalar_type &c, &d;
4244  pscalar_func_twoargs f2;
4245  virtual int exec() {
4246  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4247  "predefined function on two scalar");
4248  t = (*f2)(c, d);
4249  return 0;
4250  }
4251  ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
4252  const scalar_type &d_,
4253  pscalar_func_twoargs f2_)
4254  : t(t_), c(c_), d(d_), f2(f2_) {}
4255  };
4256 
4257  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
4258  scalar_type &t;
4259  const scalar_type &c, &d;
4260  const ga_predef_function &F;
4261  virtual int exec() {
4262  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4263  "predefined function on two scalar");
4264  t = F(c, d);
4265  return 0;
4266  }
4267  ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
4268  const scalar_type &c_,
4269  const scalar_type &d_,
4270  const ga_predef_function &F_)
4271  : t(t_), c(c_), d(d_), F(F_) {}
4272  };
4273 
4274  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
4275  base_tensor &t;
4276  const base_tensor &tc1, &tc2;
4277  pscalar_func_twoargs f2;
4278  virtual int exec() {
4279  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4280  "predefined function on one scalar and one tensor");
4281  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
4282  for (size_type i = 0; i < t.size(); ++i)
4283  t[i] = (*f2)(tc1[0], tc2[i]);
4284  return 0;
4285  }
4286  ga_instruction_eval_func_2arg_first_scalar(base_tensor &t_,
4287  const base_tensor &c_,
4288  const base_tensor &d_,
4289  pscalar_func_twoargs f2_)
4290  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
4291  };
4292 
4293  struct ga_instruction_eval_func_2arg_first_scalar_expr
4294  : public ga_instruction {
4295  base_tensor &t;
4296  const base_tensor &tc1, &tc2;
4297  const ga_predef_function &F;
4298  virtual int exec() {
4299  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4300  "predefined function on one scalar and one tensor");
4301  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
4302  for (size_type i = 0; i < t.size(); ++i)
4303  t[i] = F(tc1[0], tc2[i]);
4304  return 0;
4305  }
4306  ga_instruction_eval_func_2arg_first_scalar_expr(base_tensor &t_,
4307  const base_tensor &c_,
4308  const base_tensor &d_,
4309  const ga_predef_function &F_)
4310  : t(t_), tc1(c_), tc2(d_), F(F_) {}
4311  };
4312 
4313  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
4314  base_tensor &t;
4315  const base_tensor &tc1, &tc2;
4316  pscalar_func_twoargs f2;
4317  virtual int exec() {
4318  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4319  "predefined function on one tensor and one scalar");
4320  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4321  for (size_type i = 0; i < t.size(); ++i)
4322  t[i] = (*f2)(tc1[i], tc2[0]);
4323  return 0;
4324  }
4325  ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
4326  const base_tensor &c_,
4327  const base_tensor &d_,
4328  pscalar_func_twoargs f2_)
4329  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
4330  };
4331 
4332  struct ga_instruction_eval_func_2arg_second_scalar_expr
4333  : public ga_instruction {
4334  base_tensor &t;
4335  const base_tensor &tc1, &tc2;
4336  const ga_predef_function &F;
4337  virtual int exec() {
4338  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4339  "predefined function on one tensor and one scalar");
4340  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4341  for (size_type i = 0; i < t.size(); ++i)
4342  t[i] = F(tc1[i], tc2[0]);
4343  return 0;
4344  }
4345  ga_instruction_eval_func_2arg_second_scalar_expr(base_tensor &t_,
4346  const base_tensor &c_,
4347  const base_tensor &d_,
4348  const ga_predef_function &F_)
4349  : t(t_), tc1(c_), tc2(d_), F(F_) {}
4350  };
4351 
4352  struct ga_instruction_eval_func_2arg : public ga_instruction {
4353  base_tensor &t;
4354  const base_tensor &tc1, &tc2;
4355  pscalar_func_twoargs f2;
4356  virtual int exec() {
4357  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4358  "predefined function on two tensors");
4359  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
4360  "Wrong sizes");
4361  for (size_type i = 0; i < t.size(); ++i)
4362  t[i] = (*f2)(tc1[i], tc2[i]);
4363  return 0;
4364  }
4365  ga_instruction_eval_func_2arg(base_tensor &t_,
4366  const base_tensor &c_,
4367  const base_tensor &d_, pscalar_func_twoargs f2_)
4368  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
4369  };
4370 
4371  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
4372  base_tensor &t;
4373  const base_tensor &tc1, &tc2;
4374  const ga_predef_function &F;
4375  virtual int exec() {
4376  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4377  "predefined function on two tensors");
4378  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
4379  "Wrong sizes");
4380  for (size_type i = 0; i < t.size(); ++i)
4381  t[i] = F(tc1[i], tc2[i]);
4382  return 0;
4383  }
4384  ga_instruction_eval_func_2arg_expr(base_tensor &t_,
4385  const base_tensor &c_,
4386  const base_tensor &d_,
4387  const ga_predef_function &F_)
4388  : t(t_), tc1(c_), tc2(d_), F(F_) {}
4389  };
4390 
4391  struct ga_instruction_eval_OP : public ga_instruction {
4392  base_tensor &t;
4393  const ga_nonlinear_operator &OP;
4394  ga_nonlinear_operator::arg_list args;
4395  virtual int exec() {
4396  GA_DEBUG_INFO("Instruction: operator evaluation");
4397  OP.value(args, t);
4398  return 0;
4399  }
4400  ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
4401  ga_nonlinear_operator::arg_list &args_)
4402  : t(t_), OP(OP_), args(args_) {}
4403  };
4404 
4405  struct ga_instruction_eval_derivative_OP : public ga_instruction {
4406  base_tensor &t;
4407  const ga_nonlinear_operator &OP;
4408  ga_nonlinear_operator::arg_list args;
4409  size_type der1;
4410  virtual int exec() {
4411  GA_DEBUG_INFO("Instruction: operator derivative evaluation");
4412  OP.derivative(args, der1, t);
4413  return 0;
4414  }
4415  ga_instruction_eval_derivative_OP(base_tensor &t_,
4416  const ga_nonlinear_operator &OP_,
4417  ga_nonlinear_operator::arg_list &args_,
4418  size_type der1_)
4419  : t(t_), OP(OP_), args(args_), der1(der1_) {}
4420  };
4421 
4422  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
4423  base_tensor &t;
4424  const ga_nonlinear_operator &OP;
4425  ga_nonlinear_operator::arg_list args;
4426  size_type der1, der2;
4427  virtual int exec() {
4428  GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
4429  OP.second_derivative(args, der1, der2, t);
4430  return 0;
4431  }
4432  ga_instruction_eval_second_derivative_OP
4433  (base_tensor &t_, const ga_nonlinear_operator &OP_,
4434  ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
4435  : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
4436  };
4437 
4438  struct ga_instruction_tensor_slice : public ga_instruction {
4439  base_tensor &t;
4440  const base_tensor &tc1;
4441  bgeot::multi_index mi, indices;
4442  virtual int exec() {
4443  GA_DEBUG_INFO("Instruction: tensor slice");
4444  size_type order = t.sizes().size();
4445  for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
4446  mi3.incrementation(t.sizes())) {
4447  for (size_type j = 0; j < order; ++j)
4448  mi[indices[j]] = mi3[j];
4449  t(mi3) = tc1(mi);
4450  }
4451  return 0;
4452  }
4453  ga_instruction_tensor_slice(base_tensor &t_,
4454  const base_tensor &tc1_,
4455  bgeot::multi_index &mi_,
4456  bgeot::multi_index &indices_)
4457  : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
4458  };
4459 
4460  struct ga_instruction_transformation_call : public ga_instruction {
4461  const ga_workspace &workspace;
4462  ga_instruction_set::interpolate_info &inin;
4463  pinterpolate_transformation trans;
4464  fem_interpolation_context &ctx;
4465  const base_small_vector &Normal;
4466  const mesh &m;
4467  bool compute_der;
4468 
4469  virtual int exec() {
4470  GA_DEBUG_INFO("Instruction: call interpolate transformation");
4471  base_node P_ref;
4472  size_type cv;
4473  short_type face_num;
4474  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
4475  face_num, P_ref, inin.Normal,
4476  inin.derivatives, compute_der);
4477  if (inin.pt_type) {
4478  if (cv != size_type(-1)) {
4479  inin.m->points_of_convex(cv, inin.G);
4480  inin.ctx.change((inin.m)->trans_of_convex(cv),
4481  0, P_ref, inin.G, cv, face_num);
4482  inin.has_ctx = true;
4483  if (face_num != short_type(-1)) {
4484  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
4485  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4486  } else
4487  inin.Normal.resize(0);
4488  inin.pt_y = inin.ctx.xreal();
4489  } else {
4490  inin.ctx.invalid_convex_num();
4491  inin.Normal.resize(0);
4492  inin.pt_y = P_ref;
4493  inin.has_ctx = false;
4494  }
4495  } else {
4496  inin.ctx.invalid_convex_num();
4497  inin.Normal.resize(0);
4498  inin.pt_y.resize(0);
4499  inin.has_ctx = false;
4500  }
4501  GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
4502  return 0;
4503  }
4504  ga_instruction_transformation_call
4505  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4506  pinterpolate_transformation t, fem_interpolation_context &ctxx,
4507  const base_small_vector &No, const mesh &mm, bool compute_der_)
4508  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
4509  compute_der(compute_der_) {}
4510  };
4511 
4512  struct ga_instruction_neighbor_transformation_call : public ga_instruction {
4513  const ga_workspace &workspace;
4514  ga_instruction_set::interpolate_info &inin;
4515  pinterpolate_transformation trans;
4516  fem_interpolation_context &ctx;
4517  base_small_vector dummy_normal;
4518  const mesh &m;
4519  size_type &ipt;
4520  papprox_integration &pai;
4522  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;
4523 
4524  virtual int exec() {
4525  bool cancel_optimization = false;
4526  GA_DEBUG_INFO("Instruction: call interpolate neighbor transformation");
4527  if (ipt == 0) {
4528  if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
4529  || cancel_optimization) {
4530  inin.ctx.invalid_convex_num();
4531  } else {
4532  // Test if the situation has already been encountered
4533  size_type cv = ctx.convex_num();
4534  short_type f = ctx.face_num();
4535  auto adj_face = m.adjacent_face(cv, f);
4536  if (adj_face.cv == size_type(-1)) {
4537  GMM_WARNING2("Adjacent face not found, "
4538  "probably an non-interior face");
4539  inin.ctx.invalid_convex_num();
4540  } else {
4541  gauss_pt_corresp gpc;
4542  gpc.pgt1 = m.trans_of_convex(cv);
4543  gpc.pgt2 = m.trans_of_convex(adj_face.cv);
4544  gpc.pai = pai;
4545  auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
4546  auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
4547  adj_face.f);
4548  auto str1 = gpc.pgt1->structure();
4549  auto str2 = gpc.pgt2->structure();
4550  size_type nbptf1 = str1->nb_points_of_face(f);
4551  size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
4552  gpc.nodes.resize(nbptf1*2);
4553  for (size_type i = 0; i < nbptf1; ++i) {
4554  gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
4555  bool found = false;
4556  for (size_type j = 0; j < nbptf2; ++j) {
4557  if (inds_pt2[j] == inds_pt1[i]) {
4558  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
4559  found = true;
4560  break;
4561  }
4562  }
4563  GMM_ASSERT1(found, "Internal error");
4564  }
4565  bgeot::pstored_point_tab pspt = 0;
4566  auto itm = neighbor_corresp.find(gpc);
4567  if (itm != neighbor_corresp.end()) {
4568  pspt = itm->second;
4569  } else {
4570  size_type nbpt = pai->nb_points_on_face(f);
4572  gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
4573  size_type first_ind = pai->ind_first_point_on_face(f);
4575  &spt = *(pai->pintegration_points());
4576  base_matrix G;
4577  m.points_of_convex(cv, G);
4578  fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
4579  std::vector<base_node> P_ref(nbpt);
4580 
4581  for (size_type i = 0; i < nbpt; ++i) {
4582  ctx_x.set_xref(spt[first_ind+i]);
4583  bool converged = true;
4584  gic.invert(ctx_x.xreal(), P_ref[i], converged);
4585  bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
4586  GMM_ASSERT1(is_in && converged,"Geometric transformation "
4587  "inversion has failed in neighbor transformation");
4588  }
4589  pspt = store_point_tab(P_ref);
4590  neighbor_corresp[gpc] = pspt;
4591  }
4592  m.points_of_convex(adj_face.cv, inin.G);
4593  bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
4594  inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
4595  }
4596  }
4597  }
4598 
4599  if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
4600  inin.ctx.set_ii(ipt);
4601  inin.pt_type = 1;
4602  inin.has_ctx = true;
4603  inin.pt_y = inin.ctx.xreal();
4604  inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
4605  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4606  inin.m = &m;
4607  } else {
4608  base_node P_ref;
4609  size_type cv;
4610  short_type face_num;
4611  gmm::clear(inin.Normal);
4612  inin.pt_type = trans->transform(workspace, m, ctx, dummy_normal,
4613  &(inin.m), cv, face_num, P_ref,
4614  dummy_normal, inin.derivatives,
4615  false);
4616  if (inin.pt_type) {
4617  if (cv != size_type(-1)) {
4618  inin.m->points_of_convex(cv, inin.G);
4619  inin.ctx.change((inin.m)->trans_of_convex(cv),
4620  0, P_ref, inin.G, cv, face_num);
4621  inin.has_ctx = true;
4622  if (face_num != short_type(-1)) {
4623  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
4624  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4625  } else
4626  inin.Normal.resize(0);
4627  inin.pt_y = inin.ctx.xreal();
4628  } else {
4629  inin.ctx.invalid_convex_num();
4630  inin.pt_y = P_ref;
4631  inin.has_ctx = false;
4632  }
4633  } else {
4634  inin.ctx.invalid_convex_num();
4635  inin.Normal.resize(0);
4636  inin.pt_y.resize(0);
4637  inin.has_ctx = false;
4638  }
4639  }
4640  GA_DEBUG_INFO("Instruction: end of call neighbor interpolate "
4641  "transformation");
4642  return 0;
4643  }
4644  ga_instruction_neighbor_transformation_call
4645  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4646  pinterpolate_transformation t, fem_interpolation_context &ctxx,
4647  const mesh &mm, size_type &ipt_, papprox_integration &pai_,
4648  bgeot::geotrans_precomp_pool &gp_pool_,
4649  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
4650  : workspace(w), inin(i), trans(t), ctx(ctxx), m(mm),
4651  ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
4652  neighbor_corresp(neighbor_corresp_) {}
4653  };
4654 
4655 
4656  struct ga_instruction_scalar_assembly : public ga_instruction {
4657  const base_tensor &t;
4658  scalar_type &E, &coeff;
4659  virtual int exec() {
4660  GA_DEBUG_INFO("Instruction: scalar term assembly");
4661  E += t[0] * coeff;
4662  return 0;
4663  }
4664  ga_instruction_scalar_assembly(const base_tensor &t_, scalar_type &E_,
4665  scalar_type &coeff_)
4666  : t(t_), E(E_), coeff(coeff_) {}
4667  };
4668 
4669  struct ga_instruction_vector_assembly_mf : public ga_instruction
4670  {
4671  const base_tensor &t;
4672  base_vector &VI, &Vi;
4673  const fem_interpolation_context &ctx;
4674  const gmm::sub_interval *const&I, *const I__;
4675  const mesh_fem *const&mf, *const mf__;
4676  const bool &reduced_mf;
4677  const scalar_type &coeff;
4678  const size_type &nbpt, &ipt;
4679  base_vector elem;
4680  const bool interpolate;
4681  virtual int exec() {
4682  GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
4683  bool empty_weight = (coeff == scalar_type(0));
4684  if (ipt == 0 || interpolate) {
4685  if (empty_weight) elem.resize(0);
4686  elem.resize(t.size());
4687  if (!empty_weight)
4688  copy_scaled_4(t, coeff, elem);
4689  } else if (!empty_weight)
4690  // gmm::add(gmm::scaled(t.as_vector(), coeff), elem);
4691  add_scaled_4(t, coeff, elem);
4692 
4693  if (ipt == nbpt-1 || interpolate) { // finalize
4694  GA_DEBUG_ASSERT(mf, "Internal error");
4695  if (!ctx.is_convex_num_valid()) return 0;
4696  size_type cv_1 = ctx.convex_num();
4697  size_type qmult = mf->get_qdim();
4698  if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
4699  base_vector &V = reduced_mf ? Vi : VI;
4700  GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
4701  "Bad assembly vector size " << V.size() << ">=" <<
4702  I->first() << "+"<< mf->nb_basic_dof());
4703  auto itr = elem.cbegin();
4704  auto itw = V.begin() + I->first();
4705  for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
4706  for (size_type q = 0; q < qmult; ++q)
4707  *(itw+dof+q) += *itr++;
4708  GMM_ASSERT1(itr == elem.end(), "Internal error");
4709  }
4710  return 0;
4711  }
4712 
4713  ga_instruction_vector_assembly_mf
4714  (const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
4715  const fem_interpolation_context &ctx_,
4716  const gmm::sub_interval *&I_, const mesh_fem *&mf_,
4717  const bool &reduced_mf_,
4718  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4719  bool interpolate_)
4720  : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
4721  I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
4722  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4723 
4724  ga_instruction_vector_assembly_mf
4725  (const base_tensor &t_, base_vector &V_,
4726  const fem_interpolation_context &ctx_,
4727  const gmm::sub_interval &I_, const mesh_fem &mf_,
4728  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4729  bool interpolate_)
4730  : t(t_), VI(V_), Vi(V_), ctx(ctx_),
4731  I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
4732  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4733  protected:
4734  const bool false_=false;
4735  };
4736 
4737  struct ga_instruction_vector_assembly_imd : public ga_instruction {
4738  const base_tensor &t;
4739  base_vector &V;
4740  const fem_interpolation_context &ctx;
4741  const gmm::sub_interval &I;
4742  const im_data &imd;
4743  scalar_type &coeff;
4744  const size_type &ipt;
4745  const bool initialize;
4746  virtual int exec() {
4747  GA_DEBUG_INFO("Instruction: vector term assembly for im_data variable");
4748  size_type cv = ctx.convex_num();
4749  size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
4750  GMM_ASSERT1(i+t.size() <= I.size(),
4751  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4752  auto itw = V.begin() + I.first() + i;
4753  if (initialize)
4754  for (const auto &val : t.as_vector())
4755  *itw++ = coeff*val;
4756  else
4757  for (const auto &val : t.as_vector())
4758  *itw++ += coeff*val;
4759  return 0;
4760  }
4761  ga_instruction_vector_assembly_imd
4762  (const base_tensor &t_, base_vector &V_,
4763  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4764  const im_data &imd_, scalar_type &coeff_, const size_type &ipt_,
4765  bool initialize_=false)
4766  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
4767  initialize(initialize_)
4768  {}
4769  };
4770 
4771  struct ga_instruction_vector_assembly : public ga_instruction {
4772  const base_tensor &t;
4773  base_vector &V;
4774  const gmm::sub_interval &I;
4775  scalar_type &coeff;
4776  virtual int exec() {
4777  GA_DEBUG_INFO("Instruction: vector term assembly for "
4778  "fixed size variable");
4779  gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4780  return 0;
4781  }
4782  ga_instruction_vector_assembly(const base_tensor &t_, base_vector &V_,
4783  const gmm::sub_interval &I_,
4784  scalar_type &coeff_)
4785  : t(t_), V(V_), I(I_), coeff(coeff_) {}
4786  };
4787 
4788  struct ga_instruction_assignment : public ga_instruction {
4789  const base_tensor &t;
4790  base_vector &V;
4791  const fem_interpolation_context &ctx;
4792  const im_data *imd;
4793  virtual int exec() {
4794  GA_DEBUG_INFO("Instruction: Assignement to im_data");
4795  imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4796  return 0;
4797  }
4798  ga_instruction_assignment(const base_tensor &t_, base_vector &V_,
4799  const fem_interpolation_context &ctx_,
4800  const im_data *imd_)
4801  : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
4802  };
4803 
4804  struct ga_instruction_extract_residual_on_imd_dofs : public ga_instruction {
4805  base_tensor &t;
4806  const base_vector &V;
4807  const fem_interpolation_context &ctx;
4808  const gmm::sub_interval &I;
4809  const im_data &imd;
4810  const size_type &ipt;
4811  virtual int exec() {
4812  GA_DEBUG_INFO("Instruction: extract residual for im_data variable");
4813  size_type ifirst = I.first();
4814  size_type cv = ctx.convex_num();
4815  size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
4816  GMM_ASSERT1(i+t.size() <= I.size(),
4817  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4818  for (auto &&val : t.as_vector())
4819  val = V[ifirst+(i++)];
4820  return 0;
4821  }
4822  ga_instruction_extract_residual_on_imd_dofs
4823  (base_tensor &t_, const base_vector &V_,
4824  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4825  const im_data &imd_, const size_type &ipt_)
4826  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
4827  {}
4828  };
4829 
4830 
4831  template <class MAT>
4832  inline void add_elem_matrix
4833  (MAT &K, const std::vector<size_type> &dofs1,
4834  const std::vector<size_type> &dofs2, std::vector<size_type> &/*dofs1_sort*/,
4835  const base_vector &elem, scalar_type threshold, size_type /* N */) {
4836 
4837  base_vector::const_iterator it = elem.cbegin();
4838  for (const size_type &dof2 : dofs2)
4839  for (const size_type &dof1 : dofs1) {
4840  if (gmm::abs(*it) > threshold)
4841  K(dof1, dof2) += *it;
4842  ++it;
4843  }
4844  }
4845 
4846  // static const std::vector<size_type> *the_indto_sort;
4847  // int compare_my_indices(const void *a, const void *b) {
4848  // size_type aa = *((const size_type *)(a));
4849  // size_type bb = *((const size_type *)(b));
4850  // return int((*the_indto_sort)[aa]) - int((*the_indto_sort)[bb]);
4851  // }
4852 
4853  inline void add_elem_matrix
4854  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4855  const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
4856  std::vector<size_type> &dofs1_sort,
4857  const base_vector &elem, scalar_type threshold, size_type N) {
4858 
4859  size_type s1 = dofs1.size();
4860 
4861  dofs1_sort.resize(s1);
4862  for (size_type i = 0; i < s1; ++i) { // insertion sort
4863  size_type j = i, k = j-1;
4864  while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4865  { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4866  dofs1_sort[j] = i;
4867  }
4868 
4869  // dofs1_sort.resize(s1); // test with qsort: not faster in the tested cases
4870  // for (size_type i = 0; i < s1; ++i) dofs1_sort[i] = i;
4871  // the_indto_sort = &dofs1;
4872  // qsort(&(dofs1_sort[0]), s1, sizeof(size_type), compare_my_indices);
4873 
4874  gmm::elt_rsvector_<scalar_type> ev;
4875 
4876  size_type maxest = (N+1) * s1;
4877  base_vector::const_iterator it = elem.cbegin();
4878  bool first(true);
4879  for (const size_type &dof2 : dofs2) { // Iteration on columns
4880  if (first) first = false;
4881  else it += s1;
4882  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4883  size_type nb = col.size();
4884 
4885  if (nb == 0) {
4886  col.reserve(maxest);
4887  for (size_type k : dofs1_sort) {
4888  ev.e = *(it+k);
4889  if (gmm::abs(ev.e) > threshold) {
4890  ev.c=dofs1[k];
4891  col.push_back(ev);
4892  }
4893  }
4894  } else { // column merge
4895  size_type ind = 0;
4896  for (size_type k : dofs1_sort) {
4897  ev.e = *(it+k);
4898  if (gmm::abs(ev.e) > threshold) {
4899  ev.c = dofs1[k];
4900 
4901  size_type count = nb - ind, step, l;
4902  while (count > 0) {
4903  step = count / 2;
4904  l = ind + step;
4905  if (col[l].c < ev.c) {
4906  ind = ++l;
4907  count -= step + 1;
4908  }
4909  else
4910  count = step;
4911  }
4912 
4913  auto itc = col.begin() + ind;
4914  if (ind != nb && itc->c == ev.c)
4915  itc->e += ev.e;
4916  else {
4917  if (nb - ind > 1300)
4918  GMM_WARNING2("Inefficient addition of element in rsvector with "
4919  << col.size() - ind << " non-zero entries");
4920  col.push_back(ev);
4921  if (ind != nb) {
4922  itc = col.begin() + ind;
4923  auto ite = col.end();
4924  --ite;
4925  auto itee = ite;
4926  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4927  *itc = ev;
4928  }
4929  ++nb;
4930  }
4931  ++ind;
4932  }
4933  }
4934  }
4935  }
4936  }
4937 
4938 
4939  inline void add_elem_matrix_contiguous_rows
4940  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4941  const size_type &i1, const size_type &s1,
4942  const std::vector<size_type> &dofs2,
4943  const base_vector &elem, scalar_type threshold) {
4944 
4945  gmm::elt_rsvector_<scalar_type> ev;
4946 
4947  base_vector::const_iterator it = elem.cbegin();
4948  bool first(true);
4949  for (const size_type &dof2 : dofs2) { // Iteration on columns
4950  if (first) first = false;
4951  else it += s1;
4952  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4953  size_type nb = col.size();
4954 
4955  if (nb == 0) {
4956  col.reserve(s1);
4957  for (size_type i = 0; i < s1; ++i) {
4958  ev.e = *(it+i);
4959  if (gmm::abs(ev.e) > threshold) {
4960  ev.c = i1 + i;
4961  col.push_back(ev);
4962  }
4963  }
4964  } else { // column merge (can be optimized for a contiguous range)
4965  size_type ind = 0;
4966  for (size_type i = 0; i < s1; ++i) {
4967  ev.e = *(it+i);
4968  if (gmm::abs(ev.e) > threshold) {
4969  ev.c = i1 + i;
4970 
4971  size_type count = nb - ind, step, l;
4972  while (count > 0) {
4973  step = count / 2;
4974  l = ind + step;
4975  if (col[l].c < ev.c) {
4976  ind = ++l;
4977  count -= step + 1;
4978  }
4979  else
4980  count = step;
4981  }
4982 
4983  auto itc = col.begin() + ind;
4984  if (ind != nb && itc->c == ev.c)
4985  itc->e += ev.e;
4986  else {
4987  if (nb - ind > 1300)
4988  GMM_WARNING2("Inefficient addition of element in rsvector with "
4989  << col.size() - ind << " non-zero entries");
4990  col.push_back(ev);
4991  if (ind != nb) {
4992  itc = col.begin() + ind;
4993  auto ite = col.end();
4994  --ite;
4995  auto itee = ite;
4996  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4997  *itc = ev;
4998  }
4999  ++nb;
5000  }
5001  ++ind;
5002  }
5003  }
5004  }
5005  }
5006  }
5007 
5008  inline void populate_dofs_vector
5009  (std::vector<size_type> &dofs,
5010  const size_type &size, const size_type &ifirst, const size_type &qmult,
5011  const getfem::mesh::ind_set &mfdofs)
5012  {
5013  dofs.assign(size, ifirst);
5014  auto itd = dofs.begin();
5015  if (qmult == 1)
5016  for (const auto &dof : mfdofs) *itd++ += dof;
5017  else
5018  for (const auto &dof : mfdofs)
5019  for (size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
5020  }
5021 
5022  inline void populate_dofs_vector // special case for qmult == 1
5023  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst,
5024  const getfem::mesh::ind_set &mfdofs)
5025  {
5026  dofs.assign(size, ifirst);
5027  auto itd = dofs.begin();
5028  for (const auto &dof : mfdofs) *itd++ += dof;
5029  }
5030 
5031 
5032  inline void populate_contiguous_dofs_vector
5033  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst)
5034  {
5035  dofs.assign(size, ifirst);
5036  for (size_type i=0; i < size; ++i) dofs[i] += i;
5037  }
5038 
5039  struct ga_instruction_matrix_assembly_base : public ga_instruction {
5040  const base_tensor &t;
5041  const fem_interpolation_context &ctx1, &ctx2;
5042  const scalar_type &alpha1, &alpha2, &coeff;
5043  const size_type &nbpt, &ipt;
5044  base_vector elem;
5045  bool interpolate;
5046  std::vector<size_type> dofs1, dofs2, dofs1_sort;
5047  void add_tensor_to_element_matrix(bool initialize, bool empty_weight) {
5048  if (initialize) {
5049  if (empty_weight) elem.resize(0);
5050  elem.resize(t.size());
5051  if (!empty_weight)
5052  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
5053  } else if (!empty_weight)
5054  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5055  // Faster than a daxpy blas call on my config
5056  add_scaled_4(t, coeff*alpha1*alpha2, elem);
5057  }
5058  ga_instruction_matrix_assembly_base
5059  (const base_tensor &t_,
5060  const fem_interpolation_context &ctx1_,
5061  const fem_interpolation_context &ctx2_,
5062  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5063  const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
5064  : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
5065  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
5066  dofs1(0), dofs2(0), dofs1_sort(0)
5067  {}
5068  protected:
5069  const bool false_=false;
5070  const size_type zero_=0;
5071  };
5072 
5073 
5074  struct ga_instruction_matrix_assembly_mf_mf
5075  : public ga_instruction_matrix_assembly_base
5076  {
5077  model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
5078  const gmm::sub_interval *const&I1, *const&I2, *const I1__, *const I2__;
5079  const mesh_fem *const&mf1, *const&mf2, *const mf1__, *const mf2__;
5080  const bool &reduced_mf1, &reduced_mf2; // refs to mf1/2->is_reduced()
5081  virtual int exec() {
5082  GA_DEBUG_INFO("Instruction: matrix term assembly mf-mf");
5083  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
5084 
5085  bool initialize = (ipt == 0 || interpolate);
5086  bool empty_weight = (coeff == scalar_type(0));
5087  add_tensor_to_element_matrix(initialize, empty_weight); // t --> elem
5088 
5089  if (ipt == nbpt-1 || interpolate) { // finalize
5090  model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
5091  : (reduced_mf2 ? Kru : Krr);
5092  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
5093 
5094  scalar_type ninf = gmm::vect_norminf(elem);
5095  if (ninf == scalar_type(0)) return 0;
5096 
5097  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5098  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5099  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
5100 
5101  size_type N = ctx1.N();
5102  size_type qmult1 = mf1->get_qdim();
5103  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
5104  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
5105  mf1->ind_scalar_basic_dof_of_element(cv1));
5106  if (mf1 == mf2 && cv1 == cv2) {
5107  if (ifirst1 == ifirst2) {
5108  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5109  } else {
5110  populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
5111  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5112  }
5113  } else {
5114  N = std::max(N, ctx2.N());
5115  size_type qmult2 = mf2->get_qdim();
5116  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
5117  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
5118  mf2->ind_scalar_basic_dof_of_element(cv2));
5119  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5120  }
5121  }
5122  return 0;
5123  }
5124 
5125  ga_instruction_matrix_assembly_mf_mf
5126  (const base_tensor &t_,
5127  model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
5128  model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
5129  const fem_interpolation_context &ctx1_,
5130  const fem_interpolation_context &ctx2_,
5131  const ga_instruction_set::variable_group_info &vgi1,
5132  const ga_instruction_set::variable_group_info &vgi2,
5133  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5134  bool interpolate_)
5135  : ga_instruction_matrix_assembly_base
5136  (t_, ctx1_, ctx2_, vgi1.alpha, vgi2.alpha, coeff_, nbpt_, ipt_,
5137  interpolate_),
5138  Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
5139  I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
5140  mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
5141  reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}
5142 
5143  ga_instruction_matrix_assembly_mf_mf
5144  (const base_tensor &t_,
5145  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
5146  const fem_interpolation_context &ctx1_,
5147  const fem_interpolation_context &ctx2_,
5148  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
5149  const ga_instruction_set::variable_group_info &vgi2,
5150  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5151  bool interpolate_)
5152  : ga_instruction_matrix_assembly_base
5153  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, nbpt_, ipt_, interpolate_),
5154  Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
5155  I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
5156  mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
5157  reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}
5158 
5159  ga_instruction_matrix_assembly_mf_mf
5160  (const base_tensor &t_,
5161  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
5162  const fem_interpolation_context &ctx1_,
5163  const fem_interpolation_context &ctx2_,
5164  const ga_instruction_set::variable_group_info &vgi1,
5165  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
5166  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5167  bool interpolate_)
5168  : ga_instruction_matrix_assembly_base
5169  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
5170  Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
5171  I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
5172  mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
5173  reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}
5174 
5175  ga_instruction_matrix_assembly_mf_mf
5176  (const base_tensor &t_, model_real_sparse_matrix &K_,
5177  const fem_interpolation_context &ctx1_,
5178  const fem_interpolation_context &ctx2_,
5179  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
5180  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
5181  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5182  bool interpolate_)
5183  : ga_instruction_matrix_assembly_base
5184  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
5185  Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
5186  I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
5187  mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
5188  reduced_mf1(false_), reduced_mf2(false_) {}
5189  };
5190 
5191 
5192  struct ga_instruction_matrix_assembly_imd_mf
5193  : public ga_instruction_matrix_assembly_base
5194  {
5195  model_real_sparse_matrix &Kxr, &Kxu;
5196  const gmm::sub_interval *I1, *I2__, * const &I2;
5197  const im_data *imd1;
5198  const mesh_fem * const mf2__, * const &mf2;
5199  const bool &reduced_mf2; // ref to mf2->is_reduced()
5200  virtual int exec() {
5201  GA_DEBUG_INFO("Instruction: matrix term assembly "
5202  "(imdata or fixed size)-mf");
5203  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
5204 
5205  bool empty_weight = (coeff == scalar_type(0));
5206  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
5207 
5208  scalar_type ninf = gmm::vect_norminf(elem);
5209  if (ninf == scalar_type(0)) return 0;
5210 
5211  model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
5212  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
5213  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5214  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5215  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
5216  if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ctx1.ii());
5217 
5218  populate_contiguous_dofs_vector(dofs1, s1, ifirst1); // --> dofs1
5219  size_type qmult2 = mf2->get_qdim();
5220  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
5221  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
5222  mf2->ind_scalar_basic_dof_of_element(cv2));
5223  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
5224  return 0;
5225  }
5226 
5227  ga_instruction_matrix_assembly_imd_mf
5228  (const base_tensor &t_,
5229  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
5230  const fem_interpolation_context &ctx1_,
5231  const fem_interpolation_context &ctx2_,
5232  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
5233  const ga_instruction_set::variable_group_info &vgi2,
5234  const scalar_type &coeff_, const size_type &ipt_)
5235  : ga_instruction_matrix_assembly_base
5236  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, zero_, ipt_, false),
5237  Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
5238  imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
5239  {}
5240 
5241  ga_instruction_matrix_assembly_imd_mf
5242  (const base_tensor &t_, model_real_sparse_matrix &K_,
5243  const fem_interpolation_context &ctx1_,
5244  const fem_interpolation_context &ctx2_,
5245  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
5246  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
5247  const scalar_type &coeff_, const size_type &ipt_)
5248  : ga_instruction_matrix_assembly_base
5249  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5250  Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
5251  imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
5252  };
5253 
5254  struct ga_instruction_matrix_assembly_mf_imd
5255  : public ga_instruction_matrix_assembly_base
5256  {
5257  model_real_sparse_matrix &Krx, &Kux;
5258  const gmm::sub_interval * const &I1, *const I1__, *I2;
5259  const mesh_fem * const &mf1, *const mf1__;
5260  const bool &reduced_mf1; // ref to mf1->is_reduced()
5261  const im_data *imd2;
5262  virtual int exec() {
5263  GA_DEBUG_INFO("Instruction: matrix term assembly "
5264  "mf-(imdata or fixed size)");
5265  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
5266 
5267  bool empty_weight = (coeff == scalar_type(0));
5268  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
5269 
5270  scalar_type ninf = gmm::vect_norminf(elem);
5271  if (ninf == scalar_type(0)) return 0;
5272 
5273  model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
5274  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
5275  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5276  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5277  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
5278  if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ctx2.ii());
5279 
5280  size_type qmult1 = mf1->get_qdim();
5281  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
5282  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
5283  mf1->ind_scalar_basic_dof_of_element(cv1));
5284  populate_contiguous_dofs_vector(dofs2, s2, ifirst2); // --> dofs2
5285  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
5286  return 0;
5287  }
5288 
5289  ga_instruction_matrix_assembly_mf_imd
5290  (const base_tensor &t_,
5291  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
5292  const fem_interpolation_context &ctx1_,
5293  const fem_interpolation_context &ctx2_,
5294  const ga_instruction_set::variable_group_info &vgi1,
5295  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
5296  const scalar_type &coeff_, const size_type &ipt_)
5297  : ga_instruction_matrix_assembly_base
5298  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, zero_, ipt_, false),
5299  Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
5300  mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
5301  {}
5302 
5303  ga_instruction_matrix_assembly_mf_imd
5304  (const base_tensor &t_, model_real_sparse_matrix &K_,
5305  const fem_interpolation_context &ctx1_,
5306  const fem_interpolation_context &ctx2_,
5307  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
5308  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
5309  const scalar_type &coeff_, const size_type &ipt_)
5310  : ga_instruction_matrix_assembly_base
5311  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5312  Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
5313  mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
5314  };
5315 
5316 
5317 
5318  struct ga_instruction_matrix_assembly_imd_imd
5319  : public ga_instruction_matrix_assembly_base
5320  {
5321  model_real_sparse_matrix &K;
5322  const gmm::sub_interval &I1, &I2;
5323  const im_data *imd1, *imd2;
5324  virtual int exec() {
5325  GA_DEBUG_INFO("Instruction: matrix term assembly "
5326  "(imdata or fixed size)-(imdata or fixed size)");
5327  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5328 
5329  bool empty_weight = (coeff == scalar_type(0));
5330  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
5331 
5332  scalar_type ninf = gmm::vect_norminf(elem);
5333  if (ninf == scalar_type(0)) return 0;
5334 
5335  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5336  size_type ifirst1 = I1.first(), ifirst2 = I2.first();
5337  if (imd1)
5338  ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ctx1.ii());
5339  if (imd2)
5340  ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ctx2.ii());
5341 
5342  populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
5343  add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
5344  return 0;
5345  }
5346  ga_instruction_matrix_assembly_imd_imd
5347  (const base_tensor &t_, model_real_sparse_matrix &K_,
5348  const fem_interpolation_context &ctx1_,
5349  const fem_interpolation_context &ctx2_,
5350  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
5351  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
5352  const scalar_type &coeff_, const size_type &ipt_)
5353  : ga_instruction_matrix_assembly_base
5354  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5355  K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
5356  };
5357 
5358 
5359  struct ga_instruction_matrix_assembly_standard_scalar
5360  : public ga_instruction_matrix_assembly_base
5361  {
5362  model_real_sparse_matrix &K;
5363  const gmm::sub_interval &I1, &I2;
5364  const mesh_fem *pmf1, *pmf2;
5365  virtual int exec() {
5366  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
5367  "scalar fems");
5368  if (ipt == 0) {
5369  elem.resize(t.size());
5370  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5371  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
5372  } else
5373  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5374  // Faster than a daxpy blas call on my config
5375  add_scaled_4(t, coeff*alpha1*alpha2, elem);
5376 
5377  if (ipt == nbpt-1) { // finalize
5378  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5379 
5380  scalar_type ninf = gmm::vect_norminf(elem);
5381  if (ninf == scalar_type(0)) return 0;
5382 
5383  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
5384  if (cv1 == size_type(-1)) return 0;
5385  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
5386  GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
5387  populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);
5388 
5389  if (pmf2 == pmf1 && cv1 == cv2) {
5390  if (I1.first() == I2.first()) {
5391  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5392  } else {
5393  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
5394  dofs1);
5395  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5396  }
5397  } else {
5398  if (cv2 == size_type(-1)) return 0;
5399  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
5400  GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
5401  populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
5402  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5403  }
5404  }
5405  return 0;
5406  }
5407  ga_instruction_matrix_assembly_standard_scalar
5408  (const base_tensor &t_, model_real_sparse_matrix &K_,
5409  const fem_interpolation_context &ctx1_,
5410  const fem_interpolation_context &ctx2_,
5411  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
5412  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5413  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5414  const size_type &nbpt_, const size_type &ipt_)
5415  : ga_instruction_matrix_assembly_base
5416  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5417  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
5418  };
5419 
5420  struct ga_instruction_matrix_assembly_standard_vector
5421  : public ga_instruction_matrix_assembly_base
5422  {
5423  model_real_sparse_matrix &K;
5424  const gmm::sub_interval &I1, &I2;
5425  const mesh_fem *pmf1, *pmf2;
5426  virtual int exec() {
5427  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
5428  "vector fems");
5429  if (ipt == 0) {
5430  elem.resize(t.size());
5431  copy_scaled_8(t, coeff*alpha1*alpha2, elem);
5432  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5433  } else
5434  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5435  // (Far) faster than a daxpy blas call on my config.
5436  add_scaled_8(t, coeff*alpha1*alpha2, elem);
5437 
5438  if (ipt == nbpt-1) { // finalize
5439  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5440 
5441  scalar_type ninf = gmm::vect_norminf(elem);
5442  if (ninf == scalar_type(0)) return 0;
5443  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
5444 
5445  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5446  if (cv1 == size_type(-1)) return 0;
5447  size_type qmult1 = pmf1->get_qdim();
5448  if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
5449  populate_dofs_vector(dofs1, s1, I1.first(), qmult1, // --> dofs1
5450  pmf1->ind_scalar_basic_dof_of_element(cv1));
5451 
5452  if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
5453  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5454  } else {
5455  if (pmf2 == pmf1 && cv1 == cv2) {
5456  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
5457  dofs1);
5458  } else {
5459  if (cv2 == size_type(-1)) return 0;
5460  size_type qmult2 = pmf2->get_qdim();
5461  if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
5462  populate_dofs_vector(dofs2, s2, I2.first(), qmult2, // --> dofs2
5463  pmf2->ind_scalar_basic_dof_of_element(cv2));
5464  }
5465  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5466  }
5467  }
5468  return 0;
5469  }
5470  ga_instruction_matrix_assembly_standard_vector
5471  (const base_tensor &t_, model_real_sparse_matrix &K_,
5472  const fem_interpolation_context &ctx1_,
5473  const fem_interpolation_context &ctx2_,
5474  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
5475  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5476  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5477  const size_type &nbpt_, const size_type &ipt_)
5478  : ga_instruction_matrix_assembly_base
5479  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5480  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
5481  };
5482 
5483  template<int QQ>
5484  struct ga_instruction_matrix_assembly_standard_vector_opt10
5485  : public ga_instruction_matrix_assembly_base
5486  {
5487  model_real_sparse_matrix &K;
5488  const gmm::sub_interval &I1, &I2;
5489  const mesh_fem *pmf1, *pmf2;
5490  virtual int exec() {
5491  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
5492  "vector fems optimized for format 10 qdim " << QQ);
5493  size_type s1_q = QQ*t.sizes()[0];
5494  size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
5495  scalar_type e = coeff*alpha1*alpha2;
5496  if (ipt == 0) {
5497  elem.resize(ss1*ss2);
5498  auto itel = elem.begin();
5499  for (size_type j = 0; j < ss2; ++j) {
5500  auto it = t.begin() + j*s1_q;
5501  for (size_type i = 0; i < ss1; ++i, it += QQ)
5502  *itel++ = (*it) * e;
5503  }
5504  } else {
5505  auto itel = elem.begin();
5506  for (size_type j = 0; j < ss2; ++j) {
5507  auto it = t.begin() + j*s1_q;
5508  for (size_type i = 0; i < ss1; ++i, it += QQ)
5509  *itel++ += (*it) * e;
5510  }
5511  }
5512  if (ipt == nbpt-1) { // finalize
5513  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5514 
5515  scalar_type ninf = gmm::vect_norminf(elem) * 1E-14;
5516  if (ninf == scalar_type(0)) return 0;
5517  size_type N = ctx1.N();
5518  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5519  size_type i1 = I1.first(), i2 = I2.first();
5520  if (cv1 == size_type(-1)) return 0;
5521  populate_dofs_vector(dofs1, ss1, i1,
5522  pmf1->ind_scalar_basic_dof_of_element(cv1));
5523  bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
5524 
5525  if (!same_dofs) {
5526  if (cv2 == size_type(-1)) return 0;
5527  populate_dofs_vector(dofs2, ss2, i2,
5528  pmf2->ind_scalar_basic_dof_of_element(cv2));
5529  }
5530  std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
5531  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5532  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5533  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5534  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5535  if (QQ >= 3) {
5536  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5537  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5538  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5539  }
5540  }
5541  return 0;
5542  }
5543 
5544  ga_instruction_matrix_assembly_standard_vector_opt10
5545  (const base_tensor &t_, model_real_sparse_matrix &Kn_,
5546  const fem_interpolation_context &ctx1_,
5547  const fem_interpolation_context &ctx2_,
5548  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
5549  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5550  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5551  const size_type &nbpt_, const size_type &ipt_)
5552  : ga_instruction_matrix_assembly_base
5553  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5554  K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
5555  {
5556  static_assert(QQ >= 2 && QQ <=3,
5557  "Template implemented only for QQ=2 and QQ=3");
5558  }
5559  };
5560 
5561 
5562  struct ga_instruction_condensation_sub : public ga_instruction {
5563  // one such instruction is used for every cluster of intercoupled
5564  // condensed variables
5565  gmm::dense_matrix<base_tensor *> KQJprime;
5566  std::vector<base_tensor *> RQprime;
5567  gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
5568  base_tensor invKqqqq, Kqqjj;
5569  base_vector Rqq;
5570  std::vector<std::array<size_type,3>> partQ, partJ;
5571  const scalar_type &coeff; // &alpha1, &alpha2 ?
5572  virtual int exec() { // implementation can be optimized
5573  GA_DEBUG_INFO("Instruction: variable cluster subdiagonal condensation");
5574  // copy from KQQ to invKqqqq
5575  for (const auto &qqq1 : partQ) {
5576  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5577  for (const auto &qqq2 : partQ) {
5578  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5579  if (KQQloc(q1,q2)) {
5580  auto itr = KQQloc(q1,q2)->cbegin();
5581  GMM_ASSERT1(KQQloc(q1,q2)->size()
5582  == (qq1end-qq1start)*(qq2end-qq2start),
5583  "Internal error");
5584  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2)
5585  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5586  invKqqqq(qq1,qq2) = *itr++;
5587  }
5588  }
5589  }
5590  // calculate inverse matrix invKqqqq
5591  bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));
5592 
5593  // Resize Kqqjj as primary variable sizes may change dynamically
5594  size_type prev_j(0);
5595  for (auto &&jjj : partJ) {
5596  size_type j=jjj[0];
5597  size_type new_j(0);
5598  for (const auto &qqq : partQ) {
5599  size_type q=qqq[0];
5600  if (KQJloc(q,j)) {
5601  if (new_j) {
5602  GMM_ASSERT1(new_j == KQJloc(q,j)->size(1), "Internal error");
5603  } else
5604  new_j = KQJloc(q,j)->size(1);
5605  }
5606  }
5607  // Resize KQJprime submatrices to match KQJloc sizes
5608  for (const auto &qqq : partQ) {
5609  size_type q=qqq[0];
5610  KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
5611  }
5612  jjj[1] = prev_j;
5613  prev_j += new_j;
5614  jjj[2] = prev_j;
5615  }
5616 
5617  Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
5618  gmm::clear(Kqqjj.as_vector());
5619  gmm::clear(Rqq);
5620 
5621  // multiply invKqqqq with all submatrices in KQJloc and RQprime and store
5622  // the results in Kqqjj and Rqq
5623  for (const auto &jjj : partJ) {
5624  size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
5625  for (const auto &qqq2 : partQ) {
5626  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5627  if (KQJloc(q2,j)) {
5628  auto itr = KQJloc(q2,j)->begin(); // auto &mat = KQJloc(q2,j);
5629  for (size_type jj=jjstart; jj < jjend; ++jj) {
5630  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5631  for (size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
5632  Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
5633  // Kqqjj(qq1,jj) += invKqq(qq1,qq2)*mat(qq2-qqstart,jj-jjstart);
5634  } // for qq1
5635  } // for qq2
5636  } // for jj
5637  GMM_ASSERT1(itr == KQJloc(q2,j)->cend(), "Internal error");
5638  }
5639  } // in partQ
5640  } // in partJ
5641  for (const auto &qqq2 : partQ) {
5642  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5643  if (RQprime[q2]) {
5644  auto itr = RQprime[q2]->cbegin();
5645  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5646  for (size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
5647  Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
5648  } // for qq2
5649  GMM_ASSERT1(itr == RQprime[q2]->cend(), "Internal error");
5650  }
5651  } // in partQ
5652 
5653  // distribute the results from Kqqjj/Rqq to KQJprime/RQprime
5654  // submatrices/subvectors
5655  for (const auto &qqq1 : partQ) {
5656  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5657  { // writing into RQprime
5658  auto itw = RQprime[q1]->begin();
5659  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5660  *itw++ = Rqq[qq1]/coeff;
5661  }
5662  for (const auto &jjj2 : partJ) {
5663  size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
5664  auto itw = KQJprime(q1,j2)->begin();
5665  for (size_type jj2=jj2start; jj2 < jj2end; ++jj2)
5666  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5667  *itw++ = Kqqjj(qq1,jj2);
5668  }
5669  }
5670  return 0;
5671  }
5672 
5673  ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
5674  std::vector<base_tensor *> &RQpr, // input/output
5675  const gmm::dense_matrix<base_tensor *> &KQQ,
5676  const gmm::dense_matrix<base_tensor *> &KQJ,
5677  const std::set<size_type> &Qset,
5678  const scalar_type &coeff_)
5679  : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
5680  {
5681  // * to const *
5682  KQQloc.resize(KQQ.nrows(), KQQ.ncols());
5683  KQJloc.resize(KQJ.nrows(), KQJ.ncols());
5684  for (size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
5685  for (size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];
5686 
5687  for (size_type j=0; j < KQJ.ncols(); ++j)
5688  for (const size_type &q : Qset)
5689  if (KQJ(q,j)) {
5690  partJ.push_back(std::array<size_type,3>{j,0,0});
5691  break;
5692  }
5693 
5694  partQ.resize(0);
5695  for (const size_type &q : Qset)
5696  partQ.push_back(std::array<size_type,3>{q,0,0});
5697  size_type prev_q(0);
5698  for (auto &qqq1 : partQ) {
5699  size_type q1 = qqq1[0];
5700  size_type new_q(0);
5701  for (const size_type &q2 : Qset)
5702  if (new_q) {
5703  GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
5704  new_q == KQQ(q2,q1)->size(1), "Internal error");
5705  } else
5706  new_q = KQQ(q1,q2)->size(0);
5707  qqq1[1] = prev_q;
5708  prev_q += new_q;
5709  qqq1[2] = prev_q;
5710  }
5711  invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
5712  Rqq.resize(partQ.back()[2]);
5713  // Kqqjj will be resized dynamically due to possible changes in j interval
5714  }
5715  };
5716 
5717 
5718  struct ga_instruction_condensation_super_K : public ga_instruction {
5719  base_tensor &Kij;
5720  std::vector<base_tensor *> KiQ, KQj; // indexed wrt q in Q
5721  size_type Qsize;
5722 
5723  virtual int exec() {
5724  GA_DEBUG_INFO("Instruction: contribution of condensation to kept part");
5725 
5726  size_type m = KiQ[0]->size(0);
5727  size_type n = KQj[0]->size(1);
5728  Kij.adjust_sizes(m,n);
5729  gmm::clear(Kij.as_vector());
5730  for (size_type k=0; k < Qsize; ++k) {
5731  const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
5732  size_type qqsize = K1.size(1);
5733  GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
5734  "Internal error");
5735 
5736  base_tensor::iterator it = Kij.begin();
5737  for (size_type jj = 0; jj < n; ++jj)
5738  for (size_type ii = 0; ii < m; ++ii, ++it)
5739  for (size_type qq = 0; qq < qqsize; ++qq)
5740  *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
5741  GA_DEBUG_ASSERT(it == Kij.end(), "Wrong sizes");
5742  }
5743  return 0;
5744  }
5745  ga_instruction_condensation_super_K(base_tensor &Kij_,
5746  const std::vector<base_tensor *> KiQ_,
5747  const std::vector<base_tensor *> KQj_)
5748  : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
5749  {
5750  Qsize = KiQ.size();
5751  GMM_ASSERT1(KiQ.size() == KQj.size(), "Internal error");
5752  }
5753  };
5754 
5755  struct ga_instruction_condensation_super_R : public ga_instruction {
5756  base_tensor &Ri;
5757  std::vector<base_tensor *> KiQ, RQpr; // indexed wrt q in Q
5758  size_type Qsize;
5759 
5760  virtual int exec() {
5761  GA_DEBUG_INFO("Instruction: contribution of condensation to primary rhs");
5762 
5763  size_type m = KiQ[0]->size(0);
5764  Ri.adjust_sizes(m);
5765  gmm::clear(Ri.as_vector());
5766  for (size_type k=0; k < Qsize; ++k) {
5767  const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
5768  size_type qqsize = K1.size(1);
5769  GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize, "Internal error");
5770  base_tensor::iterator it = Ri.begin();
5771  for (size_type ii = 0; ii < m; ++ii, ++it)
5772  for (size_type qq = 0; qq < qqsize; ++qq)
5773  *it -= K1[ii+qq*m] * R2[qq];
5774  GA_DEBUG_ASSERT(it == Ri.end(), "Wrong sizes");
5775  }
5776  return 0;
5777  }
5778  ga_instruction_condensation_super_R(base_tensor &Ri_,
5779  const std::vector<base_tensor *> KiQ_,
5780  const std::vector<base_tensor *> RQpr_)
5781  : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
5782  {
5783  Qsize = KiQ.size();
5784  GMM_ASSERT1(KiQ.size() == RQpr.size(), "Internal error");
5785  }
5786  };
5787 
5788  //=========================================================================
5789  // Compilation of assembly trees into a list of basic instructions
5790  //=========================================================================
5791 
5792  static void extend_variable_in_gis(const ga_workspace &workspace,
5793  const std::string &varname,
5794  ga_instruction_set &gis) {
5795  if (workspace.variable_group_exists(varname)) {
5796  for (const std::string &v : workspace.variable_group(varname))
5797  extend_variable_in_gis(workspace, v, gis);
5798  } else if (gis.extended_vars.count(varname) == 0) {
5799  const mesh_fem *mf = workspace.associated_mf(varname);
5800  if (mf->is_reduced()) {
5801  auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
5802  base_vector &U = gis.really_extended_vars[varname];
5803  gmm::resize(U, mf->nb_basic_dof() * n);
5804  mf->extend_vector(workspace.value(varname), U);
5805  gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
5806  } else {
5807  gis.extended_vars[varname] = &(workspace.value(varname));
5808  }
5809  }
5810  }
5811 
5812  static void ga_clear_node_list
5813  (pga_tree_node pnode, std::map<scalar_type,
5814  std::list<pga_tree_node> > &node_list) {
5815  std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
5816  for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
5817  it != loc_node_list.end(); ) {
5818  if (*it == pnode) it = loc_node_list.erase(it); else ++it;
5819  }
5820  for (size_type i = 0; i < pnode->children.size(); ++i)
5821  ga_clear_node_list(pnode->children[i], node_list);
5822  }
5823 
5824  // workspace argument is not const because of declaration of temporary
5825  // unreduced variables
5826  static void ga_compile_node(const pga_tree_node pnode,
5827  ga_workspace &workspace,
5828  ga_instruction_set &gis,
5829  ga_instruction_set::region_mim_instructions &rmi,
5830  const mesh &m, bool function_case,
5831  ga_if_hierarchy &if_hierarchy) {
5832 
5833  if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
5834  pnode->node_type == GA_NODE_OPERATOR ||
5835  pnode->node_type == GA_NODE_SPEC_FUNC ||
5836  pnode->node_type == GA_NODE_CONSTANT ||
5837  pnode->node_type == GA_NODE_ALLINDICES ||
5838  pnode->node_type == GA_NODE_RESHAPE ||
5839  pnode->node_type == GA_NODE_SWAP_IND ||
5840  pnode->node_type == GA_NODE_IND_MOVE_LAST ||
5841  pnode->node_type == GA_NODE_CONTRACT) return;
5842 
5843  // cout << "compiling "; ga_print_node(pnode, cout); cout << endl;
5844 
5845  pga_instruction pgai;
5846  ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
5847  ga_if_hierarchy new_if_hierarchy;
5848 
5849  const mesh_fem *mf1 = 0, *mf2 = 0;
5850  const mesh_fem **mfg1 = 0, **mfg2 = 0;
5851  fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
5852  bool tensor_to_clear = false;
5853  bool tensor_to_adapt = false;
5854 
5855  if (pnode->test_function_type) {
5856  if (pnode->name_test1.size())
5857  mf1 = workspace.associated_mf(pnode->name_test1);
5858  if (mf1) {
5859  pctx1 = &(gis.ctx);
5860  const std::string &intn1 = pnode->interpolate_name_test1;
5861  if (intn1.size()) {
5862  if (workspace.secondary_domain_exists(intn1)) {
5863  pctx1 = &(rmi.secondary_domain_infos.ctx);
5864  } else {
5865  tensor_to_adapt = true;
5866  pctx1 = &(rmi.interpolate_infos[intn1].ctx);
5867  if (workspace.variable_group_exists(pnode->name_test1)) {
5868  ga_instruction_set::variable_group_info &vgi =
5869  rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
5870  mfg1 = &(vgi.mf);
5871  mf1 = 0;
5872  }
5873  }
5874  }
5875  }
5876  if (pnode->name_test2.size())
5877  mf2 = workspace.associated_mf(pnode->name_test2);
5878  if (mf2) {
5879  pctx2 = &(gis.ctx);
5880  const std::string &intn2 = pnode->interpolate_name_test2;
5881  if (intn2.size()) {
5882  if (workspace.secondary_domain_exists(intn2)) {
5883  pctx2 = &(rmi.secondary_domain_infos.ctx);
5884  } else {
5885  tensor_to_adapt = true;
5886  pctx2 = &(rmi.interpolate_infos[intn2].ctx);
5887  if (workspace.variable_group_exists(pnode->name_test2)) {
5888  ga_instruction_set::variable_group_info &vgi =
5889  rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
5890  mfg2 = &(vgi.mf);
5891  mf2 = 0;
5892  }
5893  }
5894  }
5895  }
5896  }
5897 
5898  // Produce a resize instruction which is stored if no equivalent node is
5899  // detected and if the mesh is not uniform.
5900  pnode->t.set_to_original();
5901  pnode->t.set_sparsity(0, 0);
5902  bool is_uniform = false;
5903  if (pnode->test_function_type == 1) {
5904  if (mf1 || mfg1)
5905  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5906  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5907  if (mf1 && mf1->is_uniform())
5908  { is_uniform = true; pctx1->invalid_convex_num(); }
5909  } else if (pnode->test_function_type == 2) {
5910  if (mf2 || mfg2)
5911  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5912  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5913  if (mf2 && mf2->is_uniform())
5914  { is_uniform = true; pctx2->invalid_convex_num(); }
5915  } else if (pnode->test_function_type == 3) {
5916  if ((mf1 || mfg1) && (mf2 || mfg2)) {
5917  pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
5918  (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
5919  pnode->qdim2, mf2, mfg2);
5920  if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
5921  is_uniform = true;
5922  pctx1->invalid_convex_num();
5923  pctx2->invalid_convex_num();
5924  }
5925  } else if (mf1 || mfg1) {
5926  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5927  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5928  if (mf1 && mf1->is_uniform())
5929  { is_uniform = true; pctx1->invalid_convex_num(); }
5930  } else if (mf2 || mfg2) {
5931  pgai = std::make_shared<ga_instruction_second_ind_tensor>
5932  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5933  if (mf2 && mf2->is_uniform())
5934  { is_uniform = true; pctx2->invalid_convex_num(); }
5935  }
5936  }
5937 
5938  // Optimization: detects if an equivalent node has already been compiled
5939  pnode->t.set_to_original();
5940  if (rmi.node_list.count(pnode->hash_value) != 0) {
5941  for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
5942  // cout << "found potential equivalent nodes ";
5943  // ga_print_node(pnode, cout);
5944  // cout << " and "; ga_print_node(pnode1, cout); cout << endl;
5945  if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
5946  pnode->t.set_to_copy(pnode1->t);
5947  return;
5948  }
5949  if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
5950  // cout << "confirmed with transpose" << endl;
5951  if (pnode->nb_test_functions() == 2) {
5952  if (pgai) { // resize instruction if needed
5953  if (is_uniform)
5954  { pgai->exec(); }
5955  else { rmi.instructions.push_back(std::move(pgai)); }
5956  }
5957  pgai = std::make_shared<ga_instruction_transpose_test>
5958  (pnode->tensor(), pnode1->tensor());
5959  rmi.instructions.push_back(std::move(pgai));
5960  } else {
5961  pnode->t.set_to_copy(pnode1->t);
5962  }
5963  return;
5964  }
5965  // cout << "sub_tree_are_equal = " << int(sub_tree_are_equal(pnode, pnode1, workspace, 1)) << endl;
5966  std::stringstream ss;
5967  ss << "Detected wrong equivalent nodes:" << endl;
5968  ga_print_node(pnode, ss);
5969  ss << endl << " and " << endl;
5970  ga_print_node(pnode1, ss);
5971  ss << endl << "No problem, but hash values could be adapted." << endl;
5972  GMM_TRACE2(ss.str());
5973  }
5974  }
5975 
5976  if (pgai) { // resize instruction if needed and no equivalent node detected
5977  if (is_uniform) { pgai->exec(); }
5978  else {
5979  if (tensor_to_adapt)
5980  rmi.instructions.push_back(std::move(pgai));
5981  else
5982  rmi.elt_instructions.push_back(std::move(pgai));
5983  }
5984  }
5985 
5986  size_type interpolate_filter_inst = rmi.instructions.size();
5987  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5988  pgai = pga_instruction();
5989  rmi.instructions.push_back(std::move(pgai));
5990  if_hierarchy.increment();
5991  new_if_hierarchy.child_of(if_hierarchy);
5992  pif_hierarchy = &new_if_hierarchy;
5993  }
5994 
5995  for (size_type i = 0; i < pnode->children.size(); ++i)
5996  ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
5997  function_case, *pif_hierarchy);
5998 
5999  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
6000  const std::string &intn = pnode->interpolate_name;
6001  ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
6002  pgai = std::make_shared<ga_instruction_interpolate_filter>
6003  (pnode->tensor(), inin, pnode->nbc1,
6004  int(rmi.instructions.size() - interpolate_filter_inst));
6005  rmi.instructions[interpolate_filter_inst].swap(pgai);
6006  pgai = std::make_shared<ga_instruction_copy_tensor>
6007  (pnode->tensor(), pnode->children[0]->tensor());
6008  rmi.instructions.push_back(std::move(pgai));
6009  ga_clear_node_list(pnode->children[0], rmi.node_list);
6010  }
6011 
6012  static scalar_type minus = -scalar_type(1);
6013  size_type nbch = pnode->children.size();
6014  pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
6015  pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
6016  bgeot::multi_index mi;
6017  const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
6018  // const bgeot::multi_index &size1 = child1 ? child1->t.sizes() : mi;
6019  size_type dim0 = child0 ? child0->tensor_order() : 0;
6020  size_type dim1 = child1 ? child1->tensor_order() : 0;
6021 
6022  switch (pnode->node_type) {
6023 
6024  case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
6025  case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
6026  case GA_NODE_RESHAPE: case GA_NODE_CROSS_PRODUCT:
6027  case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
6028  case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
6029  break;
6030 
6031  case GA_NODE_X:
6032  GMM_ASSERT1(!function_case,
6033  "No use of X is allowed in scalar functions");
6034  if (pnode->nbc1) {
6035  GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
6036  GMM_ASSERT1(pnode->nbc1 <= m.dim(),
6037  "Bad index for X in expression");
6038  pgai = std::make_shared<ga_instruction_X_component>
6039  (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
6040  } else {
6041  if (pnode->tensor().size() != m.dim())
6042  pnode->init_vector_tensor(m.dim());
6043  pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
6044  }
6045  rmi.instructions.push_back(std::move(pgai));
6046  break;
6047 
6048  case GA_NODE_ELT_SIZE:
6049  GMM_ASSERT1(!function_case,
6050  "No use of element_size is allowed in functions");
6051  if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
6052  pgai = std::make_shared<ga_instruction_element_size>
6053  (pnode->tensor(), gis.elt_size);
6054  gis.need_elt_size = true;
6055  rmi.instructions.push_back(std::move(pgai));
6056  break;
6057 
6058  case GA_NODE_ELT_K:
6059  GMM_ASSERT1(!function_case,
6060  "No use of element_K is allowed in functions");
6061  pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
6062  gis.ctx);
6063  rmi.instructions.push_back(std::move(pgai));
6064  break;
6065 
6066  case GA_NODE_ELT_B:
6067  GMM_ASSERT1(!function_case,
6068  "No use of element_B is allowed in functions");
6069  pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
6070  gis.ctx);
6071  rmi.instructions.push_back(std::move(pgai));
6072  break;
6073 
6074  case GA_NODE_NORMAL:
6075  {
6076  GMM_ASSERT1(!function_case,
6077  "No use of Normal is allowed in functions");
6078  if (pnode->tensor().size() != m.dim())
6079  pnode->init_vector_tensor(m.dim());
6080  const mesh_im_level_set *mimls
6081  = dynamic_cast<const mesh_im_level_set *>(rmi.im);
6082  if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
6083  // Call with ctx (Gauss point)
6084  pgai = std::make_shared<ga_instruction_level_set_normal_vector>
6085  (pnode->tensor(), mimls, gis.ctx);
6086  rmi.instructions.push_back(std::move(pgai));
6087  } else {
6088  pgai = std::make_shared<ga_instruction_copy_Normal>
6089  (pnode->tensor(), gis.Normal);
6090  rmi.instructions.push_back(std::move(pgai));
6091  }
6092  }
6093  break;
6094 
6095  case GA_NODE_INTERPOLATE_X:
6096  case GA_NODE_INTERPOLATE_NORMAL:
6097  GMM_ASSERT1(!function_case,
6098  "No use of Interpolate is allowed in functions");
6099  if (pnode->tensor().size() != m.dim())
6100  pnode->init_vector_tensor(m.dim());
6101  if (pnode->node_type == GA_NODE_INTERPOLATE_X)
6102  pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
6103  (pnode->tensor(),
6104  rmi.interpolate_infos[pnode->interpolate_name].pt_y,
6105  rmi.interpolate_infos[pnode->interpolate_name]);
6106  else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
6107  pgai = std::make_shared<ga_instruction_copy_Normal>
6108  (pnode->tensor(),
6109  rmi.interpolate_infos[pnode->interpolate_name].Normal);
6110  rmi.instructions.push_back(std::move(pgai));
6111  break;
6112 
6113  case GA_NODE_INTERPOLATE_ELT_K:
6114  case GA_NODE_INTERPOLATE_ELT_B:
6115  GMM_ASSERT1(!function_case,
6116  "No use of Interpolate is allowed in functions");
6117  if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_K)
6118  pgai = std::make_shared<ga_instruction_element_K>
6119  (pnode->tensor(),
6120  rmi.interpolate_infos[pnode->interpolate_name].ctx);
6121  else if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_B)
6122  pgai = std::make_shared<ga_instruction_element_B>
6123  (pnode->tensor(),
6124  rmi.interpolate_infos[pnode->interpolate_name].ctx);
6125  rmi.instructions.push_back(std::move(pgai));
6126  break;
6127 
6128  case GA_NODE_SECONDARY_DOMAIN_X:
6129  case GA_NODE_SECONDARY_DOMAIN_NORMAL:
6130  {
6131  GMM_ASSERT1(!function_case,
6132  "No use of Secondary_domain is allowed in functions");
6133  auto psd = workspace.secondary_domain(pnode->interpolate_name);
6134  size_type sddim = psd->mim().linked_mesh().dim();
6135  if (pnode->tensor().size() != sddim)
6136  pnode->init_vector_tensor(sddim);
6137  if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
6138  pgai = std::make_shared<ga_instruction_X>
6139  (pnode->tensor(), rmi.secondary_domain_infos.ctx);
6140  else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
6141  pgai = std::make_shared<ga_instruction_copy_Normal>
6142  (pnode->tensor(), rmi.secondary_domain_infos.Normal);
6143  rmi.instructions.push_back(std::move(pgai));
6144  }
6145  break;
6146 
6147  case GA_NODE_VAL: case GA_NODE_GRAD:
6148  case GA_NODE_HESS: case GA_NODE_DIVERG:
6149  case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
6150  case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
6151  case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
6152  case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
6153  case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
6154  case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
6155  {
6156  bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
6157  pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
6158  pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
6159  pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
6160  if (function_case) {
6161  GMM_ASSERT1(!is_elementary,
6162  "No elementary transformation is allowed in functions");
6163  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
6164  pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
6165  pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
6166  pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
6167  "Xfem_plus not allowed in functions");
6168  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
6169  pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
6170  pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
6171  pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
6172  "Xfem_plus not allowed in functions");
6173  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6174  const im_data *imd = workspace.associated_im_data(pnode->name);
6175  GMM_ASSERT1(!mf, "No fem expression is allowed in "
6176  "function expression");
6177  GMM_ASSERT1(!imd, "No integration method data is allowed in "
6178  "function expression");
6179  if (gmm::vect_size(workspace.value(pnode->name)) == 1)
6180  pgai = std::make_shared<ga_instruction_copy_scalar>
6181  (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
6182  else
6183  pgai = std::make_shared<ga_instruction_copy_vect>
6184  (pnode->tensor().as_vector(), workspace.value(pnode->name));
6185  rmi.instructions.push_back(std::move(pgai));
6186  } else {
6187  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6188  const im_data *imd = workspace.associated_im_data(pnode->name);
6189 
6190  if (is_elementary) {
6191  mf = workspace.associated_mf(pnode->elementary_target);
6192  GMM_ASSERT1(mf && mfo,
6193  "Wrong context for elementary transformation");
6194  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6195  "The finite element of variable " << pnode->name
6196  << " has to be defined on the same mesh as the "
6197  << "integration method or interpolation used");
6198  }
6199 
6200  if (imd) {
6201  GMM_ASSERT1(pnode->node_type == GA_NODE_VAL,
6202  "Only values can be extracted on im_data (no " <<
6203  "gradient, Hessian, xfem or elementary tranformation" <<
6204  " allowed)");
6205  pgai = std::make_shared<ga_instruction_extract_local_im_data>
6206  (pnode->tensor(), *imd, workspace.value(pnode->name),
6207  gis.pai, gis.ctx, workspace.qdim(pnode->name));
6208  rmi.instructions.push_back(std::move(pgai));
6209  } else {
6210  GMM_ASSERT1(mf, "Internal error");
6211 
6212  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6213  "The finite element of variable " <<
6214  (is_elementary ? pnode->elementary_target : pnode->name)
6215  << " has to be defined on the same mesh as the "
6216  << "integration method or interpolation used");
6217 
6218  // An instruction for extracting local dofs of the variable.
6219  if (rmi.local_dofs.count(pnode->name) == 0) {
6220  rmi.local_dofs[pnode->name] = base_vector(1);
6221  extend_variable_in_gis(workspace, pnode->name, gis);
6222  // cout << "local dof of " << pnode->name << endl;
6223  size_type qmult2 = mfo->get_qdim();
6224  if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
6225  qmult2 = size_type(-1);
6226  pgai = std::make_shared<ga_instruction_slice_local_dofs>
6227  (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
6228  rmi.local_dofs[pnode->name],
6229  workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
6230  rmi.elt_instructions.push_back(std::move(pgai));
6231  }
6232 
6233  // An instruction for pfp update
6234  if (mf->is_uniform()) {
6235  if (rmi.pfps.count(mf) == 0) {
6236  rmi.pfps[mf] = 0;
6237  pgai = std::make_shared<ga_instruction_update_pfp>
6238  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6239  rmi.begin_instructions.push_back(std::move(pgai));
6240  }
6241  } else if (rmi.pfps.count(mf) == 0 ||
6242  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6243  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6244  rmi.pfps[mf] = 0;
6245  pgai = std::make_shared<ga_instruction_update_pfp>
6246  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6247  rmi.instructions.push_back(std::move(pgai));
6248  }
6249 
6250  // An instruction for the base value
6251  pgai = pga_instruction();
6252  switch (pnode->node_type) {
6253  case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
6254  if (rmi.base.count(mf) == 0 ||
6255  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6256  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6257  pgai = std::make_shared<ga_instruction_val_base>
6258  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6259  }
6260  break;
6261  case GA_NODE_XFEM_PLUS_VAL:
6262  if (rmi.xfem_plus_base.count(mf) == 0 ||
6263  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6264  {
6265  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6266  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6267  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6268  }
6269  break;
6270  case GA_NODE_XFEM_MINUS_VAL:
6271  if (rmi.xfem_minus_base.count(mf) == 0 ||
6272  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6273  {
6274  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6275  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6276  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6277  }
6278  break;
6279  case GA_NODE_GRAD: case GA_NODE_DIVERG:
6280  case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
6281  if (rmi.grad.count(mf) == 0 ||
6282  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6283  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6284  pgai = std::make_shared<ga_instruction_grad_base>
6285  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6286  }
6287  break;
6288  case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
6289  if (rmi.xfem_plus_grad.count(mf) == 0 ||
6290  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6291  {
6292  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6293  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6294  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6295  }
6296  break;
6297  case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
6298  if (rmi.xfem_minus_grad.count(mf) == 0 ||
6299  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6300  {
6301  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6302  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6303  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6304  }
6305  break;
6306  case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
6307  if (rmi.hess.count(mf) == 0 ||
6308  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6309  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6310  pgai = std::make_shared<ga_instruction_hess_base>
6311  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6312  }
6313  break;
6314  case GA_NODE_XFEM_PLUS_HESS:
6315  if (rmi.xfem_plus_hess.count(mf) == 0 ||
6316  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6317  {
6318  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6319  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6320  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6321  }
6322  break;
6323  case GA_NODE_XFEM_MINUS_HESS:
6324  if (rmi.xfem_minus_hess.count(mf) == 0 ||
6325  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6326  {
6327  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6328  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6329  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6330  }
6331  break;
6332 
6333  default : GMM_ASSERT1(false, "Internal error");
6334  }
6335  if (pgai) rmi.instructions.push_back(std::move(pgai));
6336 
6337  // The eval instruction
6338  switch (pnode->node_type) {
6339  case GA_NODE_VAL: // --> t(target_dim*Qmult)
6340  pgai = std::make_shared<ga_instruction_val>
6341  (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
6342  workspace.qdim(pnode->name));
6343  break;
6344  case GA_NODE_GRAD: // --> t(target_dim*Qmult,N)
6345  pgai = std::make_shared<ga_instruction_grad>
6346  (pnode->tensor(), rmi.grad[mf],
6347  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6348  break;
6349  case GA_NODE_HESS: // --> t(target_dim*Qmult,N,N)
6350  pgai = std::make_shared<ga_instruction_hess>
6351  (pnode->tensor(), rmi.hess[mf],
6352  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6353  break;
6354  case GA_NODE_DIVERG: // --> t(1)
6355  pgai = std::make_shared<ga_instruction_diverg>
6356  (pnode->tensor(), rmi.grad[mf],
6357  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6358  break;
6359  case GA_NODE_XFEM_PLUS_VAL: // --> t(target_dim*Qmult)
6360  pgai = std::make_shared<ga_instruction_val>
6361  (pnode->tensor(), rmi.xfem_plus_base[mf],
6362  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6363  break;
6364  case GA_NODE_XFEM_PLUS_GRAD: // --> t(target_dim*Qmult,N)
6365  pgai = std::make_shared<ga_instruction_grad>
6366  (pnode->tensor(), rmi.xfem_plus_grad[mf],
6367  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6368  break;
6369  case GA_NODE_XFEM_PLUS_HESS: // --> t(target_dim*Qmult,N,N)
6370  pgai = std::make_shared<ga_instruction_hess>
6371  (pnode->tensor(), rmi.xfem_plus_hess[mf],
6372  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6373  break;
6374  case GA_NODE_XFEM_PLUS_DIVERG: // --> t(1)
6375  pgai = std::make_shared<ga_instruction_diverg>
6376  (pnode->tensor(), rmi.xfem_plus_grad[mf],
6377  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6378  break;
6379  case GA_NODE_XFEM_MINUS_VAL: // --> t(target_dim*Qmult)
6380  pgai = std::make_shared<ga_instruction_val>
6381  (pnode->tensor(), rmi.xfem_minus_base[mf],
6382  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6383  break;
6384  case GA_NODE_XFEM_MINUS_GRAD: // --> t(target_dim*Qmult,N)
6385  pgai = std::make_shared<ga_instruction_grad>
6386  (pnode->tensor(), rmi.xfem_minus_grad[mf],
6387  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6388  break;
6389  case GA_NODE_XFEM_MINUS_HESS: // --> t(target_dim*Qmult,N,N)
6390  pgai = std::make_shared<ga_instruction_hess>
6391  (pnode->tensor(), rmi.xfem_minus_hess[mf],
6392  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6393  break;
6394  case GA_NODE_XFEM_MINUS_DIVERG: // --> t(1)
6395  pgai = std::make_shared<ga_instruction_diverg>
6396  (pnode->tensor(), rmi.xfem_minus_grad[mf],
6397  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6398  break;
6399  case GA_NODE_ELEMENTARY_VAL:
6400  { // --> t(target_dim*Qmult)
6401  ga_instruction_set::elementary_trans_info &eti
6402  = rmi.elementary_trans_infos
6403  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6404  pgai =
6405  std::make_shared<ga_instruction_elementary_trans_val>
6406  (pnode->tensor(), rmi.base[mf],
6407  rmi.local_dofs[pnode->name],
6408  workspace.qdim(pnode->elementary_target),
6409  workspace.elementary_transformation(pnode->elementary_name),
6410  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6411  }
6412  break;
6413  case GA_NODE_ELEMENTARY_GRAD:
6414  { // --> t(target_dim*Qmult,N)
6415  ga_instruction_set::elementary_trans_info &eti
6416  = rmi.elementary_trans_infos
6417  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6418  pgai =
6419  std::make_shared<ga_instruction_elementary_trans_grad>
6420  (pnode->tensor(), rmi.grad[mf],
6421  rmi.local_dofs[pnode->name],
6422  workspace.qdim(pnode->elementary_target),
6423  workspace.elementary_transformation(pnode->elementary_name),
6424  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6425  }
6426  break;
6427  case GA_NODE_ELEMENTARY_HESS:
6428  { // --> t(target_dim*Qmult,N,N)
6429  ga_instruction_set::elementary_trans_info &eti
6430  = rmi.elementary_trans_infos
6431  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6432  pgai =
6433  std::make_shared<ga_instruction_elementary_trans_hess>
6434  (pnode->tensor(), rmi.hess[mf],
6435  rmi.local_dofs[pnode->name],
6436  workspace.qdim(pnode->elementary_target),
6437  workspace.elementary_transformation(pnode->elementary_name),
6438  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6439  }
6440  break;
6441  case GA_NODE_ELEMENTARY_DIVERG:
6442  { // --> t(1)
6443  ga_instruction_set::elementary_trans_info &eti
6444  = rmi.elementary_trans_infos
6445  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6446  pgai =
6447  std::make_shared<ga_instruction_elementary_trans_diverg>
6448  (pnode->tensor(), rmi.grad[mf],
6449  rmi.local_dofs[pnode->name],
6450  workspace.qdim(pnode->elementary_target),
6451  workspace.elementary_transformation(pnode->elementary_name),
6452  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6453  }
6454  break;
6455  default: break;
6456  }
6457  rmi.instructions.push_back(std::move(pgai));
6458  }
6459  }
6460  }
6461  break;
6462 
6463  case GA_NODE_SECONDARY_DOMAIN_VAL: case GA_NODE_SECONDARY_DOMAIN_GRAD:
6464  case GA_NODE_SECONDARY_DOMAIN_HESS: case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6465  {
6466  GMM_ASSERT1(!function_case, "internal error");
6467  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6468  const im_data *imd = workspace.associated_im_data(pnode->name);
6469  const std::string &intn = pnode->interpolate_name;
6470  auto &sdi = rmi.secondary_domain_infos;
6471 
6472  fem_interpolation_context *pctx = &(sdi.ctx);
6473  papprox_integration pai = sdi.pai;
6474  psecondary_domain psd = workspace.secondary_domain(intn);
6475 
6476  if (imd) {
6477  pgai = std::make_shared<ga_instruction_extract_local_im_data>
6478  (pnode->tensor(), *imd, workspace.value(pnode->name),
6479  pai, *pctx, workspace.qdim(pnode->name));
6480  rmi.instructions.push_back(std::move(pgai));
6481  } else {
6482  GMM_ASSERT1(mf, "Internal error");
6483  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6484  "The finite element of variable " << pnode->name <<
6485  " has to be defined on the same mesh as the "
6486  "integration method or interpolation used on the "
6487  "secondary domain");
6488 
6489  // An instruction for extracting local dofs of the variable.
6490  if (sdi.local_dofs.count(pnode->name) == 0) {
6491  sdi.local_dofs[pnode->name] = base_vector(1);
6492  extend_variable_in_gis(workspace, pnode->name, gis);
6493  size_type qmult2 = mf->get_qdim();
6494  if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
6495  qmult2 = size_type(-1);
6496  pgai = std::make_shared<ga_instruction_slice_local_dofs>
6497  (*mf, *(gis.extended_vars[pnode->name]), *pctx,
6498  sdi.local_dofs[pnode->name],
6499  workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
6500  rmi.elt_instructions.push_back(std::move(pgai));
6501  }
6502 
6503  // An instruction for pfp update
6504  if (mf->is_uniform()) {
6505  if (sdi.pfps.count(mf) == 0) {
6506  sdi.pfps[mf] = 0;
6507  pgai = std::make_shared<ga_instruction_update_pfp>
6508  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6509  rmi.begin_instructions.push_back(std::move(pgai));
6510  }
6511  } else if (sdi.pfps.count(mf) == 0 ||
6512  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6513  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6514  sdi.pfps[mf] = 0;
6515  pgai = std::make_shared<ga_instruction_update_pfp>
6516  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6517  rmi.instructions.push_back(std::move(pgai));
6518  }
6519 
6520  // An instruction for the base value
6521  pgai = pga_instruction();
6522  switch (pnode->node_type) {
6523  case GA_NODE_SECONDARY_DOMAIN_VAL:
6524  if (sdi.base.count(mf) == 0 ||
6525  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6526  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6527  pgai = std::make_shared<ga_instruction_val_base>
6528  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6529  }
6530  break;
6531  case GA_NODE_SECONDARY_DOMAIN_GRAD:
6532  case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6533  if (sdi.grad.count(mf) == 0 ||
6534  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6535  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6536  pgai = std::make_shared<ga_instruction_grad_base>
6537  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6538  }
6539  break;
6540  case GA_NODE_SECONDARY_DOMAIN_HESS:
6541  if (sdi.hess.count(mf) == 0 ||
6542  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6543  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6544  pgai = std::make_shared<ga_instruction_hess_base>
6545  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6546  }
6547  break;
6548  default : GMM_ASSERT1(false, "Internal error");
6549  }
6550  if (pgai) rmi.instructions.push_back(std::move(pgai));
6551 
6552  // The eval instruction
6553  switch (pnode->node_type) {
6554  case GA_NODE_SECONDARY_DOMAIN_VAL: // --> t(target_dim*Qmult)
6555  pgai = std::make_shared<ga_instruction_val>
6556  (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
6557  workspace.qdim(pnode->name));
6558  break;
6559  case GA_NODE_SECONDARY_DOMAIN_GRAD: // --> t(target_dim*Qmult,N)
6560  pgai = std::make_shared<ga_instruction_grad>
6561  (pnode->tensor(), sdi.grad[mf],
6562  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6563  break;
6564  case GA_NODE_SECONDARY_DOMAIN_HESS: // --> t(target_dim*Qmult,N,N)
6565  pgai = std::make_shared<ga_instruction_hess>
6566  (pnode->tensor(), sdi.hess[mf],
6567  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6568  break;
6569  case GA_NODE_SECONDARY_DOMAIN_DIVERG: // --> t(1)
6570  pgai = std::make_shared<ga_instruction_diverg>
6571  (pnode->tensor(), sdi.grad[mf],
6572  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6573  break;
6574  default: break;
6575  }
6576  rmi.instructions.push_back(std::move(pgai));
6577  }
6578  }
6579  break;
6580 
6581  case GA_NODE_INTERPOLATE_VAL: case GA_NODE_INTERPOLATE_GRAD:
6582  case GA_NODE_INTERPOLATE_HESS: case GA_NODE_INTERPOLATE_DIVERG:
6583  {
6584  extend_variable_in_gis(workspace, pnode->name, gis);
6585 
6586  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6587  const std::string &intn = pnode->interpolate_name;
6588  const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6589  fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6590  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6591  if (workspace.variable_group_exists(pnode->name)) {
6592  ga_instruction_set::variable_group_info &vgi =
6593  rmi.interpolate_infos[intn].groups_info[pnode->name];
6594  mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6595  }
6596 
6597  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6598  // --> t(target_dim*Qmult)
6599  pgai = std::make_shared<ga_instruction_interpolate_val>
6600  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6601  workspace.qdim(pnode->name),
6602  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6603  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6604  // --> t(target_dim*Qmult,N)
6605  pgai = std::make_shared<ga_instruction_interpolate_grad>
6606  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6607  workspace.qdim(pnode->name),
6608  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6609  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6610  // --> t(target_dim*Qmult,N,N)
6611  pgai = std::make_shared<ga_instruction_interpolate_hess>
6612  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6613  workspace.qdim(pnode->name),
6614  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6615  } else { // --> t(1)
6616  pgai = std::make_shared<ga_instruction_interpolate_diverg>
6617  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6618  workspace.qdim(pnode->name),
6619  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6620  }
6621  rmi.instructions.push_back(std::move(pgai));
6622  }
6623  break;
6624 
6625  case GA_NODE_INTERPOLATE_DERIVATIVE:
6626  GMM_ASSERT1(!function_case,
6627  "No use of Interpolate is allowed in functions");
6628  pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
6629  (pnode->tensor(),
6630  rmi.interpolate_infos[pnode->interpolate_name_der]
6631  .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6632  rmi.instructions.push_back(std::move(pgai));
6633  break;
6634 
6635  case GA_NODE_VAL_TEST: case GA_NODE_GRAD_TEST:
6636  case GA_NODE_HESS_TEST: case GA_NODE_DIVERG_TEST:
6637  case GA_NODE_ELEMENTARY_VAL_TEST: case GA_NODE_ELEMENTARY_GRAD_TEST:
6638  case GA_NODE_ELEMENTARY_HESS_TEST: case GA_NODE_ELEMENTARY_DIVERG_TEST:
6639  case GA_NODE_XFEM_PLUS_VAL_TEST: case GA_NODE_XFEM_PLUS_GRAD_TEST:
6640  case GA_NODE_XFEM_PLUS_HESS_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6641  case GA_NODE_XFEM_MINUS_VAL_TEST: case GA_NODE_XFEM_MINUS_GRAD_TEST:
6642  case GA_NODE_XFEM_MINUS_HESS_TEST: case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6643  // GMM_ASSERT1(!function_case,
6644  // "Test functions not allowed in functions");
6645  {
6646  bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6647  pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6648  pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6649  pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6650  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6651  if (is_elementary) {
6652  mf = workspace.associated_mf(pnode->elementary_target);
6653  GMM_ASSERT1(mf && mfo,
6654  "Wrong context for elementary transformation");
6655  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6656  "The finite element of variable " << pnode->name
6657  << " has to be defined on the same mesh as the "
6658  << "integration method or interpolation used");
6659  }
6660 
6661  if (mf) {
6662  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6663  "The finite element of variable " <<
6664  (is_elementary ? pnode->elementary_target : pnode->name)
6665  << " and the applied integration method have to be"
6666  << " defined on the same mesh");
6667 
6668  // An instruction for pfp update
6669  if (is_uniform) {
6670  if (rmi.pfps.count(mf) == 0) {
6671  rmi.pfps[mf] = 0;
6672  pgai = std::make_shared<ga_instruction_update_pfp>
6673  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6674  rmi.begin_instructions.push_back(std::move(pgai));
6675  }
6676  } else if (rmi.pfps.count(mf) == 0 ||
6677  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6678  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6679  rmi.pfps[mf] = 0;
6680  pgai = std::make_shared<ga_instruction_update_pfp>
6681  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6682  rmi.instructions.push_back(std::move(pgai));
6683  }
6684 
6685  // An instruction for the base value
6686  pgai = pga_instruction();
6687  switch (pnode->node_type) {
6688  case GA_NODE_VAL_TEST: case GA_NODE_ELEMENTARY_VAL_TEST:
6689  if (rmi.base.count(mf) == 0 ||
6690  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6691  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6692  pgai = std::make_shared<ga_instruction_val_base>
6693  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6694  }
6695  break;
6696  case GA_NODE_XFEM_PLUS_VAL_TEST:
6697  if (rmi.xfem_plus_base.count(mf) == 0 ||
6698  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6699  {
6700  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6701  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6702  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6703  }
6704  break;
6705  case GA_NODE_XFEM_MINUS_VAL_TEST:
6706  if (rmi.xfem_minus_base.count(mf) == 0 ||
6707  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6708  {
6709  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6710  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6711  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6712  }
6713  break;
6714  case GA_NODE_GRAD_TEST: case GA_NODE_DIVERG_TEST:
6715  case GA_NODE_ELEMENTARY_GRAD_TEST:
6716  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6717  if (rmi.grad.count(mf) == 0 ||
6718  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6719  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6720  pgai = std::make_shared<ga_instruction_grad_base>
6721  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6722  }
6723  break;
6724  case GA_NODE_XFEM_PLUS_GRAD_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6725  if (rmi.xfem_plus_grad.count(mf) == 0 ||
6726  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6727  {
6728  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6729  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6730  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6731  }
6732  break;
6733  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6734  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6735  if (rmi.xfem_minus_grad.count(mf) == 0 ||
6736  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6737  {
6738  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6739  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6740  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6741  }
6742  break;
6743  case GA_NODE_HESS_TEST: case GA_NODE_ELEMENTARY_HESS_TEST:
6744  if (rmi.hess.count(mf) == 0 ||
6745  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6746  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6747  pgai = std::make_shared<ga_instruction_hess_base>
6748  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6749  }
6750  break;
6751  case GA_NODE_XFEM_PLUS_HESS_TEST:
6752  if (rmi.xfem_plus_hess.count(mf) == 0 ||
6753  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6754  {
6755  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6756  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6757  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6758  }
6759  break;
6760  case GA_NODE_XFEM_MINUS_HESS_TEST:
6761  if (rmi.xfem_minus_hess.count(mf) == 0 ||
6762  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6763  {
6764  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6765  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6766  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6767  }
6768  break;
6769 
6770  default : GMM_ASSERT1(false, "Internal error");
6771  }
6772  if (pgai) rmi.instructions.push_back(std::move(pgai));
6773 
6774  // The copy of the real_base_value
6775  switch(pnode->node_type) {
6776  case GA_NODE_VAL_TEST:
6777  // --> t(Qmult*ndof,Qmult*target_dim)
6778  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6779  pnode->t.set_sparsity(1, mf->get_qdim());
6780  tensor_to_clear = true;
6781  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6782  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6783  } else {
6784  pgai = std::make_shared<ga_instruction_copy_val_base>
6785  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6786  }
6787  break;
6788  case GA_NODE_GRAD_TEST:
6789  // --> t(Qmult*ndof,Qmult*target_dim,N)
6790  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6791  pnode->t.set_sparsity(2, mf->get_qdim());
6792  tensor_to_clear = true;
6793  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6794  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6795  } else {
6796  pgai = std::make_shared<ga_instruction_copy_grad_base>
6797  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6798  }
6799  break;
6800  case GA_NODE_HESS_TEST:
6801  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6802  pgai = std::make_shared<ga_instruction_copy_hess_base>
6803  (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6804  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6805  pnode->t.set_sparsity(3, mf->get_qdim());
6806  break;
6807  case GA_NODE_DIVERG_TEST:
6808  // --> t(Qmult*ndof)
6809  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6810  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6811  break;
6812  case GA_NODE_XFEM_PLUS_VAL_TEST:
6813  // -->t(Qmult*ndof,Qmult*target_dim)
6814  pgai = std::make_shared<ga_instruction_copy_val_base>
6815  (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6816  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6817  pnode->t.set_sparsity(1, mf->get_qdim());
6818  break;
6819  case GA_NODE_XFEM_PLUS_GRAD_TEST:
6820  // --> t(Qmult*ndof,Qmult*target_dim,N)
6821  pgai = std::make_shared<ga_instruction_copy_grad_base>
6822  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6823  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6824  pnode->t.set_sparsity(2, mf->get_qdim());
6825  break;
6826  case GA_NODE_XFEM_PLUS_HESS_TEST:
6827  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6828  pgai = std::make_shared<ga_instruction_copy_hess_base>
6829  (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6830  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6831  pnode->t.set_sparsity(3, mf->get_qdim());
6832  break;
6833  case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6834  // --> t(Qmult*ndof)
6835  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6836  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6837  break;
6838  case GA_NODE_XFEM_MINUS_VAL_TEST:
6839  // -->t(Qmult*ndof,Qmult*target_dim)
6840  pgai = std::make_shared<ga_instruction_copy_val_base>
6841  (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6842  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6843  pnode->t.set_sparsity(1, mf->get_qdim());
6844  break;
6845  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6846  // --> t(Qmult*ndof,Qmult*target_dim,N)
6847  pgai = std::make_shared<ga_instruction_copy_grad_base>
6848  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6849  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6850  pnode->t.set_sparsity(2, mf->get_qdim());
6851  break;
6852  case GA_NODE_XFEM_MINUS_HESS_TEST:
6853  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6854  pgai = std::make_shared<ga_instruction_copy_hess_base>
6855  (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6856  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6857  pnode->t.set_sparsity(3, mf->get_qdim());
6858  break;
6859  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6860  // --> t(Qmult*ndof)
6861  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6862  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6863  break;
6864  case GA_NODE_ELEMENTARY_VAL_TEST:
6865  { // --> t(Qmult*ndof,Qmult*target_dim)
6866  ga_instruction_set::elementary_trans_info &eti
6867  = rmi.elementary_trans_infos
6868  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6869  pgai =
6870  std::make_shared<ga_instruction_elementary_trans_val_base>
6871  (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6872  workspace.elementary_transformation(pnode->elementary_name),
6873  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6874  }
6875  break;
6876  case GA_NODE_ELEMENTARY_GRAD_TEST:
6877  { // --> t(Qmult*ndof,Qmult*target_dim,N)
6878  ga_instruction_set::elementary_trans_info &eti
6879  = rmi.elementary_trans_infos
6880  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6881  pgai =
6882  std::make_shared<ga_instruction_elementary_trans_grad_base>
6883  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6884  workspace.elementary_transformation(pnode->elementary_name),
6885  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6886  }
6887  break;
6888  case GA_NODE_ELEMENTARY_HESS_TEST:
6889  { // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6890  ga_instruction_set::elementary_trans_info &eti
6891  = rmi.elementary_trans_infos
6892  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6893  pgai =
6894  std::make_shared<ga_instruction_elementary_trans_hess_base>
6895  (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6896  workspace.elementary_transformation(pnode->elementary_name),
6897  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6898  }
6899  break;
6900  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6901  { // --> t(Qmult*ndof)
6902  ga_instruction_set::elementary_trans_info &eti
6903  = rmi.elementary_trans_infos
6904  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6905  pgai =
6906  std::make_shared<ga_instruction_elementary_trans_diverg_base>
6907  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6908  workspace.elementary_transformation(pnode->elementary_name),
6909  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6910  }
6911  break;
6912  default: break;
6913  }
6914  if (pgai) rmi.instructions.push_back(std::move(pgai));
6915  }
6916  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6917  }
6918  break;
6919 
6920  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6921  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6922  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6923  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6924  {
6925  GMM_ASSERT1(!function_case, "internal error");
6926  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6927  const std::string &intn = pnode->interpolate_name;
6928  auto &sdi = rmi.secondary_domain_infos;
6929 
6930  fem_interpolation_context *pctx = &(sdi.ctx);
6931  papprox_integration pai = sdi.pai;
6932  psecondary_domain psd = workspace.secondary_domain(intn);
6933  if (mf) {
6934  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6935  "The finite element of variable " << pnode->name <<
6936  " and the applied integration method have to be"
6937  " defined on the same mesh for secondary domain");
6938 
6939  // An instruction for pfp update
6940  if (is_uniform) {
6941  if (sdi.pfps.count(mf) == 0) {
6942  sdi.pfps[mf] = 0;
6943  pgai = std::make_shared<ga_instruction_update_pfp>
6944  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6945  rmi.begin_instructions.push_back(std::move(pgai));
6946  }
6947  } else if (sdi.pfps.count(mf) == 0 ||
6948  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6949  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6950  sdi.pfps[mf] = 0;
6951  pgai = std::make_shared<ga_instruction_update_pfp>
6952  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6953  rmi.instructions.push_back(std::move(pgai));
6954  }
6955 
6956  // An instruction for the base value
6957  pgai = pga_instruction();
6958  switch (pnode->node_type) {
6959  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6960  if (sdi.base.count(mf) == 0 ||
6961  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6962  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6963  pgai = std::make_shared<ga_instruction_val_base>
6964  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6965  }
6966  break;
6967  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6968  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6969  if (sdi.grad.count(mf) == 0 ||
6970  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6971  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6972  pgai = std::make_shared<ga_instruction_grad_base>
6973  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6974  }
6975  break;
6976  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6977  if (sdi.hess.count(mf) == 0 ||
6978  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6979  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6980  pgai = std::make_shared<ga_instruction_hess_base>
6981  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6982  }
6983  break;
6984  default : GMM_ASSERT1(false, "Internal error");
6985  }
6986  if (pgai) rmi.instructions.push_back(std::move(pgai));
6987 
6988  // The copy of the real_base_value
6989  switch(pnode->node_type) {
6990  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6991  // --> t(Qmult*ndof,Qmult*target_dim)
6992  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6993  pnode->t.set_sparsity(1, mf->get_qdim());
6994  tensor_to_clear = true;
6995  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6996  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6997  } else {
6998  pgai = std::make_shared<ga_instruction_copy_val_base>
6999  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
7000  }
7001  break;
7002  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
7003  // --> t(Qmult*ndof,Qmult*target_dim,N)
7004  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
7005  pnode->t.set_sparsity(2, mf->get_qdim());
7006  tensor_to_clear = true;
7007  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
7008  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7009  } else {
7010  pgai = std::make_shared<ga_instruction_copy_grad_base>
7011  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7012  }
7013  break;
7014  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
7015  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
7016  pgai = std::make_shared<ga_instruction_copy_hess_base>
7017  (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
7018  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
7019  pnode->t.set_sparsity(3, mf->get_qdim());
7020  break;
7021  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
7022  // --> t(Qmult*ndof)
7023  pgai = std::make_shared<ga_instruction_copy_diverg_base>
7024  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7025  break;
7026  default: break;
7027  }
7028  if (pgai) rmi.instructions.push_back(std::move(pgai));
7029  }
7030  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
7031  }
7032  break;
7033 
7034  case GA_NODE_INTERPOLATE_VAL_TEST: case GA_NODE_INTERPOLATE_GRAD_TEST:
7035  case GA_NODE_INTERPOLATE_HESS_TEST: case GA_NODE_INTERPOLATE_DIVERG_TEST:
7036  {
7037  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
7038  const std::string &intn = pnode->interpolate_name;
7039  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
7040  if (workspace.variable_group_exists(pnode->name)) {
7041  ga_instruction_set::variable_group_info &vgi =
7042  rmi.interpolate_infos[intn].groups_info[pnode->name];
7043  mfg = &(vgi.mf); mfn = 0;
7044  }
7045 
7046  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
7047  // --> t(Qmult*ndof,Qmult*target_dim)
7048  pgai = std::make_shared<ga_instruction_interpolate_val_base>
7049  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7050  workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
7051  gis.fp_pool);
7052  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
7053  // --> t(Qmult*ndof,Qmult*target_dim,N)
7054  pgai = std::make_shared<ga_instruction_interpolate_grad_base>
7055  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7056  workspace.qdim(pnode->name),
7057  rmi.interpolate_infos[intn], gis.fp_pool);
7058  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
7059  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
7060  pgai = std::make_shared<ga_instruction_interpolate_hess_base>
7061  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7062  workspace.qdim(pnode->name),
7063  rmi.interpolate_infos[intn], gis.fp_pool);
7064  } else { // if (pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST) {
7065  // --> t(Qmult*ndof)
7066  pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
7067  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7068  workspace.qdim(pnode->name),
7069  rmi.interpolate_infos[intn], gis.fp_pool);
7070  }
7071  rmi.instructions.push_back(std::move(pgai));
7072  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
7073  }
7074  break;
7075 
7076  case GA_NODE_OP:
7077  switch(pnode->op_type) {
7078 
7079  case GA_PLUS:
7080  if (pnode->tensor().size() == 1) {
7081  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
7082  "Internal error: child0 not scalar");
7083  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
7084  "Internal error: child1 not scalar");
7085  pgai = std::make_shared<ga_instruction_scalar_add>
7086  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7087  } else {
7088  pgai = std::make_shared<ga_instruction_add>
7089  (pnode->tensor(), child0->tensor(), child1->tensor());
7090  }
7091  if (child0->t.sparsity() == child1->t.sparsity()
7092  && child0->t.qdim() == child1->t.qdim())
7093  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7094  rmi.instructions.push_back(std::move(pgai));
7095  break;
7096 
7097  case GA_MINUS:
7098  if (pnode->tensor().size() == 1) {
7099  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
7100  "Internal error: child0 not scalar");
7101  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
7102  "Internal error: child1 not scalar");
7103  pgai = std::make_shared<ga_instruction_scalar_sub>
7104  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7105  } else {
7106  pgai = std::make_shared<ga_instruction_sub>
7107  (pnode->tensor(), child0->tensor(), child1->tensor());
7108  }
7109  if (child0->t.sparsity() == child1->t.sparsity()
7110  && child0->t.qdim() == child1->t.qdim())
7111  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7112  rmi.instructions.push_back(std::move(pgai));
7113  break;
7114 
7115  case GA_UNARY_MINUS:
7116  if (pnode->tensor().size() == 1) {
7117  GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
7118  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7119  (pnode->tensor()[0], child0->tensor()[0], minus);
7120  } else {
7121  pgai = std::make_shared<ga_instruction_scalar_mult>
7122  (pnode->tensor(), child0->tensor(), minus);
7123  }
7124  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7125  rmi.instructions.push_back(std::move(pgai));
7126  break;
7127 
7128 
7129  case GA_DOT: case GA_COLON: case GA_MULT:
7130  {
7131  size_type tps0 = child0->tensor_proper_size();
7132  size_type tps1 = child1->tensor_proper_size();
7133  size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
7134  size_type s2 = size_type(round(sqrt(scalar_type(s1))));
7135 
7136  pgai = pga_instruction();
7137  if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
7138  (pnode->op_type == GA_COLON && dim1 <= 2) ||
7139  (pnode->op_type == GA_MULT && dim0 == 4) ||
7140  (pnode->op_type == GA_MULT && dim1 <= 1) ||
7141  child0->tensor().size() == 1 || tps1 == 1) {
7142 
7143  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7144  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7145  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7146  }
7147  else if (child0->tensor().size() == 1) {
7148  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7149  pgai = std::make_shared<ga_instruction_scalar_mult>
7150  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7151  }
7152  else if (child1->tensor().size() == 1) {
7153  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7154  pgai = std::make_shared<ga_instruction_scalar_mult>
7155  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7156  }
7157  else if (pnode->test_function_type < 3) {
7158  if (tps0 == 1) {
7159  if (is_uniform) // Unrolled instruction
7160  pgai = ga_uniform_instruction_simple_tmult
7161  (pnode->tensor(), child0->tensor(), child1->tensor());
7162  else
7163  pgai = std::make_shared<ga_instruction_simple_tmult>
7164  (pnode->tensor(), child0->tensor(), child1->tensor());
7165  } else {
7166  if (tps1 == 1) {
7167  if (is_uniform) // Unrolled instruction
7168  pgai = ga_uniform_instruction_simple_tmult
7169  (pnode->tensor(), child1->tensor(), child0->tensor());
7170  else
7171  pgai = std::make_shared<ga_instruction_simple_tmult>
7172  (pnode->tensor(), child1->tensor(), child0->tensor());
7173  } else if (is_uniform) // Unrolled instruction
7174  pgai = ga_uniform_instruction_contraction_switch
7175  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7176  else // Unrolled instruction
7177  pgai = ga_instruction_contraction_switch
7178  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7179  }
7180  } else {
7181  if (child1->test_function_type == 1 ||
7182  child1->test_function_type == 3) {
7183  if (child1->test_function_type == 3 ||
7184  child1->tensor_proper_size() <= s2) {
7185  if (tps0 == 1) {
7186  if (is_uniform) { // Unrolled instruction
7187  pgai = ga_uniform_instruction_simple_tmult
7188  (pnode->tensor(), child1->tensor(), child0->tensor());
7189  } else
7190  pgai = std::make_shared<ga_instruction_simple_tmult>
7191  (pnode->tensor(), child1->tensor(), child0->tensor());
7192  } else if (is_uniform) // Unrolled instruction
7193  pgai = ga_uniform_instruction_contraction_switch
7194  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7195  else // Unrolled instruction
7196  pgai = ga_instruction_contraction_switch
7197  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7198  } else
7199  pgai = std::make_shared<ga_instruction_spec_contraction>
7200  (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
7201  } else if (child1->test_function_type == 0 ||
7202  (child0->tensor_proper_size() == s2 &&
7203  child1->tensor_proper_size() == s2)) {
7204  if (tps0 == 1) {
7205  if (is_uniform) { // Unrolled instruction
7206  pgai = ga_uniform_instruction_simple_tmult
7207  (pnode->tensor(), child0->tensor(), child1->tensor());
7208  } else
7209  pgai = std::make_shared<ga_instruction_simple_tmult>
7210  (pnode->tensor(), child0->tensor(), child1->tensor());
7211  } else {
7212  if (is_uniform) // Unrolled instruction
7213  pgai = ga_uniform_instruction_contraction_switch
7214  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7215  else // Unrolled instruction
7216  pgai = ga_instruction_contraction_switch
7217  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7218  }
7219  } else {
7220  if (child0->tensor_proper_size() == s2)
7221  pgai = ga_uniform_instruction_contraction_switch
7222  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7223  else if (child1->tensor_proper_size() == s2)
7224  pgai = std::make_shared<ga_instruction_spec_contraction>
7225  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7226  else
7227  pgai = std::make_shared<ga_instruction_spec2_contraction>
7228  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7229  }
7230  }
7231  } else { // GA_MULT or GA_DOT for dim1 > 1 or GA_COLON for dim1 > 2
7232  // and child1->tensor_proper_size() > 1
7233  if (pnode->test_function_type < 3) {
7234  if (tps0 == 1) {
7235  if (is_uniform) // Unrolled instruction
7236  pgai = ga_uniform_instruction_simple_tmult
7237  (pnode->tensor(), child0->tensor(), child1->tensor());
7238  else
7239  pgai = std::make_shared<ga_instruction_simple_tmult>
7240  (pnode->tensor(), child0->tensor(), child1->tensor());
7241  } else {
7242  if (child1->test_function_type == 0)
7243  pgai = std::make_shared<ga_instruction_matrix_mult>
7244  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7245  else
7246  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
7247  (pnode->tensor(), child0->tensor(), child1->tensor(),
7248  s2, tps0/s2, tps1/s2);
7249  }
7250  } else {
7251  if (child0->tensor_proper_size() == 1) {
7252  if (child0->test_function_type == 0 ||
7253  child0->test_function_type == 1) {
7254  if (is_uniform) // Unrolled instruction
7255  pgai = ga_uniform_instruction_simple_tmult
7256  (pnode->tensor(), child0->tensor(), child1->tensor());
7257  else
7258  pgai = std::make_shared<ga_instruction_simple_tmult>
7259  (pnode->tensor(), child0->tensor(), child1->tensor());
7260  } else
7261  pgai = std::make_shared<ga_instruction_spec_tmult>
7262  (pnode->tensor(), child1->tensor(), child0->tensor(),
7263  tps1, tps0);
7264  } else {
7265  if (child1->test_function_type == 0)
7266  pgai = std::make_shared<ga_instruction_matrix_mult>
7267  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7268  else if (child1->test_function_type == 2)
7269  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
7270  (pnode->tensor(), child0->tensor(), child1->tensor(),
7271  s2, tps0/s2, tps1/s2);
7272  else
7273  pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
7274  (pnode->tensor(), child0->tensor(), child1->tensor(),
7275  s2, tps0/s2, tps1/s2);
7276  }
7277  }
7278  }
7279  rmi.instructions.push_back(std::move(pgai));
7280  }
7281  break;
7282 
7283  case GA_DIV:
7284  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7285  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
7286  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7287  } else if (child1->tensor().size() == 1) {
7288  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7289  pgai = std::make_shared<ga_instruction_scalar_div>
7290  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7291  } else GMM_ASSERT1(false, "Internal error");
7292  rmi.instructions.push_back(std::move(pgai));
7293  break;
7294 
7295  case GA_PRINT:
7296  pnode->t.set_to_copy(child0->t);
7297  pgai = std::make_shared<ga_instruction_print_tensor>
7298  (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
7299  rmi.instructions.push_back(std::move(pgai));
7300  break;
7301 
7302  case GA_QUOTE:
7303  if (pnode->tensor_proper_size() > 1) {
7304  size_type n1 = child0->tensor_proper_size(0);
7305  size_type n2 = (child0->tensor_order() > 1) ?
7306  child0->tensor_proper_size(1) : 1;
7307  size_type nn = 1;
7308  for (size_type i = 2; i < child0->tensor_order(); ++i)
7309  nn *= child0->tensor_proper_size(i);
7310  if (child0->nb_test_functions() == 0)
7311  pgai = std::make_shared<ga_instruction_transpose_no_test>
7312  (pnode->tensor(), child0->tensor(), n1, n2, nn);
7313  else
7314  pgai = std::make_shared<ga_instruction_transpose>
7315  (pnode->tensor(), child0->tensor(), n1, n2, nn);
7316  rmi.instructions.push_back(std::move(pgai));
7317  } else {
7318  pnode->t.set_to_copy(child0->t);
7319  }
7320  break;
7321 
7322  case GA_SYM:
7323  if (pnode->tensor_proper_size() != 1) {
7324  pgai = std::make_shared<ga_instruction_sym>
7325  (pnode->tensor(), child0->tensor());
7326  rmi.instructions.push_back(std::move(pgai));
7327  } else {
7328  pnode->t.set_to_copy(child0->t);
7329  }
7330  break;
7331 
7332  case GA_SKEW:
7333  {
7334  pgai = std::make_shared<ga_instruction_skew>
7335  (pnode->tensor(), child0->tensor());
7336  rmi.instructions.push_back(std::move(pgai));
7337  }
7338  break;
7339 
7340  case GA_TRACE:
7341  {
7342  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
7343  if (N == 1) {
7344  pnode->t.set_to_copy(child0->t);
7345  } else {
7346  pgai = std::make_shared<ga_instruction_trace>
7347  (pnode->tensor(), child0->tensor(), N);
7348  rmi.instructions.push_back(std::move(pgai));
7349  }
7350  }
7351  break;
7352 
7353  case GA_DEVIATOR:
7354  {
7355  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
7356  pgai = std::make_shared<ga_instruction_deviator>
7357  (pnode->tensor(), child0->tensor(), N);
7358  rmi.instructions.push_back(std::move(pgai));
7359  }
7360  break;
7361 
7362  case GA_DOTMULT:
7363 
7364  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7365  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7366  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7367  } else if (child0->tensor().size() == 1) {
7368  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7369  pgai = std::make_shared<ga_instruction_scalar_mult>
7370  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7371  }
7372  else if (child1->tensor().size() == 1) {
7373  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7374  pgai = std::make_shared<ga_instruction_scalar_mult>
7375  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7376  }
7377  else if (child1->test_function_type == 0)
7378  pgai = std::make_shared<ga_instruction_dotmult>
7379  (pnode->tensor(), child0->tensor(), child1->tensor());
7380  else if (child0->test_function_type == 0)
7381  pgai = std::make_shared<ga_instruction_dotmult>
7382  (pnode->tensor(), child1->tensor(), child0->tensor());
7383  else if (child0->test_function_type == 1)
7384  pgai = std::make_shared<ga_instruction_dotmult_spec>
7385  (pnode->tensor(), child0->tensor(), child1->tensor());
7386  else
7387  pgai = std::make_shared<ga_instruction_dotmult_spec>
7388  (pnode->tensor(), child1->tensor(), child0->tensor());
7389 
7390  rmi.instructions.push_back(std::move(pgai));
7391  break;
7392 
7393 
7394  case GA_DOTDIV:
7395  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7396  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
7397  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7398  } else if (child1->tensor().size() == 1) {
7399  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7400  pgai = std::make_shared<ga_instruction_scalar_div>
7401  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7402  } else if (child1->test_function_type == 0) {
7403  pgai = std::make_shared<ga_instruction_dotdiv>
7404  (pnode->tensor(), child0->tensor(), child1->tensor());
7405  } else GMM_ASSERT1(false, "Internal error");
7406  rmi.instructions.push_back(std::move(pgai));
7407  break;
7408 
7409 
7410  case GA_TMULT:
7411  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7412  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7413  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7414  } else if (child0->tensor().size() == 1) {
7415  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7416  pgai = std::make_shared<ga_instruction_scalar_mult>
7417  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7418  }
7419  else if (child1->tensor().size() == 1) {
7420  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7421  pgai = std::make_shared<ga_instruction_scalar_mult>
7422  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7423  }
7424  else if (child1->test_function_type == 0) {
7425  if (is_uniform) // Unrolled instruction
7426  pgai = ga_uniform_instruction_simple_tmult
7427  (pnode->tensor(), child0->tensor(), child1->tensor());
7428  else
7429  pgai = std::make_shared<ga_instruction_simple_tmult>
7430  (pnode->tensor(), child0->tensor(), child1->tensor());
7431  } else if (child1->tensor_proper_size() == 1)
7432  pgai = std::make_shared<ga_instruction_spec2_tmult>
7433  (pnode->tensor(), child0->tensor(), child1->tensor());
7434  else
7435  pgai = std::make_shared<ga_instruction_spec_tmult>
7436  (pnode->tensor(), child0->tensor(), child1->tensor(),
7437  child0->tensor_proper_size(),
7438  child1->tensor_proper_size());
7439 
7440  rmi.instructions.push_back(std::move(pgai));
7441  break;
7442 
7443  default:GMM_ASSERT1(false, "Unexpected operation. Internal error.");
7444  }
7445  break;
7446 
7447  case GA_NODE_C_MATRIX:
7448  {
7449  if (pnode->test_function_type) {
7450  std::vector<const base_tensor *> components(pnode->children.size());
7451  for (size_type i = 0; i < pnode->children.size(); ++i)
7452  components[i] = &(pnode->children[i]->tensor());
7453  pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
7454  (pnode->tensor(), components);
7455  } else {
7456  std::vector<scalar_type *> components(pnode->children.size());
7457  for (size_type i = 0; i < pnode->children.size(); ++i)
7458  components[i] = &(pnode->children[i]->tensor()[0]);
7459  pgai = std::make_shared<ga_instruction_simple_c_matrix>
7460  (pnode->tensor(), components);
7461  }
7462  rmi.instructions.push_back(std::move(pgai));
7463  }
7464  break;
7465 
7466  case GA_NODE_PARAMS:
7467  if (child0->node_type == GA_NODE_RESHAPE) {
7468  pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
7469  child1->tensor());
7470  rmi.instructions.push_back(std::move(pgai));
7471  } else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
7472  pga_tree_node child2 = pnode->children[2];
7473  if (child1->test_function_type==2 && child2->test_function_type==1)
7474  pgai = std::make_shared<ga_instruction_cross_product_tf>
7475  (pnode->tensor(), child2->tensor(), child1->tensor(), true);
7476  else if (child1->test_function_type || child2->test_function_type)
7477  pgai = std::make_shared<ga_instruction_cross_product_tf>
7478  (pnode->tensor(), child1->tensor(), child2->tensor(), false);
7479  else
7480  pgai = std::make_shared<ga_instruction_cross_product>
7481  (pnode->tensor(), child1->tensor(), child2->tensor());
7482  rmi.instructions.push_back(std::move(pgai));
7483  } else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
7484  size_type ind;
7485  ind = size_type(round(pnode->children[2]->tensor()[0])-1);
7486  size_type ii2 = 1;
7487  for (size_type i = 0; i < child1->tensor_order(); ++i)
7488  if (i>ind) ii2 *= child1->tensor_proper_size(i);
7489  size_type nn = child1->tensor_proper_size(ind);
7490  pgai = std::make_shared<ga_instruction_index_move_last>
7491  (pnode->tensor(), child1->tensor(), nn, ii2);
7492  rmi.instructions.push_back(std::move(pgai));
7493  } else if (child0->node_type == GA_NODE_SWAP_IND) {
7494  size_type ind[4];
7495  for (size_type i = 2; i < 4; ++i)
7496  ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
7497  if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
7498  size_type ii2 = 1, ii3 = 1;
7499  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7500  if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
7501  if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
7502  }
7503  size_type nn1 = child1->tensor_proper_size(ind[2]);
7504  size_type nn2 = child1->tensor_proper_size(ind[3]);
7505 
7506  pgai = std::make_shared<ga_instruction_swap_indices>
7507  (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
7508  rmi.instructions.push_back(std::move(pgai));
7509  } else if (child0->node_type == GA_NODE_CONTRACT) {
7510  std::vector<size_type> ind(2), indsize(2);
7511  pga_tree_node child2(0);
7512  if (pnode->children.size() == 4)
7513  { ind[0] = 2; ind[1] = 3; }
7514  else if (pnode->children.size() == 5)
7515  { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
7516  else if (pnode->children.size() == 7) {
7517  ind.resize(4); indsize.resize(4);
7518  ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
7519  child2 = pnode->children[4];
7520  }
7521  size_type kk = 0, ll = 1;
7522  for (size_type i = 1; i < pnode->children.size(); ++i) {
7523  if (i == ind[kk]) {
7524  ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
7525  indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
7526  ++kk;
7527  } else ll = i;
7528  }
7529 
7530  if (pnode->children.size() == 4) {
7531  size_type i1 = ind[0], i2 = ind[1];
7532  if (i1 > i2) std::swap(i1, i2);
7533  size_type ii2 = 1, ii3 = 1;
7534  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7535  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7536  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7537  }
7538  pgai = std::make_shared<ga_instruction_contract_1_1>
7539  (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
7540  }
7541  else if (pnode->children.size() == 5) {
7542  // Particular cases should be detected (ii2=ii3=1 in particular).
7543  size_type i1 = ind[0], i2 = ind[1];
7544  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
7545  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7546  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7547  if (i > i1) ii2 *= child1->tensor_proper_size(i);
7548  }
7549  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7550  if (i < i2) ii3 *= child2->tensor_proper_size(i);
7551  if (i > i2) ii4 *= child2->tensor_proper_size(i);
7552  }
7553  if (child1->test_function_type==1 && child2->test_function_type==2)
7554  pgai = std::make_shared<ga_instruction_contract_2_1_rev>
7555  (pnode->tensor(), child1->tensor(), child2->tensor(),
7556  indsize[0], ii1, ii2, ii3, ii4);
7557  else
7558  pgai = std::make_shared<ga_instruction_contract_2_1>
7559  (pnode->tensor(), child1->tensor(), child2->tensor(),
7560  indsize[0], ii1, ii2, ii3, ii4);
7561  }
7562  else if (pnode->children.size() == 7) {
7563  // Particular cases should be detected (ii2=ii3=1 in particular).
7564  size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7565  size_type nn1 = indsize[0], nn2 = indsize[1];
7566  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
7567  if (i1 > i2)
7568  { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7569  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7570  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7571  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7572  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7573  }
7574  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7575  if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7576  if ((i > i3 && i < i4) || (i > i4 && i < i3))
7577  ii5 *= child2->tensor_proper_size(i);
7578  if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7579  }
7580  if (child1->test_function_type==1 && child2->test_function_type==2)
7581  pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7582  (pnode->tensor(), child1->tensor(), child2->tensor(),
7583  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7584  else
7585  pgai = std::make_shared<ga_instruction_contract_2_2>
7586  (pnode->tensor(), child1->tensor(), child2->tensor(),
7587  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7588  }
7589  rmi.instructions.push_back(std::move(pgai));
7590  } else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7591 
7592  std::string name = child0->name;
7593  const ga_predef_function_tab &PREDEF_FUNCTIONS
7595  ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7596  const ga_predef_function &F = it->second;
7597  size_type nbargs = F.nbargs();
7598  pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
7599 
7600  if (nbargs == 1) {
7601  if (child1->tensor().size() == 1) {
7602  if (F.ftype() == 0)
7603  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7604  (pnode->tensor()[0], child1->tensor()[0], F.f1());
7605  else
7606  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7607  (pnode->tensor()[0], child1->tensor()[0], F);
7608  } else {
7609  if (F.ftype() == 0)
7610  pgai = std::make_shared<ga_instruction_eval_func_1arg>
7611  (pnode->tensor(), child1->tensor(), F.f1());
7612  else
7613  pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7614  (pnode->tensor(), child1->tensor(), F);
7615  }
7616  } else {
7617  if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7618  if (F.ftype() == 0)
7619  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7620  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7621  F.f2());
7622  else
7623  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7624  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7625  F);
7626  } else if (child1->tensor().size() == 1) {
7627  if (F.ftype() == 0)
7628  pgai =
7629  std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7630  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7631  else
7632  pgai =
7633  std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7634  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7635  } else if (child2->tensor().size() == 1) {
7636  if (F.ftype() == 0)
7637  pgai =
7638  std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7639  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7640  else
7641  pgai =
7642  std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7643  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7644  } else {
7645  if (F.ftype() == 0)
7646  pgai = std::make_shared<ga_instruction_eval_func_2arg>
7647  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7648  else
7649  pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7650  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7651  }
7652  }
7653  rmi.instructions.push_back(std::move(pgai));
7654 
7655  } else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7656 
7657  GMM_ASSERT1(false, "Internal error");
7658 
7659  } else if (child0->node_type == GA_NODE_OPERATOR) {
7660 
7661  ga_predef_operator_tab &PREDEF_OPERATORS
7663  ga_predef_operator_tab::T::iterator it
7664  = PREDEF_OPERATORS.tab.find(child0->name);
7665  const ga_nonlinear_operator &OP = *(it->second);
7666  ga_nonlinear_operator::arg_list args;
7667  for (size_type i = 1; i < pnode->children.size(); ++i)
7668  args.push_back(&(pnode->children[i]->tensor()));
7669 
7670  if (child0->der1 && child0->der2 == 0) {
7671  pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7672  (pnode->tensor(), OP, args, child0->der1);
7673  } else if (child0->der1 && child0->der2) {
7674  pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7675  (pnode->tensor(), OP, args, child0->der1, child0->der2);
7676  } else {
7677  pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
7678  OP, args);
7679  }
7680  rmi.instructions.push_back(std::move(pgai));
7681 
7682  } else { // Access to a component of the tensor
7683  bgeot::multi_index mi1(size0.size()), indices;
7684  size_type nb_test = pnode->nb_test_functions();
7685  if (pnode->tensor().size() == 1) {
7686  for (size_type i = 0; i < child0->tensor_order(); ++i)
7687  mi1[i+nb_test] = size_type(round(pnode->children[i+1]->tensor()[0])-1);
7688  pgai = std::make_shared<ga_instruction_copy_scalar>
7689  (pnode->tensor()[0], child0->tensor()(mi1));
7690  } else {
7691  for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7692  for (size_type i = 0; i < child0->tensor_order(); ++i) {
7693  if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7694  mi1[i+nb_test]
7695  = size_type(round(pnode->children[i+1]->tensor()[0])- 1);
7696  else
7697  indices.push_back(i+nb_test);
7698  }
7699  pgai = std::make_shared<ga_instruction_tensor_slice>
7700  (pnode->tensor(), child0->tensor(), mi1, indices);
7701  }
7702  rmi.instructions.push_back(std::move(pgai));
7703  }
7704 
7705  break;
7706 
7707  default:GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type
7708  << " in compilation. Internal error.");
7709  }
7710  if (tensor_to_clear) {
7711  gmm::clear(pnode->tensor().as_vector());
7712  if (!is_uniform) {
7713  pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7714  rmi.elt_instructions.push_back(std::move(pgai));
7715  }
7716  }
7717  rmi.node_list[pnode->hash_value].push_back(pnode);
7718  } // ga_compile_node
7719 
7720  void ga_compile_function(ga_workspace &workspace,
7721  ga_instruction_set &gis, bool scalar) {
7722  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7723  const ga_workspace::tree_description &td = workspace.tree_info(i);
7724 
7725  gis.trees.push_back(*(td.ptree));
7726  pga_tree_node root = gis.trees.back().root;
7727  if (root) {
7728  GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7729  "The result of the given expression is not a scalar");
7730  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7731  gis.all_instructions[rm].m = td.m;
7732  ga_if_hierarchy if_hierarchy;
7733  ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7734  *(td.m), true, if_hierarchy);
7735 
7736  gis.coeff = scalar_type(1);
7737  pga_instruction pgai;
7738  workspace.assembled_tensor() = root->tensor();
7739  pgai = std::make_shared<ga_instruction_add_to_coeff>
7740  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7741  gis.all_instructions[rm].instructions.push_back(std::move(pgai));
7742  }
7743  }
7744  }
7745 
7746  static bool ga_node_used_interpolates
7747  (const pga_tree_node pnode, const ga_workspace &workspace,
7748  std::map<std::string, std::set<std::string> > &interpolates,
7749  std::set<std::string> &interpolates_der) {
7750  bool found = false;
7751  bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7752  pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7753  pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7754  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
7755  bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7756  pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7757  pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7758  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7759 
7760  if (intrpl || intrpl_test ||
7761  pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7762  pnode->node_type == GA_NODE_INTERPOLATE_X ||
7763  pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
7764  interpolates[pnode->interpolate_name].size();
7765  if (intrpl || intrpl_test) {
7766  if (workspace.variable_group_exists(pnode->name))
7767  interpolates[pnode->interpolate_name].insert(pnode->name);
7768  }
7769  found = true;
7770  }
7771  if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7772  interpolates_der.insert(pnode->interpolate_name_der);
7773  interpolates[pnode->interpolate_name_der].size();
7774  if (workspace.variable_group_exists(pnode->name))
7775  interpolates[pnode->interpolate_name_der].insert(pnode->name);
7776  }
7777  for (size_type i = 0; i < pnode->children.size(); ++i)
7778  found = ga_node_used_interpolates(pnode->children[i], workspace,
7779  interpolates, interpolates_der)
7780  || found;
7781  return found;
7782  }
7783 
7784 
7785  static void ga_compile_interpolate_trans
7786  (const pga_tree_node pnode, const ga_workspace &workspace,
7787  ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
7788  const mesh &m) {
7789 
7790  std::set<std::string> interpolates_der;
7791  std::map<std::string, std::set<std::string> > transformations;
7792  ga_node_used_interpolates(pnode, workspace, transformations,
7793  interpolates_der);
7794 
7795  for (const auto &transformation : transformations) {
7796  const std::string &transname = transformation.first;
7797  bool compute_der = (interpolates_der.count(transname) != 0);
7798  if (rmi.transformations.count(transname) == 0 ||
7799  (compute_der && rmi.transformations_der.count(transname) == 0)) {
7800  rmi.transformations[transname].size();
7801  gis.transformations.insert(transname);
7802  if (compute_der) rmi.transformations_der.insert(transname);
7803  pga_instruction pgai;
7804  if (transname.compare("neighbor_element") == 0 ||
7805  transname.compare("neighbour_elt") == 0) {
7806  pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7807  (workspace, rmi.interpolate_infos[transname],
7808  workspace.interpolate_transformation(transname), gis.ctx,
7809  m, gis.ipt, gis.pai, gis.gp_pool, gis.neighbor_corresp);
7810  } else {
7811  pgai = std::make_shared<ga_instruction_transformation_call>
7812  (workspace, rmi.interpolate_infos[transname],
7813  workspace.interpolate_transformation(transname), gis.ctx,
7814  gis.Normal, m, compute_der);
7815  }
7816  if (pgai) rmi.instructions.push_back(std::move(pgai));
7817  }
7818 
7819  for (const std::string &nodename : transformation.second) {
7820  if (rmi.transformations[transname].count(nodename) == 0) {
7821  auto&& inin = rmi.interpolate_infos[transname];
7822  pga_instruction pgai =
7823  std::make_shared<ga_instruction_update_group_info>
7824  (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7825  rmi.instructions.push_back(std::move(pgai));
7826  rmi.transformations[transname].insert(nodename);
7827  }
7828  }
7829  }
7830  }
7831 
7832  void ga_compile_interpolation(ga_workspace &workspace,
7833  ga_instruction_set &gis) {
7834  gis.transformations.clear();
7835  gis.all_instructions.clear();
7836  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7837  const ga_workspace::tree_description &td = workspace.tree_info(i);
7838  if (td.operation != ga_workspace::ASSEMBLY) {
7839  gis.trees.push_back(*(td.ptree));
7840 
7841  // Semantic analysis mainly to evaluate fixed size variables and data
7842  const mesh *m = td.m;
7843  GMM_ASSERT1(m, "Internal error");
7844  ga_semantic_analysis(gis.trees.back(), workspace, *m,
7845  ref_elt_dim_of_mesh(*m, *(td.rg)), true, false);
7846  pga_tree_node root = gis.trees.back().root;
7847  if (root) {
7848  // Compile tree
7849  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7850  auto &rmi = gis.all_instructions[rm];
7851  rmi.m = td.m;
7852  rmi.im = td.mim;
7853  // rmi.interpolate_infos.clear();
7854  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7855  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
7856  rmi.current_hierarchy);
7857 
7858  // After compile tree
7859  workspace.assembled_tensor() = root->tensor();
7860  pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7861  (workspace.assembled_tensor(), root->tensor());
7862  rmi.instructions.push_back(std::move(pgai));
7863  }
7864  }
7865  }
7866  }
7867 
7868 
// Dictionary giving to each variable name an integer id equal to its rank
// of insertion (0 for the first name added, 1 for the second one, ...).
struct var_set : std::map<std::string,size_type> {

  // Id of `name`. An unknown name is added with the next free id.
  // An empty name is never stored and yields size_type(-1).
  size_type operator[](const std::string &name) {
    if (name.empty()) return size_type(-1);
    // A single search gives either the entry or its insertion position
    auto pos = lower_bound(name);
    if (pos != end() && pos->first == name)
      return pos->second;
    const size_type new_id = size();
    emplace_hint(pos, name, new_id);
    return new_id;
  }

  // Name having the id `id`, or an empty string if there is none.
  // The whole dictionary is scanned (linear cost).
  std::string operator[](const size_type &id) const {
    for (const_iterator it = begin(); it != end(); ++it)
      if (it->second == id)
        return it->first;
    return "";
  }
};
7888 
7889 
7890  struct condensation_description {
7891  var_set Ivars, Jvars, Qvars; // sets of variables involved in condensation
7892  // Clusters of intercoupled condensed variables and subdiagonally coupled
7893  // primary variables for each cluster
7894  std::vector<std::set<size_type>> Qclusters, Jclusters;
7895  // Each element of Qclusters contains a group of intercoupled condensed
7896  // variables. Due to the couplings within each group, all variables of the
7897  // same group need to be condensed out simultaneously. Per definition two
7898  // clusters cannot share a common variable.
7899  // indexing of groups
7900  std::vector<size_type> cluster_of_Qvar;
7901  // Matrices of pointers to submatrices for all coupling terms
7902  gmm::dense_matrix<base_tensor *> KQQ, // diagonal
7903  KQJ, KQJpr, // subdiagonal
7904  KIQ, // superdiagonal
7905  KIJ; // outcome
7906  std::vector<base_tensor *> RI, // res. vector of coupled primary variables
7907  RQpr; // partial solution for condensed variables (initially stores residuals)
7908  };
7909 
7910  void ga_compile(ga_workspace &workspace,
7911  ga_instruction_set &gis, size_type order, bool condensation) {
7912  gis.transformations.clear();
7913  gis.all_instructions.clear();
7914  gis.unreduced_terms.clear();
7915  workspace.clear_temporary_variable_intervals();
7916 
7917  std::map<const ga_instruction_set::region_mim, condensation_description>
7918  condensations;
7919 
7920  if (condensation && order == 2) {
7921  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7922  const ga_workspace::tree_description &td = workspace.tree_info(i);
7923  if (td.order != 2 && td.order != size_type(-1))
7924  continue;
7925  ga_tree tree(*(td.ptree)); // temporary tree (not used later)
7926  ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7927  ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7928  true, false);
7929  pga_tree_node root = tree.root;
7930  if (root) {
7931  const bool
7932  v1_is_intern = workspace.is_internal_variable(root->name_test1),
7933  v2_is_intern = workspace.is_internal_variable(root->name_test2);
7934  if (v1_is_intern || v2_is_intern) {
7935  GMM_ASSERT1(tree.secondary_domain.empty(),
7936  "Condensed variable cannot be used in secondary domain");
7937 
7938  for (const auto &key_val : condensations) {
7939  const ga_instruction_set::region_mim rm0 = key_val.first;
7940  const condensation_description &CC0 = key_val.second;
7941  if (rm0.mim() == td.mim && rm0.region() != td.rg
7942  && (CC0.Qvars.count(root->name_test1) ||
7943  CC0.Qvars.count(root->name_test2))) {
7944  mesh_region intrsct = getfem::mesh_region::intersection
7945  (*(rm0.region()), *(td.rg));
7946  GMM_ASSERT1(intrsct.is_empty(),
7947  "Cannot condense coupled variables between "
7948  "intersecting regions");
7949  }
7950  }
7951  const ga_instruction_set::region_mim rm(td.mim, td.rg, nullptr);
7952 
7953  condensation_description &CC = condensations[rm];
7954  size_type
7955  q1 = v1_is_intern ? CC.Qvars[root->name_test1] : size_type(-1),
7956  q2 = v2_is_intern ? CC.Qvars[root->name_test2] : size_type(-1);
7957  GMM_ASSERT1(q1 != size_type(-1) || q2 != size_type(-1), "Error");
7958  std::vector<size_type> selected_clusters;
7959  for (size_type j=0; j < CC.Qclusters.size(); ++j)
7960  if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7961  selected_clusters.push_back(j);
7962 
7963  if (selected_clusters.empty()) { // create new cluster
7964  CC.Qclusters.push_back(std::set<size_type>());
7965  if (q1 != size_type(-1)) CC.Qclusters.back().insert(q1);
7966  if (q2 != size_type(-1)) CC.Qclusters.back().insert(q2);
7967  } else { // add into existing cluster / merge clusters together
7968  auto &target = CC.Qclusters[selected_clusters[0]];
7969  if (q1 != size_type(-1)) target.insert(q1);
7970  if (q2 != size_type(-1)) target.insert(q2);
7971  for (size_type j=selected_clusters.size()-1; j > 1; --j) {
7972  auto &source = CC.Qclusters[selected_clusters[j]];
7973  target.insert(source.begin(), source.end());
7974  CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7975  }
7976  }
7977  } // is_internal_variable
7978  } // if (root)
7979  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
7980 
7981  for (auto &key_value : condensations) {
7982  condensation_description &CC = key_value.second;
7983  //for (const auto &cluster : CC.Qclusters) {
7984  // cout << "Clusters of coupled variables:" << endl;
7985  // for (const auto &varid : cluster) cout << "/" << CC.Qvars[varid];
7986  // cout << "/" << endl;
7987  //}
7988  size_type Qsize = CC.Qvars.size();
7989 
7990  // Jclusters will hold all J variables each cluster is coupled to
7991  CC.Jclusters.resize(CC.Qclusters.size());
7992 
7993  CC.cluster_of_Qvar.resize(Qsize);
7994  for (size_type i=0; i < CC.Qclusters.size(); ++i)
7995  for (const size_type &var : CC.Qclusters[i])
7996  CC.cluster_of_Qvar[var] = i;
7997 
7998  // Qvars: all condensed variables
7999  // Qclusters: definition of clusters of intercoupled variables of Qvars
8000  // cluster_of_Qvar: dictionary for which cluster each variable belongs to
8001  CC.KQQ.resize(Qsize, Qsize);
8002  CC.RQpr.resize(Qsize);
8003  for (size_type q=0; q < Qsize; ++q) {
8004  bgeot::multi_index mi(1);
8005  mi[0] = workspace.associated_im_data(CC.Qvars[q]) ->nb_tensor_elem();
8006  gis.condensation_tensors.push_back // memory allocation
8007  (std::make_shared<base_tensor>(mi));
8008  CC.RQpr[q] = gis.condensation_tensors.back().get();
8009  }
8010  }
8011  } // if (condensation && order == 2)
8012 
8013  std::array<ga_workspace::operation_type,3>
8014  phases{ga_workspace::PRE_ASSIGNMENT,
8015  ga_workspace::ASSEMBLY,
8016  ga_workspace::POST_ASSIGNMENT};
8017  for (const auto &phase : phases) {
8018 
8019  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
8020  const ga_workspace::tree_description &td = workspace.tree_info(i);
8021  if (td.operation != phase)
8022  continue; // skip this tree in this phase
8023 
8024  if (td.order == order || td.order == size_type(-1)) {
8025  std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
8026  ? gis.trees
8027  : gis.interpolation_trees;
8028  trees.push_back(*(td.ptree));
8029  // Semantic analysis mainly to evaluate fixed size variables and data
8030  ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
8031  ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
8032  true, false);
8033  pga_tree_node root = trees.back().root;
8034  if (root) {
8035  // Compile tree
8036  // cout << "Will compile "; ga_print_node(root, cout); cout << endl;
8037 
8038  psecondary_domain psd(0);
8039  if (trees.back().secondary_domain.size())
8040  psd = workspace.secondary_domain(trees.back().secondary_domain);
8041  ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
8042  auto &rmi = gis.all_instructions[rm];
8043  rmi.m = td.m;
8044  rmi.im = td.mim;
8045  // rmi.interpolate_infos.clear();
8046  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
8047  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
8048  rmi.current_hierarchy);
8049  // cout << "compilation finished "; ga_print_node(root, cout);
8050  // cout << endl;
8051 
8052  if (phase != ga_workspace::ASSEMBLY) { // Assignment/interpolation
8053  if (!td.varname_interpolation.empty()) {
8054  auto *imd
8055  = workspace.associated_im_data(td.varname_interpolation);
8056  auto &V = const_cast<model_real_plain_vector &>
8057  (workspace.value(td.varname_interpolation));
8058  GMM_ASSERT1(imd, "Internal error");
8059  auto pgai = std::make_shared<ga_instruction_assignment>
8060  (root->tensor(), V, gis.ctx, imd);
8061  rmi.instructions.push_back(std::move(pgai));
8062  }
8063  } else { // Addition of an assembly instruction
8064  pga_instruction pgai;
8065  switch(order) {
8066  case 0: {
8067  workspace.assembled_tensor() = root->tensor();
8068  pgai = std::make_shared<ga_instruction_add_to_coeff>
8069  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
8070  break;
8071  }
8072  case 1: {
8073  GMM_ASSERT1(root->tensor_proper_size() == 1,
8074  "Invalid vector or tensor quantity. An order 1 "
8075  "weak form has to be a scalar quantity");
8076  const mesh_fem * const
8077  mf = workspace.associated_mf(root->name_test1);
8078  const im_data * const
8079  imd = workspace.associated_im_data(root->name_test1);
8080  workspace.add_temporary_interval_for_unreduced_variable
8081  (root->name_test1);
8082 
8083  base_vector &Vu = workspace.unreduced_vector(),
8084  &Vr = workspace.assembled_vector();
8085  if (mf) {
8086  const std::string &intn1 = root->interpolate_name_test1;
8087  bool secondary = !intn1.empty() &&
8088  workspace.secondary_domain_exists(intn1);
8089  fem_interpolation_context
8090  &ctx = intn1.empty() ? gis.ctx
8091  : (secondary ? rmi.secondary_domain_infos.ctx
8092  : rmi.interpolate_infos[intn1].ctx);
8093  bool interpolate =
8094  !(intn1.empty() || intn1 == "neighbor_element"
8095  || intn1 == "neighbour_elt" || secondary);
8096 
8097  if (intn1.size() && !secondary &&
8098  workspace.variable_group_exists(root->name_test1)) {
8099  ga_instruction_set::variable_group_info
8100  &vgi = rmi.interpolate_infos[intn1]
8101  .groups_info[root->name_test1];
8102  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8103  (root->tensor(), Vr, Vu, ctx,
8104  vgi.I, vgi.mf, vgi.reduced_mf,
8105  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8106  for (const std::string &name
8107  : workspace.variable_group(root->name_test1))
8108  gis.unreduced_terms.emplace(name, "");
8109  } else {
8110  base_vector &V = mf->is_reduced() ? Vu : Vr;
8111  const gmm::sub_interval
8112  &I = mf->is_reduced()
8113  ? workspace.temporary_interval_of_variable
8114  (root->name_test1)
8115  : workspace.interval_of_variable(root->name_test1);
8116  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8117  (root->tensor(), V, ctx, I, *mf,
8118  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8119  if (mf->is_reduced())
8120  gis.unreduced_terms.emplace(root->name_test1, "");
8121  }
8122  } else if (imd) {
8123  GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
8124  "Interpolate transformation on integration "
8125  "point variable");
8126  if (!workspace.is_internal_variable(root->name_test1) ||
8127  condensation)
8128  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8129  (root->tensor(), Vr, gis.ctx,
8130  workspace.interval_of_variable(root->name_test1),
8131  *imd, gis.coeff, gis.ipt);
8132  // Variable root->name_test1 can be internal or not
8133  } else {
8134  pgai = std::make_shared<ga_instruction_vector_assembly>
8135  (root->tensor(), Vr,
8136  workspace.interval_of_variable(root->name_test1),
8137  gis.coeff);
8138  }
8139  break;
8140  }
8141  case 2: {
8142  GMM_ASSERT1(root->tensor_proper_size() == 1,
8143  "Invalid vector or tensor quantity. An order 2 "
8144  "weak form has to be a scalar quantity");
8145  const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
8146  *mf2=workspace.associated_mf(root->name_test2);
8147  const im_data
8148  *imd1 = workspace.associated_im_data(root->name_test1),
8149  *imd2 = workspace.associated_im_data(root->name_test2);
8150  const std::string &intn1 = root->interpolate_name_test1,
8151  &intn2 = root->interpolate_name_test2;
8152  bool secondary1 = intn1.size() &&
8153  workspace.secondary_domain_exists(intn1);
8154  bool secondary2 = intn2.size() &&
8155  workspace.secondary_domain_exists(intn2);
8156  fem_interpolation_context
8157  &ctx1 = intn1.empty() ? gis.ctx
8158  : (secondary1 ? rmi.secondary_domain_infos.ctx
8159  : rmi.interpolate_infos[intn1].ctx),
8160  &ctx2 = intn2.empty() ? gis.ctx
8161  : (secondary2 ? rmi.secondary_domain_infos.ctx
8162  : rmi.interpolate_infos[intn2].ctx);
8163  bool interpolate = !(intn1.empty() || intn1 == "neighbor_element"
8164  || intn1 == "neighbour_elt"
8165  || secondary1) ||
8166  !(intn2.empty() || intn2 == "neighbor_element"
8167  || intn2 == "neighbour_elt"
8168  || secondary2);
8169 
8170  workspace.add_temporary_interval_for_unreduced_variable
8171  (root->name_test1);
8172  workspace.add_temporary_interval_for_unreduced_variable
8173  (root->name_test2);
8174 
8175  bool has_var_group1 = (!intn1.empty() && !secondary1 &&
8176  workspace.variable_group_exists
8177  (root->name_test1));
8178  bool has_var_group2 = (!intn2.empty() && !secondary2 &&
8179  workspace.variable_group_exists
8180  (root->name_test2));
8181  bool simple = !interpolate &&
8182  !has_var_group1 && !has_var_group2 &&
8183  mf1 && !(mf1->is_reduced()) &&
8184  mf2 && !(mf2->is_reduced());
8185 
8186  // ga instructions write into one of the following matrices
8187  auto &Krr = workspace.assembled_matrix();
8188  auto &Kru = workspace.col_unreduced_matrix();
8189  auto &Kur = workspace.row_unreduced_matrix();
8190  auto &Kuu = workspace.row_col_unreduced_matrix();
8191 
8192  if (simple) { // --> Krr
8193  const gmm::sub_interval
8194  &I1 = workspace.interval_of_variable(root->name_test1),
8195  &I2 = workspace.interval_of_variable(root->name_test2);
8196  const scalar_type
8197  &alpha1 = workspace.factor_of_variable(root->name_test1),
8198  &alpha2 = workspace.factor_of_variable(root->name_test2);
8199  if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
8200  pgai = std::make_shared
8201  <ga_instruction_matrix_assembly_standard_scalar>
8202  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8203  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8204  else if (root->sparsity() == 10 && root->t.qdim() == 2)
8205  pgai = std::make_shared
8206  <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
8207  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8208  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8209  else if (root->sparsity() == 10 && root->t.qdim() == 3)
8210  pgai = std::make_shared
8211  <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
8212  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8213  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8214  else
8215  pgai = std::make_shared
8216  <ga_instruction_matrix_assembly_standard_vector>
8217  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8218  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8219  } else if (condensation &&
8220  workspace.is_internal_variable(root->name_test1) &&
8221  workspace.is_internal_variable(root->name_test2)) {
8222  // diagonal condensation matrix KQQ
8223  // Only memory allocation, gathering of relevant pointers
8224  // and data summation instructions
8225  GMM_ASSERT1(imd1 && imd2, "Internal error");
8226  GMM_ASSERT1(!interpolate, "Internal error");
8227  size_type s1 = imd1->nb_tensor_elem();
8228  size_type s2 = imd2->nb_tensor_elem();
8229 
8230  condensation_description &CC = condensations[rm];
8231  GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
8232  CC.Qvars.count(root->name_test2) > 0,
8233  "Internal error");
8234  size_type q1 = CC.Qvars[root->name_test1],
8235  q2 = CC.Qvars[root->name_test2];
8236  if (!CC.KQQ(q1,q2)) {
8237  // allocate a new matrix
8238  gis.condensation_tensors.push_back
8239  (std::make_shared<base_tensor>(s1,s2));
8240  CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
8241  pgai = std::make_shared<ga_instruction_copy_vect>
8242  (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
8243  } else {
8244  // addition instruction to the previously allocated matrix
8245  pgai = std::make_shared<ga_instruction_add_to>
8246  (*CC.KQQ(q1,q2), root->tensor());
8247  }
8248  rmi.instructions.push_back(std::move(pgai));
8249  } else if (condensation &&
8250  workspace.is_internal_variable(root->name_test1)) {
8251  // subdiagonal condensation matrix KQJ
8252  // Only memory allocation, gathering of relevant pointers
8253  // and data summation instructions
8254  GMM_ASSERT1(imd1, "Internal error");
8255  GMM_ASSERT1(!interpolate, "Internal error");
8256  size_type s1 = imd1->nb_tensor_elem();
8257 
8258  condensation_description &CC = condensations[rm];
8259  GMM_ASSERT1(CC.Qvars.count(root->name_test1),
8260  "Internal error");
8261  size_type q1 = CC.Qvars[root->name_test1],
8262  j2 = CC.Jvars[root->name_test2];
8263  CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
8264  if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
8265  CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
8266  std::max(CC.KQJ.ncols(), j2+1));
8267  if (!CC.KQJ(q1,j2)) {
8268  // allocate a new matrix. Here we do not know the size as
8269  // it may change dynamically, but for now, just use the
8270  // size of root->tensor()
8271  gis.condensation_tensors.push_back
8272  (std::make_shared<base_tensor>(root->tensor()));
8273  GMM_ASSERT1(root->tensor().size(0) == s1, "Internal error");
8274  CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
8275  pgai = std::make_shared<ga_instruction_copy_vect>
8276  (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
8277  } else {
8278  // an extra matrix for this entry has already been
8279  // allocated, so just add the current tensor to it
8280  pgai = std::make_shared<ga_instruction_add_to>
8281  (*CC.KQJ(q1,j2), root->tensor());
8282  }
8283  rmi.instructions.push_back(std::move(pgai));
8284  } else if (condensation &&
8285  workspace.is_internal_variable(root->name_test2)) {
8286  // superdiagonal condensation matrix KIQ
8287  // Only memory allocation, gathering of relevant pointers
8288  // and data summation instructions
8289  GMM_ASSERT1(imd2, "Internal error");
8290  GMM_ASSERT1(!interpolate, "Internal error");
8291  size_type s2 = imd2->nb_tensor_elem();
8292 
8293  condensation_description &CC = condensations[rm];
8294  GMM_ASSERT1(CC.Qvars.count(root->name_test2),
8295  "Internal error");
8296  size_type i1 = CC.Ivars[root->name_test1],
8297  q2 = CC.Qvars[root->name_test2];
8298  if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
8299  CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
8300  std::max(CC.KIQ.ncols(), q2+1));
8301  if (!CC.KIQ(i1,q2)) {
8302  // allocate a new matrix. Here we do not know the size as
8303  // it may change dynamically, but for now, just use the
8304  // size of root->tensor()
8305  gis.condensation_tensors.push_back
8306  (std::make_shared<base_tensor>(root->tensor()));
8307  GMM_ASSERT1(root->tensor().size(1) == s2,
8308  "Internal error");
8309  CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
8310  pgai = std::make_shared<ga_instruction_copy_vect>
8311  (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
8312  } else {
8313  // an extra matrix for this entry has already been
8314  // allocated, so just add the current tensor to it
8315  pgai = std::make_shared<ga_instruction_add_to>
8316  (*CC.KIQ(i1,q2), root->tensor());
8317  }
8318  rmi.instructions.push_back(std::move(pgai));
8319  } else if (!workspace.is_internal_variable(root->name_test1) &&
8320  !workspace.is_internal_variable(root->name_test2)) {
8321 
8322  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
8323  || has_var_group1 || has_var_group2)
8324  gis.unreduced_terms.emplace(root->name_test1,
8325  root->name_test2);
8326 
8327  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8328  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8329  auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
8330  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8331  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8332 
8333  const scalar_type
8334  &alpha1 = workspace.factor_of_variable(root->name_test1),
8335  &alpha2 = workspace.factor_of_variable(root->name_test2);
8336 
8337  if (has_var_group1) {
8338  ga_instruction_set::variable_group_info
8339  &vgi1 = rmi.interpolate_infos[intn1]
8340  .groups_info[root->name_test1];
8341  if (has_var_group2) {
8342  ga_instruction_set::variable_group_info
8343  &vgi2 = rmi.interpolate_infos[intn2]
8344  .groups_info[root->name_test2];
8345  pgai = std::make_shared
8346  <ga_instruction_matrix_assembly_mf_mf>
8347  (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
8348  vgi1, vgi2,
8349  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8350  } else {
8351  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8352  ? workspace.temporary_interval_of_variable
8353  (root->name_test2)
8354  : workspace.interval_of_variable(root->name_test2);
8355  if (mf2)
8356  pgai = std::make_shared
8357  <ga_instruction_matrix_assembly_mf_mf>
8358  (root->tensor(), Krx, Kux, ctx1, ctx2,
8359  vgi1, I2, *mf2, alpha2,
8360  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8361  else // for global variable imd2 == 0
8362  pgai = std::make_shared
8363  <ga_instruction_matrix_assembly_mf_imd>
8364  (root->tensor(), Krr, Kur, ctx1, ctx2,
8365  vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
8366  }
8367  } else { // !has_var_group1
8368  const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
8369  ? workspace.temporary_interval_of_variable
8370  (root->name_test1)
8371  : workspace.interval_of_variable(root->name_test1);
8372  if (has_var_group2) {
8373  ga_instruction_set::variable_group_info
8374  &vgi2 = rmi.interpolate_infos[intn2]
8375  .groups_info[root->name_test2];
8376  if (mf1)
8377  pgai = std::make_shared
8378  <ga_instruction_matrix_assembly_mf_mf>
8379  (root->tensor(), Kxr, Kxu, ctx1, ctx2,
8380  I1, *mf1, alpha1, vgi2,
8381  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8382  else // for global variable imd1 == 0
8383  pgai = std::make_shared
8384  <ga_instruction_matrix_assembly_imd_mf>
8385  (root->tensor(), Krr, Kru, ctx1, ctx2,
8386  I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
8387  } else { // !has_var_group2
8388  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8389  ? workspace.temporary_interval_of_variable
8390  (root->name_test2)
8391  : workspace.interval_of_variable(root->name_test2);
8392  if (mf1 && mf2)
8393  pgai = std::make_shared
8394  <ga_instruction_matrix_assembly_mf_mf>
8395  (root->tensor(), Kxx, ctx1, ctx2,
8396  I1, *mf1, alpha1, I2, *mf2, alpha2,
8397  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8398  else if (mf1) // for global variable imd2 == 0
8399  pgai = std::make_shared
8400  <ga_instruction_matrix_assembly_mf_imd>
8401  (root->tensor(), Kxr, ctx1, ctx2,
8402  I1, *mf1, alpha1, I2, imd2, alpha2,
8403  gis.coeff, gis.ipt);
8404  else if (mf2)
8405  pgai = std::make_shared
8406  <ga_instruction_matrix_assembly_imd_mf>
8407  (root->tensor(), Krx, ctx1, ctx2,
8408  I1, imd1, alpha1, I2, *mf2, alpha2,
8409  gis.coeff, gis.ipt);
8410  else
8411  pgai = std::make_shared
8412  <ga_instruction_matrix_assembly_imd_imd>
8413  (root->tensor(), Krr, ctx1, ctx2,
8414  I1, imd1, alpha1, I2, imd2, alpha2,
8415  gis.coeff, gis.ipt);
8416  }
8417  }
8418  } // if (!simple)
8419  break;
8420  } // case 2
8421  } // switch(order)
8422  if (pgai)
8423  rmi.instructions.push_back(std::move(pgai));
8424  }
8425  } // if (root)
8426  } // if (td.order == order || td.order == size_type(-1))
8427  } // for (const ga_workspace::tree_description &td : trees_of_current_phase)
8428 
8429  if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
8430 
8431  auto &Krr = workspace.assembled_matrix();
8432  auto &Kru = workspace.col_unreduced_matrix();
8433  auto &Kur = workspace.row_unreduced_matrix();
8434  auto &Kuu = workspace.row_col_unreduced_matrix();
8435 
8436  for (auto &&key_val : condensations) {
8437  const ga_instruction_set::region_mim rm = key_val.first;
8438  condensation_description &CC = key_val.second;
8439  auto &rmi = gis.all_instructions[rm];
8440 
8441  CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
8442  for (size_type k=0; k < CC.KQJpr.size(); ++k) {
8443  gis.condensation_tensors.push_back // memory allocation
8444  (std::make_shared<base_tensor>(2,2));
8445  CC.KQJpr[k] = gis.condensation_tensors.back().get();
8446  }
8447 
8448  pga_instruction pgai;
8449 
8450  // Add one diagonal/subdiagonal condensation instruction per cluster
8451  for (size_type k=0; k < CC.Qclusters.size(); ++k) {
8452  // extract condensed variables residuals from
8453  // workspace.assembled_vector() into RQpr
8454  for (size_type q1 : CC.Qclusters[k]) {
8455  std::string name_test1 = CC.Qvars[q1];
8456  const im_data *imd1 = workspace.associated_im_data(name_test1);
8457  const gmm::sub_interval
8458  &I1 = workspace.interval_of_variable(name_test1);
8459  pgai =
8460  std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
8461  (*(CC.RQpr[q1]), workspace.cached_vector(), // cached_V --> CC.RQpr[q1]
8462  gis.ctx, I1, *imd1, gis.ipt);
8463  rmi.instructions.push_back(std::move(pgai));
8464  }
8465 
8466  // the exec() of this instruction calculates KQJpr including any
8467  // necessary size update to match the sizes of KQJ, upon size change
8468  // of primary variables J
8469  pgai = std::make_shared<ga_instruction_condensation_sub>
8470  (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff); // factor_of_variable()?
8471  rmi.instructions.push_back(std::move(pgai));
8472 
8473  // assemble/store KQJpr/RQpr matrices/vectors into the
8474  // corresponding global matrix/vector
8475  for (size_type q1 : CC.Qclusters[k]) {
8476  std::string name_test1 = CC.Qvars[q1];
8477  const im_data *imd1 = workspace.associated_im_data(name_test1);
8478 // const scalar_type
8479 // &alpha1 = workspace.factor_of_variable(name_test1); // TODO
8480  const gmm::sub_interval
8481  &I1 = workspace.interval_of_variable(name_test1);
8482  GMM_ASSERT1(imd1, "Internal error");
8483  for (size_type j2 : CC.Jclusters[k]) {
8484  std::string name_test2 = CC.Jvars[j2];
8485  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
8486  const im_data *imd2 = workspace.associated_im_data(name_test2);
8487 // const std::string &intn2 = root->interpolate_name_test2;
8488 // GMM_ASSERT1(intn2.empty(), "Coupling of internal variables "
8489 // "with interpolated variables not "
8490 // "implemented yet");
8491 // const scalar_type
8492 // &alpha2 = workspace.factor_of_variable(name_test2); // TODO
8493  const gmm::sub_interval
8494  &I2 = mf2 && mf2->is_reduced()
8495  ? workspace.temporary_interval_of_variable(name_test2)
8496  : workspace.interval_of_variable(name_test2);
8497  const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2)); // <- input
8498  model_real_sparse_matrix
8499  &KQJpr = mf2 && mf2->is_reduced()
8500  ? workspace.col_unreduced_matrix()
8501  : workspace.internal_coupling_matrix(); // <- output
8502  if (mf2) {
8503  pgai =
8504  std::make_shared<ga_instruction_matrix_assembly_imd_mf>
8505  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8506  I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
8507  // TODO: name_test2 variable group
8508  if (mf2->is_reduced())
8509  gis.unreduced_terms.emplace(name_test1, name_test2);
8510  } else // for global variable imd2 == 0
8511  pgai =
8512  std::make_shared<ga_instruction_matrix_assembly_imd_imd>
8513  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8514  I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
8515  rmi.instructions.push_back(std::move(pgai));
8516  } // for j2
8517  const bool initialize = true;
8518  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8519  (*(CC.RQpr[q1]), workspace.assembled_vector(), // <- overwriting internal variables residual with internal solution
8520  gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize); // without gis.coeff
8521  rmi.instructions.push_back(std::move(pgai));
8522  } // for q1
8523  }
8524 
8525  // Add superdiagonal condensation instructions
8526  for (size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
8527 
8528  std::string name_test1 = CC.Ivars[i1];
8529  const mesh_fem *mf1 = workspace.associated_mf(name_test1); // TODO: name_test1 variable group
8530  const im_data *imd1 = workspace.associated_im_data(name_test1);
8531  const scalar_type
8532  &alpha1 = workspace.factor_of_variable(name_test1);
8533  const gmm::sub_interval
8534  &I1 = mf1 && mf1->is_reduced()
8535  ? workspace.temporary_interval_of_variable(name_test1)
8536  : workspace.interval_of_variable(name_test1);
8537 
8538  // Q_of_J[j2] will hold all condensed variables q that couple
8539  // variable i1 to each variable j2
8540  std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
8541  for (size_type q=0; q < CC.Qvars.size(); ++q)
8542  if (CC.KIQ(i1,q)) {
8543  size_type cid = CC.cluster_of_Qvar[q];
8544  for (size_type j : CC.Jclusters[cid])
8545  Q_of_J[j].insert(q);
8546  }
8547 
8548  for (size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
8549  if (Q_of_J[j2].size()) { // a coupling between i1 and j2 exists
8550  std::vector<base_tensor *> Ki1Q, KQj2;
8551  for (size_type q : Q_of_J[j2]) {
8552  Ki1Q.push_back(CC.KIQ(i1,q));
8553  KQj2.push_back(CC.KQJpr(q,j2));
8554  }
8555  // allocate a tensor for storing the coupling between i1 and j2
8556  gis.condensation_tensors.push_back
8557  (std::make_shared<base_tensor>());
8558  base_tensor &Kij = *gis.condensation_tensors.back();
8559  pgai = std::make_shared<ga_instruction_condensation_super_K>
8560  (Kij, Ki1Q, KQj2);
8561  rmi.instructions.push_back(std::move(pgai));
8562  // add assembly instruction
8563  std::string name_test2 = CC.Jvars[j2];
8564  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
8565  const im_data *imd2 = workspace.associated_im_data(name_test2);
8566  // Here assuming interpolate_name_test1.empty() &&
8567  // interpolate_name_test2.empty() &&
8568  // !(secondary1 || secondary2) && !interpolate;
8569  const scalar_type
8570  &alpha2 = workspace.factor_of_variable(name_test2);
8571  const gmm::sub_interval
8572  &I2 = mf2 && mf2->is_reduced()
8573  ? workspace.temporary_interval_of_variable(name_test2)
8574  : workspace.interval_of_variable(name_test2);
8575 
8576  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8577  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8578  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8579  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8580 
8581  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8582  gis.unreduced_terms.emplace(name_test1, name_test2);
8583 
8584  if (mf1 && mf2) // TODO: name_test1 or name_test2 variable group
8585  pgai = std::make_shared
8586  <ga_instruction_matrix_assembly_mf_mf>
8587  (Kij, Kxx, gis.ctx, gis.ctx,
8588  I1, *mf1, alpha1, I2, *mf2, alpha2,
8589  gis.coeff, gis.nbpt, gis.ipt, false);
8590  else if (mf1) // for global variable imd2 == 0
8591  pgai = std::make_shared
8592  <ga_instruction_matrix_assembly_mf_imd>
8593  (Kij, Kxr, gis.ctx, gis.ctx,
8594  I1, *mf1, alpha1, I2, imd2, alpha2,
8595  gis.coeff, gis.ipt);
8596  else if (mf2)
8597  pgai = std::make_shared
8598  <ga_instruction_matrix_assembly_imd_mf>
8599  (Kij, Krx, gis.ctx, gis.ctx,
8600  I1, imd1, alpha1, I2, *mf2, alpha2,
8601  gis.coeff, gis.ipt);
8602  else
8603  pgai = std::make_shared
8604  <ga_instruction_matrix_assembly_imd_imd>
8605  (Kij, Krr, gis.ctx, gis.ctx,
8606  I1, imd1, alpha1, I2, imd2, alpha2,
8607  gis.coeff, gis.ipt);
8608  rmi.instructions.push_back(std::move(pgai));
8609  } // if (Q_of_J[j2].size())
8610  } // for j2
8611 
8612  // RHS condensation instructions
8613  std::vector<base_tensor *> Ki1Q, RQpr;
8614  for (size_type q=0; q < CC.Qvars.size(); ++q)
8615  if (CC.KIQ(i1,q)) {
8616  Ki1Q.push_back(CC.KIQ(i1,q));
8617  RQpr.push_back(CC.RQpr[q]);
8618  }
8619  gis.condensation_tensors.push_back
8620  (std::make_shared<base_tensor>());
8621  base_tensor &Ri = *gis.condensation_tensors.back();
8622  pgai = std::make_shared<ga_instruction_condensation_super_R>
8623  (Ri, Ki1Q, RQpr);
8624  rmi.instructions.push_back(std::move(pgai));
8625 
8626  base_vector &R = mf1->is_reduced() ? workspace.unreduced_vector()
8627  : workspace.assembled_vector();
8628  if (mf1)
8629  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8630  (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt, false);
8631  else if (imd1)
8632  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8633  (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8634  else
8635  pgai = std::make_shared<ga_instruction_vector_assembly>
8636  (Ri, R, I1, gis.coeff);
8637  rmi.instructions.push_back(std::move(pgai));
8638  } // for i1
8639  } // for (const auto &key_val : condensations)
8640  } // if (phase == ga_workspace::ASSEMBLY)
8641  } // for (const auto &phase : phases)
8642 
8643  } // ga_compile(...)
8644 
8645 
8646 
8647  //=========================================================================
8648  // Execution of a compiled set of assembly terms
8649  //=========================================================================
8650 
8651 
8652  void ga_function_exec(ga_instruction_set &gis) {
8653 
8654  for (auto &&instr : gis.all_instructions) {
8655  const auto &gil = instr.second.instructions;
8656  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8657  }
8658  }
8659 
8660  void ga_interpolation_exec(ga_instruction_set &gis,
8661  ga_workspace &workspace,
8662  ga_interpolation_context &gic) {
8663  base_matrix G;
8664  base_small_vector un, up;
8665 
8666  for (const std::string &t : gis.transformations)
8667  workspace.interpolate_transformation(t)->init(workspace);
8668 
8669  for (auto &&instr : gis.all_instructions) {
8670 
8671  const getfem::mesh_im &mim = *(instr.first.mim());
8672  const mesh_region &region = *(instr.first.region());
8673  const getfem::mesh &m = *(instr.second.m);
8674  GMM_ASSERT1(&m == &(gic.linked_mesh()),
8675  "Incompatibility of meshes in interpolation");
8676  const auto &gilb = instr.second.begin_instructions;
8677  const auto &gile = instr.second.elt_instructions;
8678  const auto &gil = instr.second.instructions;
8679 
8680  // iteration on elements (or faces of elements)
8681  std::vector<size_type> ind;
8682  auto pai_old = papprox_integration{};
8683  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8684  if (gic.use_mim()) {
8685  if (!mim.convex_index().is_in(v.cv())) continue;
8686  gis.pai = mim.int_method_of_element(v.cv())->approx_method();
8687  } else
8688  gis.pai = 0;
8689 
8690  ind.resize(0);
8691  bgeot::pstored_point_tab pspt
8692  = gic.ppoints_for_element(v.cv(), v.f(), ind);
8693 
8694  if (pspt.get() && ind.size() && pspt->size()) {
8695  m.points_of_convex(v.cv(), G);
8696  bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
8697  up.resize(G.nrows());
8698  un.resize(pgt->dim());
8699 
8700  if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8701  gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8702  } else {
8703  if (!(gic.use_pgp(v.cv()))) {
8704  gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8705  } else {
8706  gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
8707  }
8708  }
8709  pai_old = gis.pai;
8710 
8711  if (gis.need_elt_size)
8712  gis.elt_size = m.convex_radius_estimate(v.cv()) * scalar_type(2);
8713 
8714  // iterations on interpolation points
8715  gis.nbpt = pspt->size();
8716  for (size_type ii = 0; ii < ind.size(); ++ii) {
8717  gis.ipt = ii;
8718  if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8719  else gis.ctx.set_xref((*pspt)[gis.ipt]);
8720 
8721  if (ii == 0 || !(pgt->is_linear())) {
8722  // Computation of unit normal vector in case of a boundary
8723  if (v.f() != short_type(-1)) {
8724  const base_matrix& B = gis.ctx.B();
8725  gmm::copy(pgt->normals()[v.f()], un);
8726  gmm::mult(B, un, up);
8727  scalar_type nup = gmm::vect_norm2(up);
8728  gmm::scale(up,1.0/nup);
8729  gmm::clean(up, 1e-13);
8730  gis.Normal = up;
8731  } else gis.Normal.resize(0);
8732  }
8733  gmm::clear(workspace.assembled_tensor().as_vector());
8734  if (ii == 0) {
8735  for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8736  for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8737  }
8738  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8739  gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
8740  }
8741  }
8742  }
8743  }
8744  for (const std::string &t : gis.transformations)
8745  workspace.interpolate_transformation(t)->finalize();
8746 
8747  gic.finalize();
8748  }
8749 
// Executes the compiled instruction lists of gis at every integration point.
//
// For each entry of gis.all_instructions the elements (or element faces) of
// the entry's region are visited, restricted to the convexes of the entry's
// mesh_im, and for each integration point gis.ctx, gis.Normal, gis.nbpt,
// gis.ipt and gis.coeff are updated before the instructions run:
//   - begin_instructions (gilb) run at the first executed point after the
//     integration point table changed (first_gp),
//   - elt_instructions (gile) run when gis.ipt == 0,
//   - instructions (gil) run at every enabled point and, in any case, at
//     the first and last point of the element.
// When the entry carries a secondary domain (psd), the integration is done
// on the product of the two domains: sdi.ctx / sdi.Normal describe the point
// of the secondary domain and gis.ipt numbers the point pairs.
// The interpolate transformations named in gis.transformations are
// initialized before and finalized after the loops.
// In all instruction loops the value returned by exec() is added to the
// loop index, i.e. that many following instructions are skipped.
8750  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8751  base_matrix G1, G2;
8752  base_small_vector un;
8753  scalar_type J1(0), J2(0);
8754 
8755  for (const std::string &t : gis.transformations)
8756  workspace.interpolate_transformation(t)->init(workspace);
8757 
8758  for (auto &instr : gis.all_instructions) {
8759  const getfem::mesh_im &mim = *(instr.first.mim());
8760  psecondary_domain psd = instr.first.psd();
8761  const getfem::mesh &m = *(instr.second.m);
8762  GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
8763  const auto &gilb = instr.second.begin_instructions;
8764  const auto &gile = instr.second.elt_instructions;
8765  const auto &gil = instr.second.instructions;
8766 
8767  // if (gilb.size()) cout << "Begin instructions\n";
8768  // for (size_type j = 0; j < gilb.size(); ++j)
8769  // cout << typeid(*(gilb[j])).name() << endl;
8770  // if (gile.size()) cout << "\nElement instructions\n";
8771  // for (size_type j = 0; j < gile.size(); ++j)
8772  // cout << typeid(*(gile[j])).name() << endl;
8773  // cout << "\nGauss pt instructions\n";
8774  // for (size_type j = 0; j < gil.size(); ++j)
8775  // cout << typeid(*(gil[j])).name() << endl;
8776 
8777  if (!psd) { // standard integration on a single domain
8778 
8779  const mesh_region &region = *(instr.first.region());
8780 
8781  // iteration on elements (or faces of elements)
      // State cached across iterations so that the context is only rebuilt
      // when the element, transformation or integration method changes.
8782  size_type old_cv = size_type(-1);
8783  bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
8784  pintegration_method pim = 0;
8785  papprox_integration pai = 0;
8786  bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8787  bgeot::pgeotrans_precomp pgp = 0;
8788  bool first_gp = true;
8789  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8790  if (mim.convex_index().is_in(v.cv())) {
8791  // cout << "proceed with elt " << v.cv() << " face " << v.f()<<endl;
8792  if (v.cv() != old_cv) {
      // new element: refresh transformation, integration method and points
8793  pgt = m.trans_of_convex(v.cv());
8794  pim = mim.int_method_of_element(v.cv());
8795  m.points_of_convex(v.cv(), G1);
8796 
      // note: old_cv is not updated when the element is skipped here
8797  if (pim->type() == IM_NONE) continue;
8798  GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods "
8799  "cannot be used in high level generic assembly");
8800  pai = pim->approx_method();
8801  pspt = pai->pintegration_points();
8802  if (pspt->size()) {
      // reuse the current precomputation when both the integration method
      // and the geometric transformation are unchanged
8803  if (pgp && gis.pai == pai && pgt_old == pgt) {
8804  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8805  } else {
      // on-the-fly integration methods get no precomputation (pgp = 0);
      // the context is then positioned with set_xref() at each point
8806  if (pai->is_built_on_the_fly()) {
8807  gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8808  pgp = 0;
8809  } else {
8810  pgp = gis.gp_pool(pgt, pspt);
8811  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8812  }
8813  pgt_old = pgt; gis.pai = pai;
8814  }
8815  if (gis.need_elt_size)
8816  gis.elt_size = convex_radius_estimate(pgt, G1)*scalar_type(2);
8817  }
8818  old_cv = v.cv();
8819  } else {
      // same element as in the previous iteration: only the face changes
8820  if (pim->type() == IM_NONE) continue;
8821  gis.ctx.set_face_num(v.f());
8822  }
      // a change of integration point table re-arms the begin instructions
8823  if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
8824  if (pspt->size()) {
8825  // Iterations on Gauss points
      // on a face, only the points of that face are used, starting at
      // index first_ind of the integration method
8826  size_type first_ind = 0;
8827  if (v.f() != short_type(-1)) {
8828  gis.nbpt = pai->nb_points_on_face(v.f());
8829  first_ind = pai->ind_first_point_on_face(v.f());
8830  } else {
8831  gis.nbpt = pai->nb_points_on_convex();
8832  }
8833  for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8834  if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8835  else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
      // for a linear transformation J1 and the normal are computed at the
      // first point only and reused for the remaining points
8836  if (gis.ipt == 0 || !(pgt->is_linear())) {
8837  J1 = gis.ctx.J();
8838  // Computation of unit normal vector in case of a boundary
8839  if (v.f() != short_type(-1)) {
8840  gis.Normal.resize(G1.nrows());
8841  un.resize(pgt->dim());
8842  gmm::copy(pgt->normals()[v.f()], un);
8843  gmm::mult(gis.ctx.B(), un, gis.Normal);
8844  scalar_type nup = gmm::vect_norm2(gis.Normal);
      // J1 is scaled by the norm of the transformed normal before the
      // normal itself is normalized
8845  J1 *= nup;
8846  gmm::scale(gis.Normal, 1.0/nup);
8847  gmm::clean(gis.Normal, 1e-13);
8848  } else gis.Normal.resize(0);
8849  }
8850  auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8851  gis.coeff = J1 * ipt_coeff;
      // a point whose integration coefficient is zero is disabled unless
      // workspace.include_empty_int_points(); its coeff is forced to zero
8852  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8853  workspace.include_empty_int_points());
8854  if (!enable_ipt) gis.coeff = scalar_type(0);
8855  if (first_gp) {
8856  for (size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8857  first_gp = false;
8858  }
8859  if (gis.ipt == 0) {
8860  for (size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
8861  }
      // the first and last points are executed even when disabled
      // NOTE(review): presumably because some instructions act only at
      // the first or last point of an element -- confirm
8862  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8863  for (size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8864  }
8865  GA_DEBUG_INFO("");
8866  }
8867  }
8868  }
8869  }
8870  GA_DEBUG_INFO("-----------------------------");
8871 
8872  } else { // Integration on the product of two domains (secondary domain)
8873 
8874  auto &sdi = instr.second.secondary_domain_infos;
8875  const mesh_region &region1 = *(instr.first.region());
8876 
8877  // iteration on elements (or faces of elements)
      // Same caching scheme as in the single-domain branch, duplicated for
      // the primary (suffix 1) and the secondary (suffix 2) domain.
8878  size_type old_cv1=size_type(-1), old_cv2=size_type(-1);
8879  size_type nbpt1 = 0, nbpt2 = 0;
8880  bgeot::pgeometric_trans pgt1 = 0, pgt1_old = 0, pgt2 = 0, pgt2_old = 0;
8881  pintegration_method pim1 = 0, pim2 = 0;
8882  papprox_integration pai1 = 0, pai2 = 0;
8883  bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8884  bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8885  bool first_gp = true;
8886  for (getfem::mr_visitor v1(region1, m, true); !v1.finished(); ++v1) {
8887  if (mim.convex_index().is_in(v1.cv())) {
8888  // cout << "proceed with elt " << v1.cv()<<" face " << v1.f()<<endl;
8889  if (v1.cv() != old_cv1) {
8890  pgt1 = m.trans_of_convex(v1.cv());
8891  pim1 = mim.int_method_of_element(v1.cv());
8892  m.points_of_convex(v1.cv(), G1);
8893 
8894  if (pim1->type() == IM_NONE) continue;
8895  GMM_ASSERT1(pim1->type() == IM_APPROX, "Sorry, exact methods "
8896  "cannot be used in high level generic assembly");
8897  pai1 = pim1->approx_method();
8898  pspt1 = pai1->pintegration_points();
8899  if (pspt1->size()) {
8900  if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8901  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8902  } else {
8903  if (pai1->is_built_on_the_fly()) {
8904  gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8905  pgp1 = 0;
8906  } else {
8907  pgp1 = gis.gp_pool(pgt1, pspt1);
8908  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8909  }
8910  pgt1_old = pgt1; gis.pai = pai1;
8911  }
8912  if (gis.need_elt_size)
8913  gis.elt_size = convex_radius_estimate(pgt1,G1)*scalar_type(2);
8914  }
8915  old_cv1 = v1.cv();
8916  } else {
8917  if (pim1->type() == IM_NONE) continue;
8918  gis.ctx.set_face_num(v1.f());
8919  }
8920  if (pspt1 != old_pspt1) { first_gp = true; old_pspt1 = pspt1; }
8921  if (pspt1->size()) {
8922  // iterations on Gauss points
8923  size_type first_ind1 = 0;
8924  if (v1.f() != short_type(-1)) {
8925  nbpt1 = pai1->nb_points_on_face(v1.f());
8926  first_ind1 = pai1->ind_first_point_on_face(v1.f());
8927  } else {
8928  nbpt1 = pai1->nb_points_on_convex();
8929  }
8930 
      // the secondary domain supplies, for the current primary element and
      // face, the region of its own mesh to be integrated against
8931  const mesh &m2 = psd->mim().linked_mesh();
8932  const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
8933  for (getfem::mr_visitor v2(region2, m2, true);
8934  !v2.finished(); ++v2) {
8935  if (v2.cv() != old_cv2) {
8936  pgt2 = m2.trans_of_convex(v2.cv());
8937  pim2 = psd->mim().int_method_of_element(v2.cv());
8938  m2.points_of_convex(v2.cv(), G2);
8939 
8940  if (pim2->type() == IM_NONE) continue;
8941  GMM_ASSERT1(pim2->type() == IM_APPROX, "Sorry, exact methods "
8942  "cannot be used in high level generic assembly");
8943  pai2 = pim2->approx_method();
8944  pspt2 = pai2->pintegration_points();
8945  if (pspt2->size()) {
8946  if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8947  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8948  } else {
8949  if (pai2->is_built_on_the_fly()) {
8950  sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8951  pgp2 = 0;
8952  } else {
8953  pgp2 = gis.gp_pool(pgt2, pspt2);
8954  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8955  }
8956  pgt2_old = pgt2; sdi.pai = pai2;
8957  }
8958  }
8959  old_cv2 = v2.cv();
8960  } else {
8961  if (pim2->type() == IM_NONE) continue;
8962  sdi.ctx.set_face_num(v2.f());
8963  }
8964  if (pspt2 != old_pspt2) { first_gp = true; old_pspt2 = pspt2; }
8965  if (pspt2->size()) {
8966  // iterations on Gauss points
8967  size_type first_ind2 = 0;
8968  if (v2.f() != short_type(-1)) {
8969  nbpt2 = pai2->nb_points_on_face(v2.f());
8970  first_ind2 = pai2->ind_first_point_on_face(v2.f());
8971  } else {
      // the value stored in gis.nbpt here is overwritten just below
8972  nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
8973  }
      // gis.ipt numbers the (ipt1, ipt2) pairs, ipt2 varying fastest
8974  gis.nbpt = nbpt1 * nbpt2;
8975  gis.ipt = 0;
8976  for (size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8977  for (size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8978 
8979  if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8980  else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8981  if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8982  else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
8983 
      // Jacobian and unit normal on the primary domain (computed only for
      // the first pair when pgt1 is linear)
8984  if (gis.ipt == 0 || !(pgt1->is_linear())) {
8985  J1 = gis.ctx.J();
8986  if (v1.f() != short_type(-1)) {
8987  gis.Normal.resize(G1.nrows());
8988  un.resize(pgt1->dim());
8989  gmm::copy(pgt1->normals()[v1.f()], un);
8990  gmm::mult(gis.ctx.B(), un, gis.Normal);
8991  scalar_type nup = gmm::vect_norm2(gis.Normal);
8992  J1 *= nup;
8993  gmm::scale(gis.Normal, 1.0/nup);
8994  gmm::clean(gis.Normal, 1e-13);
8995  } else gis.Normal.resize(0);
8996  }
8997 
      // same computation on the secondary domain, stored in sdi
8998  if (gis.ipt == 0 || !(pgt2->is_linear())) {
8999  J2 = sdi.ctx.J();
9000  if (v2.f() != short_type(-1)) {
9001  sdi.Normal.resize(G2.nrows());
9002  un.resize(pgt2->dim());
9003  gmm::copy(pgt2->normals()[v2.f()], un);
9004  gmm::mult(sdi.ctx.B(), un, sdi.Normal);
9005  scalar_type nup = gmm::vect_norm2(sdi.Normal);
9006  J2 *= nup;
9007  gmm::scale(sdi.Normal, 1.0/nup);
9008  gmm::clean(sdi.Normal, 1e-13);
9009  } else sdi.Normal.resize(0);
9010  }
9011 
      // weight of the point pair: product of both integration coefficients
      // and both Jacobian factors
9012  auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
9013  * pai2->coeff(first_ind2+ipt2);
9014  gis.coeff = J1 * J2 * ipt_coeff;
9015  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
9016  workspace.include_empty_int_points());
9017  if (!enable_ipt) gis.coeff = scalar_type(0);
9018 
9019  if (first_gp) {
9020  for (size_type j=0; j < gilb.size(); ++j)
9021  j+=gilb[j]->exec();
9022  first_gp = false;
9023  }
9024  if (gis.ipt == 0) {
9025  for (size_type j=0; j < gile.size(); ++j)
9026  j+=gile[j]->exec();
9027  }
      // as in the single-domain branch, the first and last pairs are
      // executed even when disabled
9028  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
9029  for (size_type j=0; j < gil.size(); ++j)
9030  j+=gil[j]->exec();
9031  }
9032  GA_DEBUG_INFO("");
9033  }
9034  }
9035  }
9036  }
9037  }
9038  }
9039  }
9040  GA_DEBUG_INFO("-----------------------------");
9041  }
9042 
9043  }
9044 
9045  for (const std::string &t : gis.transformations)
9046  workspace.interpolate_transformation(t)->finalize();
9047  }
9048 
9049 
9050 } /* end of namespace */
does the inversion of the geometric transformation for a given convex
bool invert(const base_node &n, base_node &n_ref, scalar_type IN_EPS=1e-12, bool project_into_element=false)
given the node on the real element, returns the node on the reference element (even if it is outside ...
The geotrans_precomp_pool object allows allocating a certain number of geotrans_precomp and automatic...
static T & instance()
Instance from the current thread.
Describe an integration method linked to a mesh.
virtual pintegration_method int_method_of_element(size_type cv) const
return the integration method associated with an element (if no integration method is associated,...
const mesh & linked_mesh() const
Give a reference to the linked mesh of type mesh.
const dal::bit_vector & convex_index(void) const
Get the set of convexes where an integration method has been assigned.
"iterator" class for regions.
static mesh_region intersection(const mesh_region &a, const mesh_region &b)
return the intersection of two mesh regions
Describe a mesh (collection of convexes (elements) and points).
Definition: getfem_mesh.h:98
virtual scalar_type convex_radius_estimate(size_type ic) const
Return an estimate of the convex largest dimension.
Definition: getfem_mesh.cc:460
sparse vector built upon std::vector.
Definition: gmm_vector.h:995
Semantic analysis of assembly trees and semantic manipulations.
Compilation and execution operations.
a subclass of mesh_im which is conformal to a number of level sets.
void copy(const L1 &l1, L2 &l2)
Copy l1 into l2 (vectors or matrices).
Definition: gmm_blas.h:976
number_traits< typename linalg_traits< V >::value_type >::magnitude_type vect_norm2(const V &v)
Euclidean norm of a vector.
Definition: gmm_blas.h:556
number_traits< typename linalg_traits< V >::value_type >::magnitude_type vect_norminf(const V &v)
Infinity norm of a vector.
Definition: gmm_blas.h:692
void clear(L &l)
clear (fill with zeros) a vector or matrix.
Definition: gmm_blas.h:58
void resize(V &v, size_type n)
Resize the vector v to n elements.
Definition: gmm_blas.h:209
void clean(L &l, double threshold)
Clean a vector or matrix (replace near-zero entries with zeroes).
void mult(const L1 &l1, const L2 &l2, L3 &l3)
Compute the product l3 = l1 * l2.
Definition: gmm_blas.h:1663
void add(const L1 &l1, L2 &l2)
Add l1 to l2 (l2 = l2 + l1).
Definition: gmm_blas.h:1275
scalar_type APIDECL convex_radius_estimate(bgeot::pgeometric_trans pgt, const base_matrix &pts)
rough estimate of the radius of the convex using the largest eigenvalue of the jacobian of the geomet...
Definition: getfem_mesh.cc:797
std::shared_ptr< const getfem::virtual_fem > pfem
type of pointer on a fem description
Definition: getfem_fem.h:243
gmm::uint16_type short_type
used as the common short type integer in the library
Definition: bgeot_config.h:72
base_small_vector compute_normal(const geotrans_interpolation_context &c, size_type face)
norm of returned vector is the ratio between the face surface on the real element and the face surfac...
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:48
std::shared_ptr< const bgeot::geometric_trans > pgeometric_trans
pointer type for a geometric transformation
size_type alpha(short_type n, short_type d)
Return the value of $\frac{(n+d)!}{n!\,d!}$, which is the number of monomials of a polynomial of $n$ variables and degree $d$.
Definition: bgeot_poly.cc:46
GEneric Tool for Finite Element Methods.
void slice_vector_on_basic_dof_of_element(const mesh_fem &mf, const VEC1 &vec, size_type cv, VEC2 &coeff, size_type qmult1=size_type(-1), size_type qmult2=size_type(-1))
Given a mesh_fem.
Point tab storage.