#ifndef XTENSOR_ASSIGN_HPP
#define XTENSOR_ASSIGN_HPP

#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <type_traits>
#include <utility>

#include <xtl/xcomplex.hpp>
#include <xtl/xsequence.hpp>

#include "xexpression.hpp"
#include "xfunction.hpp"
#include "xiterator.hpp"
#include "xstrides.hpp"
#include "xtensor_config.hpp"
#include "xtensor_forward.hpp"

#if defined(XTENSOR_USE_TBB)
#include <tbb/tbb.h>
#endif

namespace xt
{
    template <class E1, class E2>
    void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);

    template <class E1, class E2>
    void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2);

    template <class E1, class E2>
    void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);

    template <class E1, class E2, class F>
    void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);

    template <class E1, class E2>
    void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);

    template <class E1, class E2>
    void strided_assign(E1& e1, const E2& e2, std::false_type /*disable*/);

    template <class E1, class E2>
    void strided_assign(E1& e1, const E2& e2, std::true_type /*enable*/);
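    // Illustrative usage sketch (assumes the public xt::xarray and xt::noalias
    // API; not code from this header): these free functions are the internal
    // entry points reached from the expression semantic layer.
    //
    //     xt::xarray<double> a = {{1., 2.}, {3., 4.}};
    //     xt::xarray<double> b;
    //     b = a + 2. * a;                // computed assignment: computed_assign
    //     xt::noalias(b) = a + 2. * a;   // no aliasing temporary: assign_xexpression
    //
    // Both paths end up in assign_data, which selects a linear, strided or
    // stepper-based assignment strategy.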
    template <class Tag>
    class xexpression_assigner_base;

    template <>
    class xexpression_assigner_base<xtensor_expression_tag>
    {
    public:

        template <class E1, class E2>
        static void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);
    };

    template <class Tag>
    class xexpression_assigner : public xexpression_assigner_base<Tag>
    {
    public:

        using base_type = xexpression_assigner_base<Tag>;

        template <class E1, class E2>
        static void assign_xexpression(E1& e1, const E2& e2);

        template <class E1, class E2>
        static void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);

        template <class E1, class E2, class F>
        static void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);

        template <class E1, class E2>
        static void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);

    private:

        template <class E1, class E2>
        static bool resize(E1& e1, const E2& e2);

        template <class E1, class F, class... CT>
        static bool resize(E1& e1, const xfunction<F, CT...>& e2);
    };
    template <class E1, class E2, layout_type L>
    class stepper_assigner
    {
    public:

        using lhs_iterator = typename E1::stepper;
        using rhs_iterator = typename E2::const_stepper;
        using shape_type = typename E1::shape_type;
        using index_type = xindex_type_t<shape_type>;
        using size_type = typename lhs_iterator::size_type;
        using difference_type = typename lhs_iterator::difference_type;

        stepper_assigner(E1& e1, const E2& e2);

        void run();

        void step(size_type i);
        void step(size_type i, size_type n);
        void reset(size_type i);

        void to_end(layout_type l);

    private:

        E1& m_e1;

        lhs_iterator m_lhs;
        rhs_iterator m_rhs;
        index_type m_index;
    };
    template <bool simd_assign>
    class linear_assigner
    {
    public:

        template <class E1, class E2>
        static void run(E1& e1, const E2& e2);
    };

    template <>
    class linear_assigner<false>
    {
    public:

        template <class E1, class E2>
        static void run(E1& e1, const E2& e2);

    private:

        template <class E1, class E2>
        static void run_impl(E1& e1, const E2& e2, std::true_type);

        template <class E1, class E2>
        static void run_impl(E1& e1, const E2& e2, std::false_type);
    };
    namespace strided_assign_detail
    {
        // Describes the decomposition of an assignment into an outer loop over
        // multi-dimensional indices and a contiguous inner loop.
        struct loop_sizes_t
        {
            bool can_do_strided_assign;
            bool is_row_major;
            std::size_t inner_loop_size;
            std::size_t outer_loop_size;
            std::size_t cut;
            std::size_t dimension;
        };
    }

    template <bool simd>
    class strided_loop_assigner
    {
    public:

        using loop_sizes_t = strided_assign_detail::loop_sizes_t;

        // compute whether a strided assignment is possible and its loop decomposition
        template <class E1, class E2>
        static loop_sizes_t get_loop_sizes(E1& e1, const E2& e2);

        // run a strided assignment with precomputed loop sizes
        template <class E1, class E2>
        static void run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes);

        // compute the loop sizes, then run
        template <class E1, class E2>
        static void run(E1& e1, const E2& e2);
    };

    template <class E1, class E2>
    inline void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner_base<tag>::assign_data(e1, e2, trivial);
    }
    template <class E1, class E2>
    inline void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        xtl::mpl::static_if<has_assign_to<E1, E2>::value>(
            [&](auto self)
            {
                self(e2).derived_cast().assign_to(e1);
            },
            /*else*/
            [&](auto /*self*/)
            {
                using tag = xexpression_tag_t<E1, E2>;
                xexpression_assigner<tag>::assign_xexpression(e1, e2);
            }
        );
    }
    template <class E1, class E2>
    inline void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::computed_assign(e1, e2);
    }

    template <class E1, class E2, class F>
    inline void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::scalar_computed_assign(e1, e2, std::forward<F>(f));
    }

    template <class E1, class E2>
    inline void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::assert_compatible_shape(e1, e2);
    }
    namespace detail
    {
        template <class E1, class E2>
        constexpr bool linear_static_layout()
        {
            return compute_layout(
                       select_layout<E1::static_layout, typename E1::shape_type>::value,
                       select_layout<E2::static_layout, typename E2::shape_type>::value
                   ) != layout_type::dynamic;
        }

        template <class E1, class E2>
        inline auto is_linear_assign(const E1& e1, const E2& e2) -> std::enable_if_t<has_strides<E1>::value, bool>
        {
            return (E1::contiguous_layout && E2::contiguous_layout && linear_static_layout<E1, E2>())
                   || (e1.is_contiguous() && e2.has_linear_assign(e1.strides()));
        }

        template <class E1, class E2>
        inline auto is_linear_assign(const E1&, const E2&) -> std::enable_if_t<!has_strides<E1>::value, bool>
        {
            return false;
        }

        template <class E1, class E2>
        inline bool linear_dynamic_layout(const E1& e1, const E2& e2)
        {
            return e1.is_contiguous() && e2.is_contiguous() && e2.has_linear_assign(e1.strides());
        }

        template <class E, class = void>
        struct has_step_leading : std::false_type
        {
        };

        template <class E>
        struct has_step_leading<E, void_t<decltype(std::declval<E>().step_leading())>> : std::true_type
        {
        };

        template <class T>
        struct use_strided_loop
        {
            static constexpr bool stepper_deref()
            {
                return std::is_reference<typename T::stepper::reference>::value;
            }

            static constexpr bool value = has_strides<T>::value
                                          && has_step_leading<typename T::stepper>::value && stepper_deref();
        };

        template <class T>
        struct use_strided_loop<xscalar<T>>
        {
            static constexpr bool value = true;
        };

        template <class F, class... CT>
        struct use_strided_loop<xfunction<F, CT...>>
        {
            static constexpr bool value = xtl::conjunction<use_strided_loop<std::decay_t<CT>>...>::value;
        };

        template <class T1, class T2>
        struct conditional_promote_to_complex
        {
            static constexpr bool cond = xtl::is_gen_complex<T1>::value && !xtl::is_gen_complex<T2>::value;
            using type = std::conditional_t<cond, T1, T2>;
        };

        template <class T1, class T2>
        using conditional_promote_to_complex_t = typename conditional_promote_to_complex<T1, T2>::type;
    }
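    // Illustration of conditional_promote_to_complex_t (evaluating the trait above):
    //     conditional_promote_to_complex_t<std::complex<double>, double>  // std::complex<double>
    //     conditional_promote_to_complex_t<double, std::complex<double>>  // std::complex<double> (cond is false, T2 kept)
    //     conditional_promote_to_complex_t<double, float>                 // float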
    template <class E1, class E2>
    class xassign_traits
    {
    private:

        using e1_value_type = typename E1::value_type;
        using e2_value_type = typename E2::value_type;

        template <class T>
        using is_bool = std::is_same<T, bool>;

        static constexpr bool is_bool_conversion()
        {
            return is_bool<e2_value_type>::value && !is_bool<e1_value_type>::value;
        }

        static constexpr bool contiguous_layout()
        {
            return E1::contiguous_layout && E2::contiguous_layout;
        }

        static constexpr bool convertible_types()
        {
            return std::is_convertible<e2_value_type, e1_value_type>::value && !is_bool_conversion();
        }

        // SIMD feasibility helpers: whether xsimd is enabled, whether the batch
        // size is meaningful for the requested value type, and whether both
        // expressions expose a compatible load/store SIMD interface.
        static constexpr bool use_xsimd();
        static constexpr bool simd_size_impl();
        static constexpr bool simd_size();
        static constexpr bool simd_interface();

    public:

        static constexpr bool simd_assign()
        {
            return convertible_types() && simd_size() && simd_interface();
        }

        static constexpr bool linear_assign(const E1& e1, const E2& e2, bool trivial)
        {
            return trivial && detail::is_linear_assign(e1, e2);
        }

        static constexpr bool strided_assign()
        {
            return detail::use_strided_loop<E1>::value && detail::use_strided_loop<E2>::value;
        }

        static constexpr bool simd_linear_assign()
        {
            return contiguous_layout() && simd_assign();
        }

        static constexpr bool simd_strided_assign()
        {
            return strided_assign() && simd_assign();
        }

        static constexpr bool simd_linear_assign(const E1& e1, const E2& e2)
        {
            return simd_assign() && detail::linear_dynamic_layout(e1, e2);
        }

        using e2_requested_value_type = std::
            conditional_t<is_bool<e2_value_type>::value, typename E2::bool_load_type, e2_value_type>;
        using requested_value_type = detail::conditional_promote_to_complex_t<e1_value_type, e2_requested_value_type>;
    };
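    // Note on requested_value_type: when the source expression holds bool, values
    // are loaded through E2::bool_load_type rather than bool itself, which lets
    // SIMD paths work on a type the batches can represent; the exact proxy type
    // is defined by the source expression.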
    template <class E1, class E2>
    inline void xexpression_assigner_base<xtensor_expression_tag>::assign_data(
        xexpression<E1>& e1,
        const xexpression<E2>& e2,
        bool trivial
    )
    {
        E1& de1 = e1.derived_cast();
        const E2& de2 = e2.derived_cast();
        using traits = xassign_traits<E1, E2>;

        bool linear_assign = traits::linear_assign(de1, de2, trivial);
        constexpr bool simd_assign = traits::simd_assign();
        constexpr bool simd_linear_assign = traits::simd_linear_assign();
        constexpr bool simd_strided_assign = traits::simd_strided_assign();

        if (linear_assign)
        {
            if (simd_linear_assign || traits::simd_linear_assign(de1, de2))
            {
                // Do not hard-code linear_assigner<true> here: that would force the
                // compiler to instantiate the SIMD branch even when the runtime
                // condition is false, breaking expressions without a SIMD interface.
                linear_assigner<simd_assign>::run(de1, de2);
            }
            else
            {
                linear_assigner<false>::run(de1, de2);
            }
        }
        else if (simd_strided_assign)
        {
            strided_loop_assigner<simd_strided_assign>::run(de1, de2);
        }
        else
        {
            stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(de1, de2).run();
        }
    }
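    // Dispatch summary (restating the branches above):
    //   1. linear + SIMD     -> linear_assigner<true>:   one flat vectorized loop
    //   2. linear, no SIMD   -> linear_assigner<false>:  one flat scalar loop
    //   3. strided + SIMD    -> strided_loop_assigner:   vectorized inner loop along the contiguous axis
    //   4. otherwise         -> stepper_assigner:        generic element-wise traversal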
    template <class Tag>
    template <class E1, class E2>
    inline void xexpression_assigner<Tag>::assign_xexpression(E1& e1, const E2& e2)
    {
        bool trivial_broadcast = resize(e1.derived_cast(), e2.derived_cast());
        base_type::assign_data(e1, e2, trivial_broadcast);
    }
    template <class Tag>
    template <class E1, class E2>
    inline void xexpression_assigner<Tag>::computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        using shape_type = typename E1::shape_type;
        using comperator_type = std::greater<typename shape_type::value_type>;
        using size_type = typename E1::size_type;

        E1& de1 = e1.derived_cast();
        const E2& de2 = e2.derived_cast();

        size_type dim2 = de2.dimension();
        shape_type shape = uninitialized_shape<shape_type>(dim2);

        bool trivial_broadcast = de2.broadcast_shape(shape, true);

        auto&& de1_shape = de1.shape();
        if (dim2 > de1.dimension()
            || std::lexicographical_compare(
                shape.begin(),
                shape.end(),
                de1_shape.begin(),
                de1_shape.end(),
                comperator_type()
            ))
        {
            typename E1::temporary_type tmp(shape);
            base_type::assign_data(tmp, e2, trivial_broadcast);
            de1.assign_temporary(std::move(tmp));
        }
        else
        {
            base_type::assign_data(e1, e2, trivial_broadcast);
        }
    }
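    // Why the temporary (restating the branch above): in a computed assignment
    // such as
    //     a = a + b;   // where broadcasting b enlarges the result shape
    // the result shape can exceed the current shape of `a`, and `a` is also an
    // operand of the right-hand side; materializing the result in a temporary
    // avoids reading elements that were already overwritten.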
    template <class Tag>
    template <class E1, class E2, class F>
    inline void xexpression_assigner<Tag>::scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
    {
        E1& d = e1.derived_cast();
        using size_type = typename E1::size_type;
        auto dst = d.storage().begin();
        for (size_type i = d.size(); i > 0; --i)
        {
            *dst = f(*dst, e2);
            ++dst;
        }
    }
    template <class Tag>
    template <class E1, class E2>
    inline void xexpression_assigner<Tag>::assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        const E1& de1 = e1.derived_cast();
        const E2& de2 = e2.derived_cast();
        if (!broadcastable(de2.shape(), de1.shape()))
        {
            throw_broadcast_error(de2.shape(), de1.shape());
        }
    }
    namespace detail
    {
        template <bool B, class... CT>
        struct static_trivial_broadcast;

        template <class... CT>
        struct static_trivial_broadcast<true, CT...>
        {
            static constexpr bool value = detail::promote_index<typename std::decay_t<CT>::shape_type...>::value;
        };

        template <class... CT>
        struct static_trivial_broadcast<false, CT...>
        {
            static constexpr bool value = false;
        };
    }
    template <class Tag>
    template <class E1, class E2>
    inline bool xexpression_assigner<Tag>::resize(E1& e1, const E2& e2)
    {
        e1.resize(e2.shape());
        return true;
    }

    template <class Tag>
    template <class E1, class F, class... CT>
    inline bool xexpression_assigner<Tag>::resize(E1& e1, const xfunction<F, CT...>& e2)
    {
        return xtl::mpl::static_if<detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value>(
            [&](auto /*self*/)
            {
                // fixed shape: resize from the compile-time shape and compute
                // the triviality of the broadcast statically
                e1.resize(typename xfunction<F, CT...>::shape_type{});
                return detail::static_trivial_broadcast<
                    detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value,
                    CT...>::value;
            },
            /*else*/
            [&](auto /*self*/)
            {
                // dynamic shape: broadcast the rhs shape at runtime
                using index_type = xindex_type_t<typename E1::shape_type>;
                using size_type = typename E1::size_type;
                size_type size = e2.dimension();
                index_type shape = uninitialized_shape<index_type>(size);
                bool trivial_broadcast = e2.broadcast_shape(shape, true);
                e1.resize(std::move(shape));
                return trivial_broadcast;
            }
        );
    }
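    // Illustrative note: for expressions whose shape type is fixed (known at
    // compile time), the first static_if branch resizes from a default-constructed
    // shape object and folds the triviality of the broadcast into a compile-time
    // constant; only dynamically shaped expressions pay for the runtime
    // broadcast_shape call of the second branch.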
    template <class FROM, class TO>
    struct is_narrowing_conversion
    {
        using argument_type = std::decay_t<FROM>;
        using result_type = std::decay_t<TO>;

        static const bool value = xtl::is_arithmetic<result_type>::value
                                  && (sizeof(result_type) < sizeof(argument_type)
                                      || (xtl::is_integral<result_type>::value
                                          && std::is_floating_point<argument_type>::value));
    };

    template <class FROM, class TO>
    struct has_sign_conversion
    {
        using argument_type = std::decay_t<FROM>;
        using result_type = std::decay_t<TO>;

        static const bool value = xtl::is_signed<argument_type>::value != xtl::is_signed<result_type>::value;
    };

    template <class FROM, class TO>
    struct has_assign_conversion
    {
        using argument_type = std::decay_t<FROM>;
        using result_type = std::decay_t<TO>;

        static const bool value = is_narrowing_conversion<argument_type, result_type>::value
                                  || has_sign_conversion<argument_type, result_type>::value;
    };
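    // Examples (evaluating the traits above):
    //     is_narrowing_conversion<double, int>::value   // true: floating point to smaller integral
    //     is_narrowing_conversion<int, double>::value   // false
    //     has_sign_conversion<int, unsigned int>::value // true: signedness differs
    //     has_assign_conversion<double, int>::value     // true, so the assignment casts explicitly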
    template <class E1, class E2, layout_type L>
    inline stepper_assigner<E1, E2, L>::stepper_assigner(E1& e1, const E2& e2)
        : m_e1(e1)
        , m_lhs(e1.stepper_begin(e1.shape()))
        , m_rhs(e2.stepper_begin(e1.shape()))
        , m_index(xtl::make_sequence<index_type>(e1.shape().size(), size_type(0)))
    {
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::run()
    {
        using tmp_size_type = typename E1::size_type;
        using argument_type = std::decay_t<decltype(*m_rhs)>;
        using result_type = std::decay_t<decltype(*m_lhs)>;
        constexpr bool needs_cast = has_assign_conversion<argument_type, result_type>::value;

        tmp_size_type s = m_e1.size();
        for (tmp_size_type i = 0; i < s; ++i)
        {
            *m_lhs = conditional_cast<needs_cast, result_type>(*m_rhs);
            stepper_tools<L>::increment_stepper(*this, m_index, m_e1.shape());
        }
    }
    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::step(size_type i)
    {
        m_lhs.step(i);
        m_rhs.step(i);
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::step(size_type i, size_type n)
    {
        m_lhs.step(i, n);
        m_rhs.step(i, n);
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::reset(size_type i)
    {
        m_lhs.reset(i);
        m_rhs.reset(i);
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::to_end(layout_type l)
    {
        m_lhs.to_end(l);
        m_rhs.to_end(l);
    }
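    // stepper_assigner is the fully generic fallback: it advances one stepper on
    // each side element by element, carrying an explicit index vector (m_index)
    // so that broadcasting and arbitrary layouts are handled correctly, at the
    // cost of scalar-only traversal.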
    template <bool simd_assign>
    template <class E1, class E2>
    inline void linear_assigner<simd_assign>::run(E1& e1, const E2& e2)
    {
        using lhs_align_mode = xt_simd::container_alignment_t<E1>;
        constexpr bool is_aligned = std::is_same<lhs_align_mode, aligned_mode>::value;
        using rhs_align_mode = std::conditional_t<is_aligned, inner_aligned_mode, unaligned_mode>;
        using e1_value_type = typename E1::value_type;
        using e2_value_type = typename E2::value_type;
        using value_type = typename xassign_traits<E1, E2>::requested_value_type;
        using simd_type = xt_simd::simd_type<value_type>;
        using size_type = typename E1::size_type;

        size_type size = e1.size();
        constexpr size_type simd_size = simd_type::size;
        constexpr bool needs_cast = has_assign_conversion<e1_value_type, e2_value_type>::value;

        size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size);
        size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));

        // scalar prologue until the first aligned element
        for (size_type i = 0; i < align_begin; ++i)
        {
            e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
        }

#if defined(XTENSOR_USE_TBB)
        if (size >= XTENSOR_TBB_THRESHOLD)
        {
            tbb::static_partitioner ap;
            tbb::parallel_for(
                align_begin,
                align_end,
                simd_size,
                [&e1, &e2](size_type i)
                {
                    e1.template store_simd<lhs_align_mode>(
                        i,
                        e2.template load_simd<rhs_align_mode, value_type>(i)
                    );
                },
                ap
            );
        }
        else
        {
            for (size_type i = align_begin; i < align_end; i += simd_size)
            {
                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
            }
        }
#elif defined(XTENSOR_USE_OPENMP)
        if (size >= size_type(XTENSOR_OPENMP_TRESHOLD))
        {
#pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2)
#ifndef _WIN32
            for (size_type i = align_begin; i < align_end; i += simd_size)
            {
                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
            }
#else
            // MSVC's OpenMP requires a signed loop index
            for (auto i = static_cast<std::ptrdiff_t>(align_begin); i < static_cast<std::ptrdiff_t>(align_end);
                 i += static_cast<std::ptrdiff_t>(simd_size))
            {
                size_type ui = static_cast<size_type>(i);
                e1.template store_simd<lhs_align_mode>(ui, e2.template load_simd<rhs_align_mode, value_type>(ui));
            }
#endif
        }
        else
        {
            for (size_type i = align_begin; i < align_end; i += simd_size)
            {
                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
            }
        }
#else
        for (size_type i = align_begin; i < align_end; i += simd_size)
        {
            e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
        }
#endif

        // scalar epilogue for the remainder
        for (size_type i = align_end; i < size; ++i)
        {
            e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
        }
    }
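    // Worked example for the alignment arithmetic above: with size == 103,
    // align_begin == 3 and simd_size == 8, (size - align_begin) & ~(simd_size - 1)
    // rounds 100 down to 96, so align_end == 99; elements [0, 3) and [99, 103)
    // are handled by the scalar prologue and epilogue loops.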
    template <class E1, class E2>
    inline void linear_assigner<false>::run(E1& e1, const E2& e2)
    {
        using is_convertible = std::
            is_convertible<typename std::decay_t<E2>::value_type, typename std::decay_t<E1>::value_type>;
        // If the value types are unrelated, this function may still be instantiated;
        // dispatch to a run_impl overload that asserts instead of generating an
        // invalid assignment.
        run_impl(e1, e2, is_convertible());
    }
    template <class E1, class E2>
    inline void linear_assigner<false>::run_impl(E1& e1, const E2& e2, std::true_type /*convertible*/)
    {
        using value_type = typename E1::value_type;
        using size_type = typename E1::size_type;
        auto src = linear_begin(e2);
        auto dst = linear_begin(e1);
        size_type n = e1.size();
#if defined(XTENSOR_USE_TBB)
        tbb::static_partitioner sp;
        tbb::parallel_for(
            std::ptrdiff_t(0),
            static_cast<std::ptrdiff_t>(n),
            [&](std::ptrdiff_t i)
            {
                *(dst + i) = static_cast<value_type>(*(src + i));
            },
            sp
        );
#elif defined(XTENSOR_USE_OPENMP)
        if (n >= XTENSOR_OPENMP_TRESHOLD)
        {
#pragma omp parallel for default(none) shared(src, dst, n)
            for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast<std::ptrdiff_t>(n); i++)
            {
                *(dst + i) = static_cast<value_type>(*(src + i));
            }
        }
        else
        {
            for (; n > size_type(0); --n)
            {
                *dst = static_cast<value_type>(*src);
                ++src;
                ++dst;
            }
        }
#else
        for (; n > size_type(0); --n)
        {
            *dst = static_cast<value_type>(*src);
            ++src;
            ++dst;
        }
#endif
    }
    template <class E1, class E2>
    inline void linear_assigner<false>::run_impl(E1&, const E2&, std::false_type /*not convertible*/)
    {
        XTENSOR_PRECONDITION(false, "Internal error: linear_assigner called with unrelated types.");
    }
    namespace strided_assign_detail
    {
        template <layout_type layout>
        struct idx_tools;

        template <>
        struct idx_tools<layout_type::row_major>
        {
            // advance `outer_index` to the next row-major position in `outer_shape`
            template <class T>
            static void next_idx(T& outer_index, T& outer_shape);

            // compute the n-th row-major position in `outer_shape`
            template <class T>
            static void nth_idx(std::size_t n, T& outer_index, const T& outer_shape);
        };

        template <>
        struct idx_tools<layout_type::column_major>
        {
            template <class T>
            static void next_idx(T& outer_index, T& outer_shape);

            template <class T>
            static void nth_idx(std::size_t n, T& outer_index, const T& outer_shape);
        };

        // Functor walking an expression tree to find the dimension ("cut") up to
        // which the operands' strides are compatible with the destination strides.
        template <layout_type L, class S>
        struct check_strides_functor
        {
            using strides_type = S;

            check_strides_functor(const S& strides);

            template <class T, layout_type LE = L>
            std::enable_if_t<LE == layout_type::row_major, std::size_t> operator()(const T& el);

            template <class T, layout_type LE = L>
            std::enable_if_t<LE == layout_type::column_major, std::size_t> operator()(const T& el);

            template <class F, class... CT>
            std::size_t operator()(const xfunction<F, CT...>& xf)
            {
                xt::for_each(*this, xf.arguments());
                return m_cut;
            }

            std::size_t m_cut;
            const strides_type& m_strides;
        };
        // Fallback when the destination has no strides or the strided loop is
        // disabled: report that no strided assignment is possible.
        template <bool possible = false, class E1, class E2, std::enable_if_t<!has_strides<E1>::value || !possible, bool> = true>
        loop_sizes_t get_loop_sizes(const E1& e1, const E2&)
        {
            return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
        }

        template <bool possible = true, class E1, class E2, std::enable_if_t<has_strides<E1>::value && possible, bool> = true>
        loop_sizes_t get_loop_sizes(const E1& e1, const E2& e2)
        {
            using shape_value_type = typename E1::shape_type::value_type;
            bool is_row_major = true;

            // Determine whether the destination is row- or column-contiguous,
            // skipping broadcast dimensions whose stride is 0.
            {
                is_row_major = true;
                auto is_zero = [](auto i)
                {
                    return i == 0;
                };
                auto&& strides = e1.strides();
                auto it_bwd = std::find_if_not(strides.rbegin(), strides.rend(), is_zero);
                bool de1_row_contiguous = it_bwd != strides.rend() && *it_bwd == 1;
                auto it_fwd = std::find_if_not(strides.begin(), strides.end(), is_zero);
                bool de1_col_contiguous = it_fwd != strides.end() && *it_fwd == 1;
                if (de1_row_contiguous)
                {
                    is_row_major = true;
                }
                else if (de1_col_contiguous)
                {
                    is_row_major = false;
                }
                else
                {
                    // contiguous in neither direction: no strided loop possible
                    return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
                }
            }
            // Find the cut separating the outer loop from the contiguous inner
            // loop by checking the strides of all operands.
            std::size_t cut = 0;
            if (is_row_major)
            {
                auto csf = check_strides_functor<layout_type::row_major, std::decay_t<decltype(e1.strides())>>(
                    e1.strides()
                );
                cut = csf(e2);
                if (cut < e1.strides().size() - 1)
                {
                    // restrict the inner loop to the innermost dimension
                    cut = e1.strides().size() - 1;
                }
            }
            else if (!is_row_major)
            {
                auto csf = check_strides_functor<layout_type::column_major, std::decay_t<decltype(e1.strides())>>(
                    e1.strides()
                );
                cut = csf(e2);
                if (cut > 1)
                {
                    // restrict the inner loop to the first dimension
                    cut = 1;
                }
            }

            std::size_t outer_loop_size = static_cast<std::size_t>(std::accumulate(
                e1.shape().begin(),
                e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
                shape_value_type(1),
                std::multiplies<shape_value_type>{}
            ));
            std::size_t inner_loop_size = static_cast<std::size_t>(std::accumulate(
                e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
                e1.shape().end(),
                shape_value_type(1),
                std::multiplies<shape_value_type>{}
            ));

            if (!is_row_major)
            {
                std::swap(outer_loop_size, inner_loop_size);
            }

            return {inner_loop_size > 1, is_row_major, inner_loop_size, outer_loop_size, cut, e1.dimension()};
        }
    }
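    // Worked example (illustrative): for a row-major destination of shape
    // (4, 5, 8) whose last dimension is contiguous, cut == 2 yields
    // outer_loop_size == 4 * 5 == 20 and inner_loop_size == 8, i.e.
    // loop_sizes_t{true, true, 8, 20, 2, 3}: twenty outer iterations, each
    // assigning a contiguous span of eight elements.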
    template <bool simd>
    template <class E1, class E2>
    inline strided_assign_detail::loop_sizes_t strided_loop_assigner<simd>::get_loop_sizes(E1& e1, const E2& e2)
    {
        return strided_assign_detail::get_loop_sizes<simd>(e1, e2);
    }
#define strided_parallel_assign
    template <bool simd>
    template <class E1, class E2>
    inline void strided_loop_assigner<simd>::run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes)
    {
        bool is_row_major = loop_sizes.is_row_major;
        std::size_t inner_loop_size = loop_sizes.inner_loop_size;
        std::size_t outer_loop_size = loop_sizes.outer_loop_size;
        std::size_t cut = loop_sizes.cut;

        // build the outer index and its shape on either side of the cut
        dynamic_shape<std::size_t> idx, max_shape;
        if (is_row_major)
        {
            xt::resize_container(idx, cut);
            max_shape.assign(e1.shape().begin(), e1.shape().begin() + static_cast<std::ptrdiff_t>(cut));
        }
        else
        {
            xt::resize_container(idx, e1.shape().size() - cut);
            max_shape.assign(e1.shape().begin() + static_cast<std::ptrdiff_t>(cut), e1.shape().end());
        }

        using e1_value_type = typename E1::value_type;
        using e2_value_type = typename E2::value_type;
        constexpr bool needs_cast = has_assign_conversion<e1_value_type, e2_value_type>::value;
        using value_type = typename xassign_traits<E1, E2>::requested_value_type;
        using simd_type = std::conditional_t<
            std::is_same<e1_value_type, bool>::value,
            xt_simd::simd_bool_type<value_type>,
            xt_simd::simd_type<value_type>>;

        std::size_t simd_size = inner_loop_size / simd_type::size;
        std::size_t simd_rest = inner_loop_size % simd_type::size;

        auto fct_stepper = e2.stepper_begin(e1.shape());
        auto res_stepper = e1.stepper_begin(e1.shape());

        // for column-major traversal the outer index starts at the cut
        std::size_t step_dim = 0;
        if (!is_row_major)
        {
            step_dim = cut;
        }
#if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign)
        if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size)
        {
            std::size_t first_step = true;
#pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx)
            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
            {
                if (first_step)
                {
                    // position this thread's steppers at the ox-th outer index
                    is_row_major
                        ? strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape)
                        : strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(ox, idx, max_shape);

                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                        res_stepper.step(i + step_dim, idx[i]);
                    }
                    first_step = false;
                }

                for (std::size_t i = 0; i < simd_size; ++i)
                {
                    res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
                }
                for (std::size_t i = 0; i < simd_rest; ++i)
                {
                    *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
                    res_stepper.step_leading();
                    fct_stepper.step_leading();
                }

                is_row_major
                    ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
                    : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);

                fct_stepper.to_begin();
                // the result stepper must be repositioned as well when assigning
                // to a non-contiguous container (e.g. a view)
                if (!E1::contiguous_layout)
                {
                    res_stepper.to_begin();
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                        res_stepper.step(i + step_dim, idx[i]);
                    }
                }
                else
                {
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                    }
                }
            }
        }
        else
        {
#elif defined(strided_parallel_assign) && defined(XTENSOR_USE_TBB)
        if (outer_loop_size > XTENSOR_TBB_THRESHOLD / inner_loop_size)
        {
            tbb::static_partitioner sp;
            tbb::parallel_for(
                tbb::blocked_range<size_t>(0ul, outer_loop_size),
                [&e1, &e2, is_row_major, step_dim, simd_size, simd_rest, &max_shape, &idx_ = idx](
                    const tbb::blocked_range<size_t>& r
                )
                {
                    auto idx = idx_;
                    auto fct_stepper = e2.stepper_begin(e1.shape());
                    auto res_stepper = e1.stepper_begin(e1.shape());
                    std::size_t first_step = true;

                    for (std::size_t ox = r.begin(); ox < r.end(); ++ox)
                    {
                        if (first_step)
                        {
                            is_row_major
                                ? strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape)
                                : strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(ox, idx, max_shape);

                            for (std::size_t i = 0; i < idx.size(); ++i)
                            {
                                fct_stepper.step(i + step_dim, idx[i]);
                                res_stepper.step(i + step_dim, idx[i]);
                            }
                            first_step = false;
                        }

                        for (std::size_t i = 0; i < simd_size; ++i)
                        {
                            res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
                        }
                        for (std::size_t i = 0; i < simd_rest; ++i)
                        {
                            *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
                            res_stepper.step_leading();
                            fct_stepper.step_leading();
                        }

                        is_row_major
                            ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
                            : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);

                        fct_stepper.to_begin();
                        if (!E1::contiguous_layout)
                        {
                            res_stepper.to_begin();
                            for (std::size_t i = 0; i < idx.size(); ++i)
                            {
                                fct_stepper.step(i + step_dim, idx[i]);
                                res_stepper.step(i + step_dim, idx[i]);
                            }
                        }
                        else
                        {
                            for (std::size_t i = 0; i < idx.size(); ++i)
                            {
                                fct_stepper.step(i + step_dim, idx[i]);
                            }
                        }
                    }
                },
                sp
            );
        }
        else
        {
#endif
            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
            {
                for (std::size_t i = 0; i < simd_size; ++i)
                {
                    res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
                }
                for (std::size_t i = 0; i < simd_rest; ++i)
                {
                    *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
                    res_stepper.step_leading();
                    fct_stepper.step_leading();
                }

                is_row_major
                    ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
                    : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);

                fct_stepper.to_begin();
                if (!E1::contiguous_layout)
                {
                    res_stepper.to_begin();
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                        res_stepper.step(i + step_dim, idx[i]);
                    }
                }
                else
                {
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                    }
                }
            }
#if (defined(XTENSOR_USE_OPENMP) || defined(XTENSOR_USE_TBB)) && defined(strided_parallel_assign)
        }
#endif
    }
    template <>
    template <class E1, class E2>
    inline void strided_loop_assigner<true>::run(E1& e1, const E2& e2)
    {
        strided_assign_detail::loop_sizes_t loop_sizes = strided_loop_assigner<true>::get_loop_sizes(e1, e2);
        if (loop_sizes.can_do_strided_assign)
        {
            run(e1, e2, loop_sizes);
        }
        else
        {
            // decomposition not worthwhile: fall back to the stepper assigner
            stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
        }
    }
    template <>
    template <class E1, class E2>
    inline void strided_loop_assigner<false>::run(E1&, const E2&, const loop_sizes_t&)
    {
    }

    template <>
    template <class E1, class E2>
    inline void strided_loop_assigner<false>::run(E1& e1, const E2& e2)
    {
        stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
    }
}

#endif