#ifndef XTENSOR_ASSIGN_HPP
#define XTENSOR_ASSIGN_HPP

#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <type_traits>
#include <utility>

#include <xtl/xcomplex.hpp>
#include <xtl/xsequence.hpp>

#include "../core/xexpression.hpp"
#include "../core/xfunction.hpp"
#include "../core/xiterator.hpp"
#include "../core/xstrides.hpp"
#include "../core/xtensor_config.hpp"
#include "../core/xtensor_forward.hpp"
#include "../utils/xutils.hpp"
#if defined(XTENSOR_USE_TBB)
#include <tbb/tbb.h>
#endif

namespace xt
{
    /********************
     * Assign functions *
     ********************/

    template <class E1, class E2>
    void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);

    template <class E1, class E2>
    void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2);

    template <class E1, class E2>
    void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);

    template <class E1, class E2, class F>
    void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);

    template <class E1, class E2>
    void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);

    template <class E1, class E2>
    void strided_assign(E1& e1, const E2& e2, std::false_type /*disable*/);

    template <class E1, class E2>
    void strided_assign(E1& e1, const E2& e2, std::true_type /*enable*/);

    /************************
     * xexpression_assigner *
     ************************/

    template <class Tag>
    class xexpression_assigner_base;

    template <>
    class xexpression_assigner_base<xtensor_expression_tag>
    {
    public:

        template <class E1, class E2>
        static void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);
    };

    template <class Tag>
    class xexpression_assigner : public xexpression_assigner_base<Tag>
    {
    public:

        using base_type = xexpression_assigner_base<Tag>;

        template <class E1, class E2>
        static void assign_xexpression(E1& e1, const E2& e2);

        template <class E1, class E2>
        static void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);

        template <class E1, class E2, class F>
        static void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);

        template <class E1, class E2>
        static void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);

    private:

        template <class E1, class E2>
        static bool resize(E1& e1, const E2& e2);

        template <class E1, class F, class... CT>
        static bool resize(E1& e1, const xfunction<F, CT...>& e2);
    };

    /********************
     * stepper_assigner *
     ********************/

    template <class E1, class E2, layout_type L>
    class stepper_assigner
    {
    public:

        using lhs_iterator = typename E1::stepper;
        using rhs_iterator = typename E2::const_stepper;
        using shape_type = typename E1::shape_type;
        using index_type = xindex_type_t<shape_type>;
        using size_type = typename lhs_iterator::size_type;
        using difference_type = typename lhs_iterator::difference_type;

        stepper_assigner(E1& e1, const E2& e2);

        void run();

        void step(size_type i);
        void step(size_type i, size_type n);
        void reset(size_type i);

        void to_end(layout_type l);

    private:

        E1& m_e1;
        lhs_iterator m_lhs;
        rhs_iterator m_rhs;
        index_type m_index;
    };

    /*******************
     * linear_assigner *
     *******************/

    template <bool simd_assign>
    class linear_assigner
    {
    public:

        template <class E1, class E2>
        static void run(E1& e1, const E2& e2);
    };

    template <>
    class linear_assigner<false>
    {
    public:

        template <class E1, class E2>
        static void run(E1& e1, const E2& e2);

    private:

        template <class E1, class E2>
        static void run_impl(E1& e1, const E2& e2, std::true_type);

        template <class E1, class E2>
        static void run_impl(E1& e1, const E2& e2, std::false_type);
    };

    /*************************
     * strided_loop_assigner *
     *************************/

    namespace strided_assign_detail
    {
        struct loop_sizes_t
        {
            bool can_do_strided_assign;
            bool is_row_major;
            std::size_t inner_loop_size;
            std::size_t outer_loop_size;
            std::size_t cut;
            std::size_t dimension;
        };
    }

    template <bool simd>
    class strided_loop_assigner
    {
    public:

        using loop_sizes_t = strided_assign_detail::loop_sizes_t;

        template <class E1, class E2>
        static void run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes);
        template <class E1, class E2>
        static loop_sizes_t get_loop_sizes(E1& e1, const E2& e2);
        template <class E1, class E2>
        static void run(E1& e1, const E2& e2);
    };

    /***********************************
     * Assign functions implementation *
     ***********************************/

    template <class E1, class E2>
    inline void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::assign_data(e1, e2, trivial);
    }

    template <class E1, class E2>
    inline void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        if constexpr (has_assign_to<E1, E2>::value)
        {
            e2.derived_cast().assign_to(e1);
        }
        else
        {
            using tag = xexpression_tag_t<E1, E2>;
            xexpression_assigner<tag>::assign_xexpression(e1, e2);
        }
    }
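
    // A minimal usage sketch (hypothetical values, for illustration only):
    //
    //     xt::xarray<double> src = {{1., 2.}, {3., 4.}};
    //     xt::xarray<double> dst;
    //     xt::assign_xexpression(dst, src);   // dst is resized and filled
    //
    // When E2 provides an assign_to member (detected by has_assign_to), the
    // expression writes itself into e1; otherwise the tag-dispatched
    // xexpression_assigner takes over.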

    template <class E1, class E2>
    inline void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::computed_assign(e1, e2);
    }

    template <class E1, class E2, class F>
    inline void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::scalar_computed_assign(e1, e2, std::forward<F>(f));
    }

    template <class E1, class E2>
    inline void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        using tag = xexpression_tag_t<E1, E2>;
        xexpression_assigner<tag>::assert_compatible_shape(e1, e2);
    }

    /***************************************
     * xexpression_assigner implementation *
     ***************************************/

    namespace detail
    {
        template <class E1, class E2>
        constexpr bool linear_static_layout()
        {
            return compute_layout(
                       select_layout<E1::static_layout, typename E1::shape_type>::value,
                       select_layout<E2::static_layout, typename E2::shape_type>::value
                   ) != layout_type::dynamic;
        }

        template <class E1, class E2>
        inline auto is_linear_assign(const E1& e1, const E2& e2) -> std::enable_if_t<has_strides<E1>::value, bool>
        {
            return (E1::contiguous_layout && E2::contiguous_layout && linear_static_layout<E1, E2>())
                   || (e1.is_contiguous() && e2.has_linear_assign(e1.strides()));
        }
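
        // Sketch of when the linear path fires (hypothetical expressions,
        // for illustration only):
        //
        //     xt::xtensor<double, 2> a, b;        // both row-major, contiguous
        //     is_linear_assign(a, b);             // true: one flat 1-D loop
        //
        //     auto v = xt::view(b, xt::all(), 0); // strided column view
        //     is_linear_assign(a, v);             // false: strides do not match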

        template <class E1, class E2>
        inline auto is_linear_assign(const E1&, const E2&) -> std::enable_if_t<!has_strides<E1>::value, bool>
        {
            return false;
        }

        template <class E1, class E2>
        inline bool linear_dynamic_layout(const E1& e1, const E2& e2)
        {
            return e1.is_contiguous() && e2.is_contiguous() && e1.layout() == e2.layout();
        }

        template <class E, class = void>
        struct has_step_leading : std::false_type
        {
        };

        template <class E>
        struct has_step_leading<E, void_t<decltype(std::declval<E>().step_leading())>> : std::true_type
        {
        };
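
        // Illustrative check of the void_t detector (hypothetical type, for
        // illustration only):
        //
        //     struct fake_stepper { void step_leading(); };
        //     static_assert(has_step_leading<fake_stepper>::value, "detected");
        //     static_assert(!has_step_leading<int>::value, "no such member");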

        template <class T>
        struct use_strided_loop
        {
            static constexpr bool stepper_deref()
            {
                return std::is_reference<typename T::stepper::reference>::value;
            }

            static constexpr bool value = has_strides<T>::value
                                          && has_step_leading<typename T::stepper>::value && stepper_deref();
        };

        template <class T>
        struct use_strided_loop<xscalar<T>>
        {
            static constexpr bool value = true;
        };

        template <class F, class... CT>
        struct use_strided_loop<xfunction<F, CT...>>
        {
            static constexpr bool value = std::conjunction<use_strided_loop<std::decay_t<CT>>...>::value;
        };

        template <class T1, class T2>
        struct conditional_promote_to_complex
        {
            static constexpr bool cond = xtl::is_gen_complex<T1>::value && !xtl::is_gen_complex<T2>::value;
            using type = std::conditional_t<cond, T1, T2>;
        };

        template <class T1, class T2>
        using conditional_promote_to_complex_t = typename conditional_promote_to_complex<T1, T2>::type;
    }
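
    // Illustrative promotions (assuming xtl::is_gen_complex matches
    // std::complex):
    //
    //     detail::conditional_promote_to_complex_t<std::complex<double>, double>
    //         // -> std::complex<double> : lhs complex, rhs real, so promote
    //     detail::conditional_promote_to_complex_t<double, std::complex<double>>
    //         // -> std::complex<double> : rhs already complex, kept as-is
    //     detail::conditional_promote_to_complex_t<double, float>
    //         // -> float : no complex type involved, T2 is kept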

    /******************
     * xassign_traits *
     ******************/

    template <class E1, class E2>
    class xassign_traits
    {
    private:

        using e1_value_type = typename E1::value_type;
        using e2_value_type = typename E2::value_type;

        template <class T>
        using is_bool = std::is_same<T, bool>;

        static constexpr bool is_bool_conversion()
        {
            return is_bool<e2_value_type>::value && !is_bool<e1_value_type>::value;
        }

        static constexpr bool contiguous_layout()
        {
            return E1::contiguous_layout && E2::contiguous_layout;
        }

        static constexpr bool convertible_types()
        {
            return std::is_convertible<e2_value_type, e1_value_type>::value && !is_bool_conversion();
        }

        static constexpr bool use_xsimd()
        {
            return xt_simd::simd_traits<int8_t>::size > 1;
        }

        template <class T>
        static constexpr bool simd_size_impl()
        {
            return xt_simd::simd_traits<T>::size > 1 || (is_bool<T>::value && use_xsimd());
        }

        static constexpr bool simd_size()
        {
            return simd_size_impl<e1_value_type>() && simd_size_impl<e2_value_type>();
        }

        static constexpr bool simd_interface()
        {
            return has_simd_interface<E1, requested_value_type>::value
                   && has_simd_interface<E2, requested_value_type>::value;
        }

    public:

        static constexpr bool simd_assign()
        {
            return convertible_types() && simd_size() && simd_interface();
        }

        static constexpr bool linear_assign(const E1& e1, const E2& e2, bool trivial)
        {
            return trivial && detail::is_linear_assign(e1, e2);
        }

        static constexpr bool strided_assign()
        {
            return detail::use_strided_loop<E1>::value && detail::use_strided_loop<E2>::value;
        }

        static constexpr bool simd_linear_assign()
        {
            return contiguous_layout() && simd_assign();
        }

        static constexpr bool simd_strided_assign()
        {
            return strided_assign() && simd_assign();
        }

        static constexpr bool simd_linear_assign(const E1& e1, const E2& e2)
        {
            return simd_assign() && detail::linear_dynamic_layout(e1, e2);
        }

        using e2_requested_value_type = std::conditional_t<is_bool<e2_value_type>::value, typename E2::bool_load_type, e2_value_type>;
        using requested_value_type = detail::conditional_promote_to_complex_t<e1_value_type, e2_requested_value_type>;
    };
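
    // Illustrative dispatch summary (hypothetical operands): with a and b
    // contiguous double containers, xassign_traits of the pair typically has
    // simd_linear_assign() == true, so the assignment becomes one vectorized
    // 1-D loop; assigning a bool expression into a double container instead
    // disables SIMD through is_bool_conversion().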

    template <class E1, class E2>
    inline void xexpression_assigner_base<xtensor_expression_tag>::assign_data(
        xexpression<E1>& e1,
        const xexpression<E2>& e2,
        bool trivial
    )
    {
        E1& de1 = e1.derived_cast();
        const E2& de2 = e2.derived_cast();
        using traits = xassign_traits<E1, E2>;

        bool linear_assign = traits::linear_assign(de1, de2, trivial);
        constexpr bool simd_assign = traits::simd_assign();
        constexpr bool simd_linear_assign = traits::simd_linear_assign();
        constexpr bool simd_strided_assign = traits::simd_strided_assign();

        if (linear_assign)
        {
            if (simd_linear_assign || traits::simd_linear_assign(de1, de2))
            {
                // Contiguous containers with matching layouts: run the
                // vectorized linear assigner.
                linear_assigner<simd_assign>::run(de1, de2);
            }
            else
            {
                // Linear but the layouts forbid SIMD loads: scalar linear loop.
                linear_assigner<false>::run(de1, de2);
            }
        }
        else if (simd_strided_assign)
        {
            strided_loop_assigner<simd_strided_assign>::run(de1, de2);
        }
        else
        {
            stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(de1, de2).run();
        }
    }

    template <class Tag>
    template <class E1, class E2>
    inline void xexpression_assigner<Tag>::assign_xexpression(E1& e1, const E2& e2)
    {
        bool trivial_broadcast = resize(e1.derived_cast(), e2.derived_cast());
        base_type::assign_data(e1, e2, trivial_broadcast);
    }

    template <class Tag>
    template <class E1, class E2>
    inline void xexpression_assigner<Tag>::computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        using shape_type = typename E1::shape_type;
        using comperator_type = std::greater<typename shape_type::value_type>;
        using size_type = typename E1::size_type;

        E1& de1 = e1.derived_cast();
        const E2& de2 = e2.derived_cast();

        size_type dim2 = de2.dimension();
        shape_type shape = uninitialized_shape<shape_type>(dim2);

        bool trivial_broadcast = de2.broadcast_shape(shape, true);

        auto&& de1_shape = de1.shape();
        if (dim2 > de1.dimension()
            || std::lexicographical_compare(shape.begin(), shape.end(), de1_shape.begin(), de1_shape.end(), comperator_type()))
        {
            // The broadcast result is larger than the destination: evaluate
            // into a temporary and move it in.
            typename E1::temporary_type tmp(shape);
            base_type::assign_data(tmp, e2, trivial_broadcast);
            de1.assign_temporary(std::move(tmp));
        }
        else
        {
            base_type::assign_data(e1, e2, trivial_broadcast);
        }
    }
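
    // A sketch of the resize-through-temporary path (illustrative only):
    //
    //     xt::xarray<double> a = xt::zeros<double>({3});
    //     a = xt::ones<double>({2, 3}) + a;   // rhs broadcasts to {2, 3}
    //
    // Here dim2 (= 2) exceeds a.dimension() (= 1), so the result is first
    // evaluated into a temporary of shape {2, 3} and then moved into a
    // through assign_temporary.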

    template <class Tag>
    template <class E1, class E2, class F>
    inline void xexpression_assigner<Tag>::scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
    {
        E1& d = e1.derived_cast();
        using size_type = typename E1::size_type;
        auto dst = d.storage().begin();
        for (size_type i = d.size(); i > 0; --i)
        {
            *dst = f(*dst, e2);
            ++dst;
        }
    }

    template <class Tag>
    template <class E1, class E2>
    inline void xexpression_assigner<Tag>::assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
    {
        const E1& de1 = e1.derived_cast();
        const E2& de2 = e2.derived_cast();
        if (!broadcastable(de2.shape(), de1.shape()))
        {
            throw_broadcast_error(de2.shape(), de1.shape());
        }
    }

    namespace detail
    {
        template <bool B, class... CT>
        struct static_trivial_broadcast;

        template <class... CT>
        struct static_trivial_broadcast<true, CT...>
        {
            static constexpr bool value = detail::promote_index<typename std::decay_t<CT>::shape_type...>::value;
        };

        template <class... CT>
        struct static_trivial_broadcast<false, CT...>
        {
            static constexpr bool value = false;
        };
    }

    template <class Tag>
    template <class E1, class E2>
    inline bool xexpression_assigner<Tag>::resize(E1& e1, const E2& e2)
    {
        e1.resize(e2.shape());
        return true;
    }

    template <class Tag>
    template <class E1, class F, class... CT>
    inline bool xexpression_assigner<Tag>::resize(E1& e1, const xfunction<F, CT...>& e2)
    {
        if constexpr (detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value)
        {
            // The shape is fixed at compile time: resize from the static
            // shape and compute the trivial-broadcast flag statically.
            e1.resize(typename xfunction<F, CT...>::shape_type{});
            return detail::static_trivial_broadcast<
                detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value,
                CT...>::value;
        }
        else
        {
            using index_type = xindex_type_t<typename E1::shape_type>;
            using size_type = typename E1::size_type;
            size_type size = e2.dimension();
            index_type shape = uninitialized_shape<index_type>(size);
            bool trivial_broadcast = e2.broadcast_shape(shape, true);
            e1.resize(std::move(shape));
            return trivial_broadcast;
        }
    }

    /************************************
     * stepper_assigner implementation *
     ************************************/

    template <class FROM, class TO>
    struct is_narrowing_conversion
    {
        using argument_type = std::decay_t<FROM>;
        using result_type = std::decay_t<TO>;

        static const bool value = xtl::is_arithmetic<result_type>::value
                                  && (sizeof(result_type) < sizeof(argument_type)
                                      || (xtl::is_integral<result_type>::value
                                          && std::is_floating_point<argument_type>::value));
    };

    template <class FROM, class TO>
    struct has_sign_conversion
    {
        using argument_type = std::decay_t<FROM>;
        using result_type = std::decay_t<TO>;

        static const bool value = xtl::is_signed<argument_type>::value != xtl::is_signed<result_type>::value;
    };

    template <class FROM, class TO>
    struct has_assign_conversion
    {
        using argument_type = std::decay_t<FROM>;
        using result_type = std::decay_t<TO>;

        static const bool value = is_narrowing_conversion<argument_type, result_type>::value
                                  || has_sign_conversion<argument_type, result_type>::value;
    };
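
    // Illustrative values (assuming the xtl traits mirror the std ones):
    //
    //     is_narrowing_conversion<double, int>::value  // true: float -> integral
    //     is_narrowing_conversion<int, double>::value  // false: widening
    //     has_sign_conversion<int, unsigned>::value    // true: signedness differs
    //     has_assign_conversion<double, int>::value    // true: narrowing implies it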

    template <class E1, class E2, layout_type L>
    inline stepper_assigner<E1, E2, L>::stepper_assigner(E1& e1, const E2& e2)
        : m_e1(e1)
        , m_lhs(e1.stepper_begin(e1.shape()))
        , m_rhs(e2.stepper_begin(e1.shape()))
        , m_index(xtl::make_sequence<index_type>(e1.shape().size(), size_type(0)))
    {
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::run()
    {
        using tmp_size_type = typename E1::size_type;
        using argument_type = std::decay_t<decltype(*m_rhs)>;
        using result_type = std::decay_t<decltype(*m_lhs)>;
        constexpr bool needs_cast = has_assign_conversion<argument_type, result_type>::value;

        tmp_size_type s = m_e1.size();
        for (tmp_size_type i = 0; i < s; ++i)
        {
            *m_lhs = conditional_cast<needs_cast, result_type>(*m_rhs);
            stepper_tools<L>::increment_stepper(*this, m_index, m_e1.shape());
        }
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::step(size_type i)
    {
        m_lhs.step(i);
        m_rhs.step(i);
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::step(size_type i, size_type n)
    {
        m_lhs.step(i, n);
        m_rhs.step(i, n);
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::reset(size_type i)
    {
        m_lhs.reset(i);
        m_rhs.reset(i);
    }

    template <class E1, class E2, layout_type L>
    inline void stepper_assigner<E1, E2, L>::to_end(layout_type l)
    {
        m_lhs.to_end(l);
        m_rhs.to_end(l);
    }

    /**********************************
     * linear_assigner implementation *
     **********************************/

    template <bool simd_assign>
    template <class E1, class E2>
    inline void linear_assigner<simd_assign>::run(E1& e1, const E2& e2)
    {
        using lhs_align_mode = xt_simd::container_alignment_t<E1>;
        constexpr bool is_aligned = std::is_same<lhs_align_mode, aligned_mode>::value;
        using rhs_align_mode = std::conditional_t<is_aligned, inner_aligned_mode, unaligned_mode>;
        using e1_value_type = typename E1::value_type;
        using e2_value_type = typename E2::value_type;
        using value_type = typename xassign_traits<E1, E2>::requested_value_type;
        using simd_type = xt_simd::simd_type<value_type>;
        using size_type = typename E1::size_type;

        size_type size = e1.size();
        constexpr size_type simd_size = simd_type::size;
        constexpr bool needs_cast = has_assign_conversion<e2_value_type, e1_value_type>::value;

        size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size);
        size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
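
        // Worked example (illustrative): size = 103, simd_size = 8 and
        // get_alignment_offset returning 3 give
        // align_end = 3 + ((103 - 3) & ~size_type(7)) = 99, so elements
        // [0, 3) and [99, 103) go through the scalar loops below while
        // [3, 99) is processed in SIMD batches.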

        // Scalar prologue up to the first aligned element.
        for (size_type i = 0; i < align_begin; ++i)
        {
            e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
        }

#if defined(XTENSOR_USE_TBB)
        if (size >= XTENSOR_TBB_THRESHOLD)
        {
            tbb::static_partitioner ap;
            tbb::parallel_for(
                align_begin,
                align_end,
                simd_size,
                [&e1, &e2](size_type i)
                {
                    e1.template store_simd<lhs_align_mode>(
                        i,
                        e2.template load_simd<rhs_align_mode, value_type>(i)
                    );
                },
                ap
            );
        }
        else
        {
            for (size_type i = align_begin; i < align_end; i += simd_size)
            {
                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
            }
        }
#elif defined(XTENSOR_USE_OPENMP)
        if (size >= size_type(XTENSOR_OPENMP_TRESHOLD))
        {
#ifndef _WIN32
#pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2)
            for (size_type i = align_begin; i < align_end; i += simd_size)
            {
                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
            }
#else
            // MSVC's OpenMP implementation requires a signed loop index.
#pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2)
            for (auto i = static_cast<std::ptrdiff_t>(align_begin); i < static_cast<std::ptrdiff_t>(align_end);
                 i += static_cast<std::ptrdiff_t>(simd_size))
            {
                size_type ui = static_cast<size_type>(i);
                e1.template store_simd<lhs_align_mode>(ui, e2.template load_simd<rhs_align_mode, value_type>(ui));
            }
#endif
        }
        else
        {
            for (size_type i = align_begin; i < align_end; i += simd_size)
            {
                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
            }
        }
#else
        for (size_type i = align_begin; i < align_end; i += simd_size)
        {
            e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
        }
#endif
        // Scalar epilogue for the tail that does not fill a SIMD batch.
        for (size_type i = align_end; i < size; ++i)
        {
            e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
        }
    }

    template <class E1, class E2>
    inline void linear_assigner<false>::run(E1& e1, const E2& e2)
    {
        using is_convertible = std::is_convertible<typename std::decay_t<E2>::value_type, typename std::decay_t<E1>::value_type>;
        // Instantiation must succeed even for non-convertible types; the
        // actual work (or a precondition failure) happens in run_impl.
        run_impl(e1, e2, is_convertible());
    }

    template <class E1, class E2>
    inline void linear_assigner<false>::run_impl(E1& e1, const E2& e2, std::true_type /*convertible*/)
    {
        using value_type = typename E1::value_type;
        using size_type = typename E1::size_type;
        auto src = linear_begin(e2);
        auto dst = linear_begin(e1);
        size_type n = e1.size();
#if defined(XTENSOR_USE_TBB)
        tbb::static_partitioner sp;
        tbb::parallel_for(
            std::ptrdiff_t(0),
            static_cast<std::ptrdiff_t>(n),
            [&](std::ptrdiff_t i)
            {
                *(dst + i) = static_cast<value_type>(*(src + i));
            },
            sp
        );
#elif defined(XTENSOR_USE_OPENMP)
        if (n >= XTENSOR_OPENMP_TRESHOLD)
        {
#pragma omp parallel for default(none) shared(src, dst, n)
            for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast<std::ptrdiff_t>(n); i++)
            {
                *(dst + i) = static_cast<value_type>(*(src + i));
            }
        }
        else
        {
            for (; n > size_type(0); --n)
            {
                *dst = static_cast<value_type>(*src);
                ++src;
                ++dst;
            }
        }
#else
        for (; n > size_type(0); --n)
        {
            *dst = static_cast<value_type>(*src);
            ++src;
            ++dst;
        }
#endif
    }

    template <class E1, class E2>
    inline void linear_assigner<false>::run_impl(E1&, const E2&, std::false_type /*not convertible*/)
    {
        XTENSOR_PRECONDITION(false, "Internal error: linear_assigner called with unrelated types.");
    }

    /****************************************
     * strided_loop_assigner implementation *
     ****************************************/

    namespace strided_assign_detail
    {
        template <layout_type layout>
        struct idx_tools;

        template <>
        struct idx_tools<layout_type::row_major>
        {
            template <class T>
            static void next_idx(T& outer_index, T& outer_shape)
            {
                auto i = outer_index.size();
                for (; i > 0; --i)
                {
                    if (outer_index[i - 1] + 1 >= outer_shape[i - 1])
                    {
                        outer_index[i - 1] = 0;
                    }
                    else
                    {
                        outer_index[i - 1]++;
                        break;
                    }
                }
            }

            template <class T>
            static void nth_idx(std::size_t n, T& outer_index, const T& outer_shape)
            {
                dynamic_shape<std::size_t> stride_sizes;
                xt::resize_container(stride_sizes, outer_shape.size());

                using size_type = typename T::size_type;
                // compute the row-major strides of the outer shape
                for (size_type i = outer_shape.size(); i > 0; i--)
                {
                    stride_sizes[i - 1] = (i == outer_shape.size()) ? 1 : stride_sizes[i] * outer_shape[i];
                }

                // unravel the linear index n into outer_index
                for (size_type i = 0; i < outer_shape.size(); i++)
                {
                    auto d_idx = n / stride_sizes[i];
                    outer_index[i] = d_idx;
                    n -= d_idx * stride_sizes[i];
                }
            }
        };
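
        // Worked example for the row-major tools (illustrative):
        // outer_shape = {2, 3, 4} gives stride_sizes = {12, 4, 1}, so
        // nth_idx(17, idx, outer_shape) produces idx = {1, 1, 1}
        // because 17 = 1*12 + 1*4 + 1*1.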

        template <>
        struct idx_tools<layout_type::column_major>
        {
            template <class T>
            static void next_idx(T& outer_index, T& outer_shape)
            {
                using size_type = typename T::size_type;
                size_type i = 0;
                auto sz = outer_index.size();
                for (; i < sz; ++i)
                {
                    if (outer_index[i] + 1 >= outer_shape[i])
                    {
                        outer_index[i] = 0;
                    }
                    else
                    {
                        outer_index[i]++;
                        break;
                    }
                }
            }

            template <class T>
            static void nth_idx(std::size_t n, T& outer_index, const T& outer_shape)
            {
                dynamic_shape<std::size_t> stride_sizes;
                xt::resize_container(stride_sizes, outer_shape.size());

                using size_type = typename T::size_type;
                // compute the column-major strides of the outer shape
                for (size_type i = 0; i < outer_shape.size(); i++)
                {
                    stride_sizes[i] = (i == 0) ? 1 : stride_sizes[i - 1] * outer_shape[i - 1];
                }

                // unravel the linear index n into outer_index
                for (size_type i = outer_shape.size(); i > 0;)
                {
                    --i;
                    auto d_idx = n / stride_sizes[i];
                    outer_index[i] = d_idx;
                    n -= d_idx * stride_sizes[i];
                }
            }
        };

        template <layout_type L, class S>
        struct check_strides_functor
        {
            using strides_type = S;

            check_strides_functor(const S& strides);

            // Return the dimension index at which the strides of el stop
            // matching the destination strides (the "cut").
            template <class T, layout_type LE = L>
            std::enable_if_t<LE == layout_type::row_major, std::size_t> operator()(const T& el);

            template <class T, layout_type LE = L>
            std::enable_if_t<LE == layout_type::column_major, std::size_t> operator()(const T& el);

            // Function expressions are checked by recursing into every argument.
            template <class F, class... CT>
            std::size_t operator()(const xfunction<F, CT...>& xf)
            {
                xt::for_each(*this, xf.arguments());
                return m_cut;
            }

            std::size_t m_cut;
            const strides_type& m_strides;
        };

        template <bool possible = true, class E1, class E2, std::enable_if_t<!has_strides<E1>::value || !possible, bool> = true>
        loop_sizes_t get_loop_sizes(const E1& e1, const E2&)
        {
            return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
        }

        template <bool possible = true, class E1, class E2, std::enable_if_t<has_strides<E1>::value && possible, bool> = true>
        loop_sizes_t get_loop_sizes(const E1& e1, const E2& e2)
        {
            using shape_value_type = typename E1::shape_type::value_type;
            bool is_row_major = true;

            // Find out whether the destination is contiguous in its last
            // (row-major) or first (column-major) non-trivial dimension.
            auto is_zero = [](auto i)
            {
                return i == 0;
            };
            auto&& strides = e1.strides();
            auto it_bwd = std::find_if_not(strides.rbegin(), strides.rend(), is_zero);
            bool de1_row_contiguous = it_bwd != strides.rend() && *it_bwd == 1;
            auto it_fwd = std::find_if_not(strides.begin(), strides.end(), is_zero);
            bool de1_col_contiguous = it_fwd != strides.end() && *it_fwd == 1;
            if (de1_row_contiguous)
            {
                is_row_major = true;
            }
            else if (de1_col_contiguous)
            {
                is_row_major = false;
            }
            else
            {
                // Neither end is contiguous: no strided assign possible.
                return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
            }

            // Compute the cut: the dimension separating the outer loop from
            // the collapsed inner loop.
            std::size_t cut = 0;
            if (is_row_major)
            {
                auto csf = check_strides_functor<layout_type::row_major, decltype(e1.strides())>(e1.strides());
                cut = csf(e2);
                if (cut < e1.strides().size() - 1)
                {
                    cut = e1.strides().size() - 1;
                }
            }
            else if (!is_row_major)
            {
                auto csf = check_strides_functor<layout_type::column_major, decltype(e1.strides())>(e1.strides());
                cut = csf(e2);
                if (cut > 1)
                {
                    cut = 1;
                }
            }

            std::size_t outer_loop_size = static_cast<std::size_t>(std::accumulate(
                e1.shape().begin(),
                e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
                shape_value_type(1),
                std::multiplies<shape_value_type>{}
            ));
            std::size_t inner_loop_size = static_cast<std::size_t>(std::accumulate(
                e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
                e1.shape().end(),
                shape_value_type(1),
                std::multiplies<shape_value_type>{}
            ));

            if (!is_row_major)
            {
                std::swap(outer_loop_size, inner_loop_size);
            }

            return {inner_loop_size > 1, is_row_major, inner_loop_size, outer_loop_size, cut, e1.dimension()};
        }
    }
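
    // Worked example (illustrative): a row-major destination of shape
    // {4, 5, 6} with cut = 2 gives outer_loop_size = 4 * 5 = 20 and
    // inner_loop_size = 6, i.e. twenty passes each copying six contiguous
    // elements.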

    template <bool simd>
    template <class E1, class E2>
    inline strided_assign_detail::loop_sizes_t strided_loop_assigner<simd>::get_loop_sizes(E1& e1, const E2& e2)
    {
        return strided_assign_detail::get_loop_sizes<simd>(e1, e2);
    }

// Switch for the parallel strided-assign paths below.
#define strided_parallel_assign

    template <bool simd>
    template <class E1, class E2>
    inline void strided_loop_assigner<simd>::run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes)
    {
        bool is_row_major = loop_sizes.is_row_major;
        std::size_t inner_loop_size = loop_sizes.inner_loop_size;
        std::size_t outer_loop_size = loop_sizes.outer_loop_size;
        std::size_t cut = loop_sizes.cut;

        // Build the index and shape of the outer (non-collapsed) dimensions.
        dynamic_shape<std::size_t> idx, max_shape;
        if (is_row_major)
        {
            xt::resize_container(idx, cut);
            max_shape.assign(e1.shape().begin(), e1.shape().begin() + static_cast<std::ptrdiff_t>(cut));
        }
        else
        {
            xt::resize_container(idx, e1.shape().size() - cut);
            max_shape.assign(e1.shape().begin() + static_cast<std::ptrdiff_t>(cut), e1.shape().end());
        }

        // Split the inner loop into SIMD batches and a scalar remainder.
        using e1_value_type = typename E1::value_type;
        using e2_value_type = typename E2::value_type;
        constexpr bool needs_cast = has_assign_conversion<e2_value_type, e1_value_type>::value;
        using value_type = typename xassign_traits<E1, E2>::requested_value_type;
        using simd_type = std::conditional_t<
            std::is_same<e1_value_type, bool>::value,
            xt_simd::simd_bool_type<value_type>,
            xt_simd::simd_type<value_type>>;

        std::size_t simd_size = inner_loop_size / simd_type::size;
        std::size_t simd_rest = inner_loop_size % simd_type::size;

        auto fct_stepper = e2.stepper_begin(e1.shape());
        auto res_stepper = e1.stepper_begin(e1.shape());

        // For column-major assignment the outer dimensions start at the cut.
        std::size_t step_dim = 0;
        if (!is_row_major)
        {
            step_dim = cut;
        }
#if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign)
        if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size)
        {
            std::size_t first_step = true;
#pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx)
            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
            {
                if (first_step)
                {
                    // Position this thread's steppers at its first outer index.
                    if (is_row_major)
                    {
                        strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape);
                    }
                    else
                    {
                        strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(ox, idx, max_shape);
                    }
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                        res_stepper.step(i + step_dim, idx[i]);
                    }
                    first_step = false;
                }

                for (std::size_t i = 0; i < simd_size; ++i)
                {
                    res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
                }
                for (std::size_t i = 0; i < simd_rest; ++i)
                {
                    *res_stepper = conditional_cast<needs_cast, e1_value_type>(*fct_stepper);
                    res_stepper.step_leading();
                    fct_stepper.step_leading();
                }

                // Advance to the next outer index and re-position the steppers.
                if (is_row_major)
                {
                    strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape);
                }
                else
                {
                    strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
                }
                fct_stepper.to_begin();

                // E1 also needs re-positioning when the assignment is not contiguous (e.g. a view).
                if (!E1::contiguous_layout)
                {
                    res_stepper.to_begin();
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                        res_stepper.step(i + step_dim, idx[i]);
                    }
                }
                else
                {
                    for (std::size_t i = 0; i < idx.size(); ++i)
                    {
                        fct_stepper.step(i + step_dim, idx[i]);
                    }
                }
            }
        }
        else
        {
#elif defined(strided_parallel_assign) && defined(XTENSOR_USE_TBB)
        if (outer_loop_size > XTENSOR_TBB_THRESHOLD / inner_loop_size)
        {
            tbb::static_partitioner sp;
            tbb::parallel_for(
                tbb::blocked_range<size_t>(0ul, outer_loop_size),
                [&e1, &e2, is_row_major, step_dim, simd_size, simd_rest, &max_shape, &idx_ = idx](
                    const tbb::blocked_range<size_t>& r
                )
                {
                    // Every task works on its own copy of the index and its own steppers.
                    auto idx = idx_;
                    auto fct_stepper = e2.stepper_begin(e1.shape());
                    auto res_stepper = e1.stepper_begin(e1.shape());
                    std::size_t first_step = true;
                    for (std::size_t ox = r.begin(); ox < r.end(); ++ox)
                    {
                        if (first_step)
                        {
                            // Position the steppers at the first outer index of this range.
                            if (is_row_major)
                            {
                                strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape);
                            }
                            else
                            {
                                strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(ox, idx, max_shape);
                            }
                            for (std::size_t i = 0; i < idx.size(); ++i)
                            {
                                fct_stepper.step(i + step_dim, idx[i]);
                                res_stepper.step(i + step_dim, idx[i]);
                            }
                            first_step = false;
                        }

                        for (std::size_t i = 0; i < simd_size; ++i)
                        {
                            res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
                        }
                        for (std::size_t i = 0; i < simd_rest; ++i)
                        {
                            *res_stepper = conditional_cast<needs_cast, e1_value_type>(*fct_stepper);
                            res_stepper.step_leading();
                            fct_stepper.step_leading();
                        }

                        // Advance to the next outer index and re-position the steppers.
                        if (is_row_major)
                        {
                            strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape);
                        }
                        else
                        {
                            strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
                        }
                        fct_stepper.to_begin();

                        // E1 also needs re-positioning when the assignment is not contiguous (e.g. a view).
                        if (!E1::contiguous_layout)
                        {
                            res_stepper.to_begin();
                            for (std::size_t i = 0; i < idx.size(); ++i)
                            {
                                fct_stepper.step(i + step_dim, idx[i]);
                                res_stepper.step(i + step_dim, idx[i]);
                            }
                        }
                        else
                        {
                            for (std::size_t i = 0; i < idx.size(); ++i)
                            {
                                fct_stepper.step(i + step_dim, idx[i]);
                            }
                        }
                    }
                },
                sp
            );
        }
        else
        {
#endif
        // Serial fallback: iterate over all outer indices in order.
        for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
        {
            for (std::size_t i = 0; i < simd_size; ++i)
            {
                res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
            }
            for (std::size_t i = 0; i < simd_rest; ++i)
            {
                *res_stepper = conditional_cast<needs_cast, e1_value_type>(*fct_stepper);
                res_stepper.step_leading();
                fct_stepper.step_leading();
            }

            if (is_row_major)
            {
                strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape);
            }
            else
            {
                strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
            }
            fct_stepper.to_begin();

            // need to step E1 as well if the assignment is not contiguous (e.g. a view)
            if (!E1::contiguous_layout)
            {
                res_stepper.to_begin();
                for (std::size_t i = 0; i < idx.size(); ++i)
                {
                    fct_stepper.step(i + step_dim, idx[i]);
                    res_stepper.step(i + step_dim, idx[i]);
                }
            }
            else
            {
                for (std::size_t i = 0; i < idx.size(); ++i)
                {
                    fct_stepper.step(i + step_dim, idx[i]);
                }
            }
        }
#if (defined(XTENSOR_USE_OPENMP) || defined(XTENSOR_USE_TBB)) && defined(strided_parallel_assign)
        }
#endif
    }

    template <>
    template <class E1, class E2>
    inline void strided_loop_assigner<true>::run(E1& e1, const E2& e2)
    {
        auto loop_sizes = get_loop_sizes(e1, e2);
        if (loop_sizes.can_do_strided_assign)
        {
            run(e1, e2, loop_sizes);
        }
        else
        {
            // A strided loop is not possible after all: fall back to the
            // stepper-based assigner.
            stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
        }
    }

    template <>
    template <class E1, class E2>
    inline void strided_loop_assigner<false>::run(E1&, const E2&, const loop_sizes_t&)
    {
    }

    template <>
    template <class E1, class E2>
    inline void strided_loop_assigner<false>::run(E1& e1, const E2& e2)
    {
        // Without SIMD the strided loop brings no benefit: delegate to the
        // stepper-based assigner.
        stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
    }
}

#endif