10#ifndef XTENSOR_ASSIGN_HPP 
   11#define XTENSOR_ASSIGN_HPP 
   18#include <xtl/xcomplex.hpp> 
   19#include <xtl/xsequence.hpp> 
   21#include "../core/xexpression.hpp" 
   22#include "../core/xfunction.hpp" 
   23#include "../core/xiterator.hpp" 
   24#include "../core/xstrides.hpp" 
   25#include "../core/xtensor_config.hpp" 
   26#include "../core/xtensor_forward.hpp" 
   27#include "../utils/xutils.hpp" 
   29#if defined(XTENSOR_USE_TBB) 
   40    template <
class E1, 
class E2>
 
   43    template <
class E1, 
class E2>
 
   46    template <
class E1, 
class E2>
 
   49    template <
class E1, 
class E2, 
class F>
 
   52    template <
class E1, 
class E2>
 
   55    template <
class E1, 
class E2>
 
   56    void strided_assign(E1& e1, 
const E2& e2, std::false_type );
 
   58    template <
class E1, 
class E2>
 
   59    void strided_assign(E1& e1, 
const E2& e2, std::true_type );
 
   73        template <
class E1, 
class E2>
 
 
   84        template <
class E1, 
class E2>
 
   85        static void assign_xexpression(E1& e1, 
const E2& e2);
 
   87        template <
class E1, 
class E2>
 
   90        template <
class E1, 
class E2, 
class F>
 
   91        static void scalar_computed_assign(
xexpression<E1>& e1, 
const E2& e2, F&& f);
 
   93        template <
class E1, 
class E2>
 
   98        template <
class E1, 
class E2>
 
   99        static bool resize(E1& e1, 
const E2& e2);
 
  101        template <
class E1, 
class F, 
class... CT>
 
 
  109    template <
class E1, 
class E2, layout_type L>
 
  110    class stepper_assigner
 
  114        using lhs_iterator = 
typename E1::stepper;
 
  115        using rhs_iterator = 
typename E2::const_stepper;
 
  116        using shape_type = 
typename E1::shape_type;
 
  117        using index_type = xindex_type_t<shape_type>;
 
  118        using size_type = 
typename lhs_iterator::size_type;
 
  119        using difference_type = 
typename lhs_iterator::difference_type;
 
  121        stepper_assigner(E1& e1, 
const E2& e2);
 
  125        void step(size_type i);
 
  126        void step(size_type i, size_type n);
 
  127        void reset(size_type i);
 
 
  145    template <
bool simd_assign>
 
  150        template <
class E1, 
class E2>
 
  151        static void run(E1& e1, 
const E2& e2);
 
 
  159        template <
class E1, 
class E2>
 
  160        static void run(E1& e1, 
const E2& e2);
 
  164        template <
class E1, 
class E2>
 
  165        static void run_impl(E1& e1, 
const E2& e2, std::true_type);
 
  167        template <
class E1, 
class E2>
 
  168        static void run_impl(E1& e1, 
const E2& e2, std::false_type);
 
 
  175    namespace strided_assign_detail
 
  179            bool can_do_strided_assign;
 
  181            std::size_t inner_loop_size;
 
  182            std::size_t outer_loop_size;
 
  184            std::size_t dimension;
 
 
  195        template <
class E1, 
class E2>
 
  196        static void run(E1& e1, 
const E2& e2, 
const loop_sizes_t& loop_sizes);
 
  197        template <
class E1, 
class E2>
 
  198        static loop_sizes_t get_loop_sizes(E1& e1, 
const E2& e2);
 
  199        template <
class E1, 
class E2>
 
  200        static void run(E1& e1, 
const E2& e2);
 
 
  207    template <
class E1, 
class E2>
 
  210        using tag = xexpression_tag_t<E1, E2>;
 
  214    template <
class E1, 
class E2>
 
  215    inline void assign_xexpression(xexpression<E1>& e1, 
const xexpression<E2>& e2)
 
  217        if constexpr (has_assign_to<E1, E2>::value)
 
  219            e2.derived_cast().assign_to(e1);
 
  223            using tag = xexpression_tag_t<E1, E2>;
 
  224            xexpression_assigner<tag>::assign_xexpression(e1, e2);
 
  228    template <
class E1, 
class E2>
 
  231        using tag = xexpression_tag_t<E1, E2>;
 
  232        xexpression_assigner<tag>::computed_assign(e1, e2);
 
  235    template <
class E1, 
class E2, 
class F>
 
  236    inline void scalar_computed_assign(
xexpression<E1>& e1, 
const E2& e2, F&& f)
 
  238        using tag = xexpression_tag_t<E1, E2>;
 
  239        xexpression_assigner<tag>::scalar_computed_assign(e1, e2, std::forward<F>(f));
 
  242    template <
class E1, 
class E2>
 
  245        using tag = xexpression_tag_t<E1, E2>;
 
  246        xexpression_assigner<tag>::assert_compatible_shape(e1, e2);
 
  255        template <
class E1, 
class E2>
 
  256        constexpr bool linear_static_layout()
 
  262                       select_layout<E1::static_layout, typename E1::shape_type>::value,
 
  263                       select_layout<E2::static_layout, typename E2::shape_type>::value
 
  268        template <
class E1, 
class E2>
 
  269        inline auto is_linear_assign(
const E1& e1, 
const E2& e2)
 
  270            -> std::enable_if_t<has_strides<E1>::value, 
bool>
 
  272            return (E1::contiguous_layout && E2::contiguous_layout && linear_static_layout<E1, E2>())
 
  273                   || (e1.is_contiguous() && e2.has_linear_assign(e1.strides()));
 
  276        template <
class E1, 
class E2>
 
  277        inline auto is_linear_assign(
const E1&, 
const E2&) -> std::enable_if_t<!has_strides<E1>::value, 
bool>
 
  282        template <
class E1, 
class E2>
 
  283        inline bool linear_dynamic_layout(
const E1& e1, 
const E2& e2)
 
  285            return e1.is_contiguous() && e2.is_contiguous()
 
  289        template <
class E, 
class = 
void>
 
  290        struct has_step_leading : std::false_type
 
  295        struct has_step_leading<E, void_t<decltype(std::declval<E>().step_leading())>> : std::true_type
 
  300        struct use_strided_loop
 
  302            static constexpr bool stepper_deref()
 
  304                return std::is_reference<typename T::stepper::reference>::value;
 
  307            static constexpr bool value = has_strides<T>::value
 
  308                                          && has_step_leading<typename T::stepper>::value && stepper_deref();
 
  312        struct use_strided_loop<xscalar<T>>
 
  314            static constexpr bool value = 
true;
 
  317        template <
class F, 
class... CT>
 
  318        struct use_strided_loop<xfunction<F, CT...>>
 
  320            static constexpr bool value = std::conjunction<use_strided_loop<std::decay_t<CT>>...>::value;
 
  339        template <
class T1, 
class T2>
 
  340        struct conditional_promote_to_complex
 
  342            static constexpr bool cond = xtl::is_gen_complex<T1>::value && !xtl::is_gen_complex<T2>::value;
 
  344            using type = std::conditional_t<cond, T1, T2>;
 
  347        template <
class T1, 
class T2>
 
  348        using conditional_promote_to_complex_t = 
typename conditional_promote_to_complex<T1, T2>::type;
 
  351    template <
class E1, 
class E2>
 
  356        using e1_value_type = 
typename E1::value_type;
 
  357        using e2_value_type = 
typename E2::value_type;
 
  360        using is_bool = std::is_same<T, bool>;
 
  362        static constexpr bool is_bool_conversion()
 
  364            return is_bool<e2_value_type>::value && !is_bool<e1_value_type>::value;
 
  367        static constexpr bool contiguous_layout()
 
  369            return E1::contiguous_layout && E2::contiguous_layout;
 
  372        static constexpr bool convertible_types()
 
  374            return std::is_convertible<e2_value_type, e1_value_type>::value && !is_bool_conversion();
 
  377        static constexpr bool use_xsimd()
 
  379            return xt_simd::simd_traits<int8_t>::size > 1;
 
  383        static constexpr bool simd_size_impl()
 
  385            return xt_simd::simd_traits<T>::size > 1 || (is_bool<T>::value && use_xsimd());
 
  388        static constexpr bool simd_size()
 
  390            return simd_size_impl<e1_value_type>() && simd_size_impl<e2_value_type>();
 
  393        static constexpr bool simd_interface()
 
  404        static constexpr bool simd_assign()
 
  406            return convertible_types() && simd_size() && simd_interface();
 
  409        static constexpr bool linear_assign(
const E1& e1, 
const E2& e2, 
bool trivial)
 
  411            return trivial && detail::is_linear_assign(e1, e2);
 
  414        static constexpr bool strided_assign()
 
  416            return detail::use_strided_loop<E1>::value && detail::use_strided_loop<E2>::value;
 
  419        static constexpr bool simd_linear_assign()
 
  421            return contiguous_layout() && simd_assign();
 
  424        static constexpr bool simd_strided_assign()
 
  426            return strided_assign() && simd_assign();
 
  429        static constexpr bool simd_linear_assign(
const E1& e1, 
const E2& e2)
 
  431            return simd_assign() && detail::linear_dynamic_layout(e1, e2);
 
  434        using e2_requested_value_type = std::
 
  435            conditional_t<is_bool<e2_value_type>::value, 
typename E2::bool_load_type, e2_value_type>;
 
  436        using requested_value_type = detail::conditional_promote_to_complex_t<e1_value_type, e2_requested_value_type>;
 
 
  439    template <
class E1, 
class E2>
 
  450        bool linear_assign = traits::linear_assign(de1, de2, trivial);
 
  451        constexpr bool simd_assign = traits::simd_assign();
 
  452        constexpr bool simd_linear_assign = traits::simd_linear_assign();
 
  453        constexpr bool simd_strided_assign = traits::simd_strided_assign();
 
  456            if (simd_linear_assign || traits::simd_linear_assign(de1, de2))
 
  463                linear_assigner<simd_assign>::run(de1, de2);
 
  467                linear_assigner<false>::run(de1, de2);
 
  470        else if (simd_strided_assign)
 
  472            strided_loop_assigner<simd_strided_assign>::run(de1, de2);
 
  481    template <
class E1, 
class E2>
 
  482    inline void xexpression_assigner<Tag>::assign_xexpression(E1& e1, 
const E2& e2)
 
  484        bool trivial_broadcast = resize(e1.derived_cast(), e2.derived_cast());
 
  485        base_type::assign_data(e1, e2, trivial_broadcast);
 
  489    template <
class E1, 
class E2>
 
  492        using shape_type = 
typename E1::shape_type;
 
  493        using comperator_type = std::greater<typename shape_type::value_type>;
 
  495        using size_type = 
typename E1::size_type;
 
  497        E1& de1 = e1.derived_cast();
 
  498        const E2& de2 = e2.derived_cast();
 
  500        size_type dim2 = de2.dimension();
 
  501        shape_type shape = uninitialized_shape<shape_type>(dim2);
 
  503        bool trivial_broadcast = de2.broadcast_shape(shape, 
true);
 
  505        auto&& de1_shape = de1.shape();
 
  506        if (dim2 > de1.dimension()
 
  507            || std::lexicographical_compare(
 
  515            typename E1::temporary_type tmp(shape);
 
  516            base_type::assign_data(tmp, e2, trivial_broadcast);
 
  517            de1.assign_temporary(std::move(tmp));
 
  521            base_type::assign_data(e1, e2, trivial_broadcast);
 
  526    template <
class E1, 
class E2, 
class F>
 
  527    inline void xexpression_assigner<Tag>::scalar_computed_assign(
xexpression<E1>& e1, 
const E2& e2, F&& f)
 
  529        E1& d = e1.derived_cast();
 
  530        using size_type = 
typename E1::size_type;
 
  531        auto dst = d.storage().begin();
 
  532        for (size_type i = d.size(); i > 0; --i)
 
  540    template <
class E1, 
class E2>
 
  544        const E1& de1 = e1.derived_cast();
 
  545        const E2& de2 = e2.derived_cast();
 
  546        if (!broadcastable(de2.shape(), de1.shape()))
 
  548            throw_broadcast_error(de2.shape(), de1.shape());
 
  554        template <
bool B, 
class... CT>
 
  555        struct static_trivial_broadcast;
 
  557        template <
class... CT>
 
  558        struct static_trivial_broadcast<true, CT...>
 
  560            static constexpr bool value = detail::promote_index<typename std::decay_t<CT>::shape_type...>::value;
 
  563        template <
class... CT>
 
  564        struct static_trivial_broadcast<false, CT...>
 
  566            static constexpr bool value = 
false;
 
  571    template <
class E1, 
class E2>
 
  572    inline bool xexpression_assigner<Tag>::resize(E1& e1, 
const E2& e2)
 
  577        e1.resize(e2.shape());
 
  582    template <
class E1, 
class F, 
class... CT>
 
  585        if constexpr (detail::is_fixed<
typename xfunction<F, CT...>::shape_type>::value)
 
  592            e1.resize(
typename xfunction<F, CT...>::shape_type{});
 
  593            return detail::static_trivial_broadcast<
 
  594                detail::is_fixed<
typename xfunction<F, CT...>::shape_type>::value,
 
  599            using index_type = xindex_type_t<typename E1::shape_type>;
 
  600            using size_type = 
typename E1::size_type;
 
  601            size_type size = e2.dimension();
 
  602            index_type shape = uninitialized_shape<index_type>(size);
 
  603            bool trivial_broadcast = e2.broadcast_shape(shape, 
true);
 
  604            e1.resize(std::move(shape));
 
  605            return trivial_broadcast;
 
  613    template <
class FROM, 
class TO>
 
  616        using argument_type = std::decay_t<FROM>;
 
  617        using result_type = std::decay_t<TO>;
 
  619        static const bool value = xtl::is_arithmetic<result_type>::value
 
  620                                  && (
sizeof(result_type) < 
sizeof(argument_type)
 
  621                                      || (xtl::is_integral<result_type>::value
 
  622                                          && std::is_floating_point<argument_type>::value));
 
 
  625    template <
class FROM, 
class TO>
 
  628        using argument_type = std::decay_t<FROM>;
 
  629        using result_type = std::decay_t<TO>;
 
  631        static const bool value = xtl::is_signed<argument_type>::value != xtl::is_signed<result_type>::value;
 
 
  634    template <
class FROM, 
class TO>
 
  637        using argument_type = std::decay_t<FROM>;
 
  638        using result_type = std::decay_t<TO>;
 
  640        static const bool value = is_narrowing_conversion<argument_type, result_type>::value
 
  641                                  || has_sign_conversion<argument_type, result_type>::value;
 
 
  644    template <
class E1, 
class E2, layout_type L>
 
  645    inline stepper_assigner<E1, E2, L>::stepper_assigner(E1& e1, 
const E2& e2)
 
  647        , m_lhs(e1.stepper_begin(e1.shape()))
 
  648        , m_rhs(e2.stepper_begin(e1.shape()))
 
  649        , m_index(xtl::make_sequence<index_type>(e1.shape().size(), size_type(0)))
 
  653    template <
class E1, 
class E2, layout_type L>
 
  654    inline void stepper_assigner<E1, E2, L>::run()
 
  656        using tmp_size_type = 
typename E1::size_type;
 
  657        using argument_type = std::decay_t<
decltype(*m_rhs)>;
 
  658        using result_type = std::decay_t<
decltype(*m_lhs)>;
 
  659        constexpr bool needs_cast = has_assign_conversion<argument_type, result_type>::value;
 
  661        tmp_size_type s = m_e1.size();
 
  662        for (tmp_size_type i = 0; i < s; ++i)
 
  665            stepper_tools<L>::increment_stepper(*
this, m_index, m_e1.shape());
 
  669    template <
class E1, 
class E2, layout_type L>
 
  670    inline void stepper_assigner<E1, E2, L>::step(size_type i)
 
  676    template <
class E1, 
class E2, layout_type L>
 
  677    inline void stepper_assigner<E1, E2, L>::step(size_type i, size_type n)
 
  683    template <
class E1, 
class E2, layout_type L>
 
  684    inline void stepper_assigner<E1, E2, L>::reset(size_type i)
 
  690    template <
class E1, 
class E2, layout_type L>
 
  691    inline void stepper_assigner<E1, E2, L>::to_end(
layout_type l)
 
  701    template <
bool simd_assign>
 
  702    template <
class E1, 
class E2>
 
  703    inline void linear_assigner<simd_assign>::run(E1& e1, 
const E2& e2)
 
  705        using lhs_align_mode = xt_simd::container_alignment_t<E1>;
 
  706        constexpr bool is_aligned = std::is_same<lhs_align_mode, aligned_mode>::value;
 
  707        using rhs_align_mode = std::conditional_t<is_aligned, inner_aligned_mode, unaligned_mode>;
 
  708        using e1_value_type = 
typename E1::value_type;
 
  709        using e2_value_type = 
typename E2::value_type;
 
  710        using value_type = 
typename xassign_traits<E1, E2>::requested_value_type;
 
  711        using simd_type = xt_simd::simd_type<value_type>;
 
  712        using size_type = 
typename E1::size_type;
 
  713        size_type size = e1.size();
 
  714        constexpr size_type simd_size = simd_type::size;
 
  715        constexpr bool needs_cast = has_assign_conversion<e2_value_type, e1_value_type>::value;
 
  717        size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size);
 
  718        size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
 
  720        for (size_type i = 0; i < align_begin; ++i)
 
  725#if defined(XTENSOR_USE_TBB) 
  726        if (size >= XTENSOR_TBB_THRESHOLD)
 
  728            tbb::static_partitioner ap;
 
  735                    e1.template store_simd<lhs_align_mode>(
 
  737                        e2.template load_simd<rhs_align_mode, value_type>(i)
 
  745            for (size_type i = align_begin; i < align_end; i += simd_size)
 
  747                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
 
  750#elif defined(XTENSOR_USE_OPENMP) 
  751        if (size >= size_type(XTENSOR_OPENMP_TRESHOLD))
 
  753#pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2) 
  755            for (size_type i = align_begin; i < align_end; i += simd_size)
 
  757                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
 
  760            for (
auto i = 
static_cast<std::ptrdiff_t
>(align_begin); i < static_cast<std::ptrdiff_t>(align_end);
 
  761                 i += 
static_cast<std::ptrdiff_t
>(simd_size))
 
  763                size_type ui = 
static_cast<size_type
>(i);
 
  764                e1.template store_simd<lhs_align_mode>(ui, e2.template load_simd<rhs_align_mode, value_type>(ui));
 
  770            for (size_type i = align_begin; i < align_end; i += simd_size)
 
  772                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
 
  776        for (size_type i = align_begin; i < align_end; i += simd_size)
 
  778            e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
 
  781        for (size_type i = align_end; i < size; ++i)
 
  787    template <
class E1, 
class E2>
 
  788    inline void linear_assigner<false>::run(E1& e1, 
const E2& e2)
 
  790        using is_convertible = std::
 
  791            is_convertible<typename std::decay_t<E2>::value_type, 
typename std::decay_t<E1>::value_type>;
 
  795        run_impl(e1, e2, is_convertible());
 
  798    template <
class E1, 
class E2>
 
  801        using value_type = 
typename E1::value_type;
 
  802        using size_type = 
typename E1::size_type;
 
  803        auto src = linear_begin(e2);
 
  804        auto dst = linear_begin(e1);
 
  805        size_type n = e1.size();
 
  806#if defined(XTENSOR_USE_TBB) 
  807        tbb::static_partitioner sp;
 
  810            static_cast<std::ptrdiff_t
>(n),
 
  811            [&](std::ptrdiff_t i)
 
  813                *(dst + i) = 
static_cast<value_type
>(*(src + i));
 
  817#elif defined(XTENSOR_USE_OPENMP) 
  818        if (n >= XTENSOR_OPENMP_TRESHOLD)
 
  820#pragma omp parallel for default(none) shared(src, dst, n) 
  821            for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast<std::ptrdiff_t>(n); i++)
 
  823                *(dst + i) = 
static_cast<value_type
>(*(src + i));
 
  828            for (; n > size_type(0); --n)
 
  830                *dst = 
static_cast<value_type
>(*src);
 
  836        for (; n > size_type(0); --n)
 
  838            *dst = 
static_cast<value_type
>(*src);
 
  845    template <
class E1, 
class E2>
 
  848        XTENSOR_PRECONDITION(
false, 
"Internal error: linear_assigner called with unrelated types.");
 
  855    namespace strided_assign_detail
 
  857        template <layout_type layout>
 
  864            static void next_idx(T& outer_index, T& outer_shape)
 
  866                auto i = outer_index.size();
 
  869                    if (outer_index[i - 1] + 1 >= outer_shape[i - 1])
 
  871                        outer_index[i - 1] = 0;
 
  875                        outer_index[i - 1]++;
 
  882            static void nth_idx(
size_t n, T& outer_index, 
const T& outer_shape)
 
  884                dynamic_shape<std::size_t> stride_sizes;
 
  885                xt::resize_container(stride_sizes, outer_shape.size());
 
  887                using size_type = 
typename T::size_type;
 
  888                for (size_type i = outer_shape.size(); i > 0; i--)
 
  890                    stride_sizes[i - 1] = (i == outer_shape.size()) ? 1 : stride_sizes[i] * outer_shape[i];
 
  894                for (size_type i = 0; i < outer_shape.size(); i++)
 
  896                    auto d_idx = n / stride_sizes[i];
 
  897                    outer_index[i] = d_idx;
 
  898                    n -= d_idx * stride_sizes[i];
 
 
  907            static void next_idx(T& outer_index, T& outer_shape)
 
  909                using size_type = 
typename T::size_type;
 
  911                auto sz = outer_index.size();
 
  914                    if (outer_index[i] + 1 >= outer_shape[i])
 
  927            static void nth_idx(
size_t n, T& outer_index, 
const T& outer_shape)
 
  929                dynamic_shape<std::size_t> stride_sizes;
 
  930                xt::resize_container(stride_sizes, outer_shape.size());
 
  932                using size_type = 
typename T::size_type;
 
  935                for (size_type i = 0; i < outer_shape.size(); i++)
 
  937                    stride_sizes[i] = (i == 0) ? 1 : stride_sizes[i - 1] * outer_shape[i - 1];
 
  941                for (size_type i = outer_shape.size(); i > 0;)
 
  944                    auto d_idx = n / stride_sizes[i];
 
  945                    outer_index[i] = d_idx;
 
  946                    n -= d_idx * stride_sizes[i];
 
 
  951        template <layout_type L, 
class S>
 
  952        struct check_strides_functor
 
  954            using strides_type = S;
 
  956            check_strides_functor(
const S& 
strides)
 
  962            template <
class T, layout_type LE = L>
 
  963            std::enable_if_t<LE == layout_type::row_major, std::size_t> operator()(
const T& el)
 
  974            template <
class T, layout_type LE = L>
 
  975            std::enable_if_t<LE == layout_type::column_major, std::size_t> operator()(
const T& el)
 
  992            template <
class F, 
class... CT>
 
  995                xt::for_each(*
this, xf.arguments());
 
 1002            const strides_type& m_strides;
 
 
 1005        template <bool possible = true, class E1, class E2, std::enable_if_t<!has_strides<E1>::value || !possible, 
bool> = 
true>
 
 1008            return {
false, 
true, 1, e1.size(), e1.dimension(), e1.dimension()};
 
 1011        template <bool possible = true, class E1, class E2, std::enable_if_t<has_strides<E1>::value && possible, 
bool> = 
true>
 
 1012        loop_sizes_t get_loop_sizes(
const E1& e1, 
const E2& e2)
 
 1014            using shape_value_type = 
typename E1::shape_type::value_type;
 
 1015            bool is_row_major = 
true;
 
 1019            is_row_major = 
true;
 
 1020            auto is_zero = [](
auto i)
 
 1024            auto&& 
strides = e1.strides();
 
 1025            auto it_bwd = std::find_if_not(
strides.rbegin(), 
strides.rend(), is_zero);
 
 1026            bool de1_row_contiguous = it_bwd != 
strides.rend() && *it_bwd == 1;
 
 1027            auto it_fwd = std::find_if_not(
strides.begin(), 
strides.end(), is_zero);
 
 1028            bool de1_col_contiguous = it_fwd != 
strides.end() && *it_fwd == 1;
 
 1029            if (de1_row_contiguous)
 
 1031                is_row_major = 
true;
 
 1033            else if (de1_col_contiguous)
 
 1035                is_row_major = 
false;
 
 1040                return {
false, 
true, 1, e1.size(), e1.dimension(), e1.dimension()};
 
 1044            std::size_t cut = 0;
 
 1051                if (cut < e1.strides().size() - 1)
 
 1054                    cut = e1.strides().size() - 1;
 
 1057            else if (!is_row_major)
 
 1069            std::size_t outer_loop_size = 
static_cast<std::size_t
>(std::accumulate(
 
 1071                e1.shape().begin() + 
static_cast<std::ptrdiff_t
>(cut),
 
 1072                shape_value_type(1),
 
 1073                std::multiplies<shape_value_type>{}
 
 1075            std::size_t inner_loop_size = 
static_cast<std::size_t
>(std::accumulate(
 
 1076                e1.shape().begin() + 
static_cast<std::ptrdiff_t
>(cut),
 
 1078                shape_value_type(1),
 
 1079                std::multiplies<shape_value_type>{}
 
 1084                std::swap(outer_loop_size, inner_loop_size);
 
 1087            return {inner_loop_size > 1, is_row_major, inner_loop_size, outer_loop_size, cut, e1.dimension()};
 
 1091    template <
bool simd>
 
 1092    template <
class E1, 
class E2>
 
 1095        return strided_assign_detail::get_loop_sizes<simd>(e1, e2);
 
 1098#define strided_parallel_assign 
 1100    template <
bool simd>
 
 1101    template <
class E1, 
class E2>
 
 1102    inline void strided_loop_assigner<simd>::run(E1& e1, 
const E2& e2, 
const loop_sizes_t& loop_sizes)
 
 1104        bool is_row_major = loop_sizes.is_row_major;
 
 1105        std::size_t inner_loop_size = loop_sizes.inner_loop_size;
 
 1106        std::size_t outer_loop_size = loop_sizes.outer_loop_size;
 
 1107        std::size_t cut = loop_sizes.cut;
 
 1111        dynamic_shape<std::size_t> idx, max_shape;
 
 1115            xt::resize_container(idx, cut);
 
 1116            max_shape.assign(e1.shape().begin(), e1.shape().begin() + 
static_cast<std::ptrdiff_t
>(cut));
 
 1120            xt::resize_container(idx, e1.shape().size() - cut);
 
 1121            max_shape.assign(e1.shape().begin() + 
static_cast<std::ptrdiff_t
>(cut), e1.shape().end());
 
 1126        using e1_value_type = 
typename E1::value_type;
 
 1127        using e2_value_type = 
typename E2::value_type;
 
 1128        constexpr bool needs_cast = has_assign_conversion<e2_value_type, e1_value_type>::value;
 
 1129        using value_type = 
typename xassign_traits<E1, E2>::requested_value_type;
 
 1130        using simd_type = std::conditional_t<
 
 1131            std::is_same<e1_value_type, bool>::value,
 
 1132            xt_simd::simd_bool_type<value_type>,
 
 1133            xt_simd::simd_type<value_type>>;
 
 1135        std::size_t simd_size = inner_loop_size / simd_type::size;
 
 1136        std::size_t simd_rest = inner_loop_size % simd_type::size;
 
 1138        auto fct_stepper = e2.stepper_begin(e1.shape());
 
 1139        auto res_stepper = e1.stepper_begin(e1.shape());
 
 1143        std::size_t step_dim = 0;
 
 1148#if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign) 
 1149        if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size)
 
 1151            std::size_t first_step = 
true;
 
 1152#pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx) 
 1153            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
 
 1161                    for (std::size_t i = 0; i < idx.size(); ++i)
 
 1163                        fct_stepper.step(i + step_dim, idx[i]);
 
 1164                        res_stepper.step(i + step_dim, idx[i]);
 
 1169                for (std::size_t i = 0; i < simd_size; ++i)
 
 1171                    res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
 
 1173                for (std::size_t i = 0; i < simd_rest; ++i)
 
 1176                    res_stepper.step_leading();
 
 1177                    fct_stepper.step_leading();
 
 1185                fct_stepper.to_begin();
 
 1188                if (!E1::contiguous_layout)
 
 1190                    res_stepper.to_begin();
 
 1191                    for (std::size_t i = 0; i < idx.size(); ++i)
 
 1193                        fct_stepper.step(i + step_dim, idx[i]);
 
 1194                        res_stepper.step(i + step_dim, idx[i]);
 
 1199                    for (std::size_t i = 0; i < idx.size(); ++i)
 
 1201                        fct_stepper.step(i + step_dim, idx[i]);
 
 1208#elif defined(strided_parallel_assign) && defined(XTENSOR_USE_TBB) 
 1209        if (outer_loop_size > XTENSOR_TBB_THRESHOLD / inner_loop_size)
 
 1211            tbb::static_partitioner sp;
 
 1213                tbb::blocked_range<size_t>(0ul, outer_loop_size),
 
 1214                [&e1, &e2, is_row_major, step_dim, simd_size, simd_rest, &max_shape, &idx_ = idx](
 
 1215                    const tbb::blocked_range<size_t>& r
 
 1219                    auto fct_stepper = e2.stepper_begin(e1.shape());
 
 1220                    auto res_stepper = e1.stepper_begin(e1.shape());
 
 1221                    std::size_t first_step = 
true;
 
 1224                    for (std::size_t ox = r.begin(); ox < r.end(); ++ox)
 
 1236                            for (std::size_t i = 0; i < idx.size(); ++i)
 
 1238                                fct_stepper.step(i + step_dim, idx[i]);
 
 1239                                res_stepper.step(i + step_dim, idx[i]);
 
 1244                        for (std::size_t i = 0; i < simd_size; ++i)
 
 1246                            res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
 
 1248                        for (std::size_t i = 0; i < simd_rest; ++i)
 
 1251                            res_stepper.step_leading();
 
 1252                            fct_stepper.step_leading();
 
 1260                        fct_stepper.to_begin();
 
 1263                        if (!E1::contiguous_layout)
 
 1265                            res_stepper.to_begin();
 
 1266                            for (std::size_t i = 0; i < idx.size(); ++i)
 
 1268                                fct_stepper.step(i + step_dim, idx[i]);
 
 1269                                res_stepper.step(i + step_dim, idx[i]);
 
 1274                            for (std::size_t i = 0; i < idx.size(); ++i)
 
 1276                                fct_stepper.step(i + step_dim, idx[i]);
 
 1288            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
 
 1290                for (std::size_t i = 0; i < simd_size; ++i)
 
 1292                    res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
 
 1294                for (std::size_t i = 0; i < simd_rest; ++i)
 
 1297                    res_stepper.step_leading();
 
 1298                    fct_stepper.step_leading();
 
 1305                fct_stepper.to_begin();
 
 1308                if (!E1::contiguous_layout)
 
 1310                    res_stepper.to_begin();
 
 1311                    for (std::size_t i = 0; i < idx.size(); ++i)
 
 1313                        fct_stepper.step(i + step_dim, idx[i]);
 
 1314                        res_stepper.step(i + step_dim, idx[i]);
 
 1319                    for (std::size_t i = 0; i < idx.size(); ++i)
 
 1321                        fct_stepper.step(i + step_dim, idx[i]);
 
 1325#if (defined(XTENSOR_USE_OPENMP) || defined(XTENSOR_USE_TBB)) && defined(strided_parallel_assign) 
 1331    template <
class E1, 
class E2>
 
 1332    inline void strided_loop_assigner<true>::run(E1& e1, 
const E2& e2)
 
 1335        if (loop_sizes.can_do_strided_assign)
 
 1337            run(e1, e2, loop_sizes);
 
 1347    template <
class E1, 
class E2>
 
 1348    inline void strided_loop_assigner<false>::run(E1& , 
const E2& , 
const loop_sizes_t&)
 
 1353    template <
class E1, 
class E2>
 
 1354    inline void strided_loop_assigner<false>::run(E1& e1, 
const E2& e2)
 
Base class for xexpressions.
derived_type & derived_cast() &noexcept
Returns a reference to the actual derived type of the xexpression.
Multidimensional function operating on xtensor expressions.
auto strides(const E &e, stride_type type=stride_type::normal) noexcept
Get strides of an object.
standard mathematical functions for xexpressions
constexpr layout_type compute_layout(Args... args) noexcept
Implementation of the following logical table:
auto conditional_cast(U &&u)
Perform a type cast when a condition is true.