xtensor
 
Loading...
Searching...
No Matches
xhistogram.hpp
1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
3 * Copyright (c) QuantStack *
4 * *
5 * Distributed under the terms of the BSD 3-Clause License. *
6 * *
7 * The full license is in the file LICENSE, distributed with this software. *
8 ****************************************************************************/
9
13
14#ifndef XTENSOR_HISTOGRAM_HPP
15#define XTENSOR_HISTOGRAM_HPP
16
17#include "../containers/xtensor.hpp"
18#include "../misc/xset_operation.hpp"
19#include "../misc/xsort.hpp"
20#include "../views/xview.hpp"
21
22using namespace xt::placeholders;
23
24namespace xt
25{
30
40 template <class E1, class E2>
41 inline auto digitize(E1&& data, E2&& bin_edges, bool right = false)
42 {
43 XTENSOR_ASSERT(bin_edges.dimension() == 1);
44 XTENSOR_ASSERT(bin_edges.size() >= 2);
45 XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
46 XTENSOR_ASSERT(xt::amin(data)[0] >= bin_edges[0]);
47 XTENSOR_ASSERT(xt::amax(data)[0] <= bin_edges[bin_edges.size() - 1]);
48
49 return xt::searchsorted(std::forward<E2>(bin_edges), std::forward<E1>(data), right);
50 }
51
52 namespace detail
53 {
54 template <class R = double, class E1, class E2, class E3>
55 inline auto histogram_imp(E1&& data, E2&& bin_edges, E3&& weights, bool density, bool equal_bins)
56 {
57 using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>, std::decay_t<E3>>;
58 using value_type = typename std::decay_t<E3>::value_type;
59
60 XTENSOR_ASSERT(data.dimension() == 1);
61 XTENSOR_ASSERT(weights.dimension() == 1);
62 XTENSOR_ASSERT(bin_edges.dimension() == 1);
63 XTENSOR_ASSERT(weights.size() == data.size());
64 XTENSOR_ASSERT(bin_edges.size() >= 2);
65 XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
66
67 size_t n_bins = bin_edges.size() - 1;
69
70 if (equal_bins)
71 {
72 std::array<typename std::decay_t<E2>::value_type, 2> bounds = xt::minmax(bin_edges)();
73 auto left = static_cast<double>(bounds[0]);
74 auto right = static_cast<double>(bounds[1]);
75 double norm = 1. / (right - left);
76 for (size_t i = 0; i < data.size(); ++i)
77 {
78 auto v = static_cast<double>(data(i));
79 // left and right are not bounds of data
80 if (v >= left && v < right)
81 {
82 auto i_bin = static_cast<size_t>(static_cast<double>(n_bins) * (v - left) * norm);
83 count(i_bin) += weights(i);
84 }
85 else if (v == right)
86 {
87 count(n_bins - 1) += weights(i);
88 }
89 }
90 }
91 else
92 {
93 auto sorter = xt::argsort(data);
94
95 size_type ibin = 0;
96
97 for (auto& idx : sorter)
98 {
99 auto item = data[idx];
100
101 if (item < bin_edges[0])
102 {
103 continue;
104 }
105
106 if (item > bin_edges[n_bins])
107 {
108 break;
109 }
110
111 while (item >= bin_edges[ibin + 1] && ibin < n_bins - 1)
112 {
113 ++ibin;
114 }
115
116 count[ibin] += weights[idx];
117 }
118 }
119
120 xt::xtensor<R, 1> prob = xt::cast<R>(count);
121
122 if (density)
123 {
124 R n = static_cast<R>(data.size());
125 for (size_type i = 0; i < bin_edges.size() - 1; ++i)
126 {
127 prob[i] /= (static_cast<R>(bin_edges[i + 1] - bin_edges[i]) * n);
128 }
129 }
130
131 return prob;
132 }
133
134 } // detail
135
140
151 template <class R = double, class E1, class E2, class E3>
152 inline auto histogram(E1&& data, E2&& bin_edges, E3&& weights, bool density = false)
153 {
154 return detail::histogram_imp<R>(
155 std::forward<E1>(data),
156 std::forward<E2>(bin_edges),
157 std::forward<E3>(weights),
158 density,
159 false
160 );
161 }
162
172 template <class R = double, class E1, class E2>
173 inline auto histogram(E1&& data, E2&& bin_edges, bool density = false)
174 {
175 using value_type = typename std::decay_t<E1>::value_type;
176
177 auto n = data.size();
178
179 return detail::histogram_imp<R>(
180 std::forward<E1>(data),
181 std::forward<E2>(bin_edges),
183 density,
184 false
185 );
186 }
187
197 template <class R = double, class E1>
198 inline auto histogram(E1&& data, std::size_t bins = 10, bool density = false)
199 {
200 using value_type = typename std::decay_t<E1>::value_type;
201
202 auto n = data.size();
203
204 return detail::histogram_imp<R>(
205 std::forward<E1>(data),
208 density,
209 true
210 );
211 }
212
224 template <class R = double, class E1, class E2>
225 inline auto histogram(E1&& data, std::size_t bins, E2 left, E2 right, bool density = false)
226 {
227 using value_type = typename std::decay_t<E1>::value_type;
228
229 auto n = data.size();
230
231 return detail::histogram_imp<R>(
232 std::forward<E1>(data),
233 histogram_bin_edges(data, left, right, bins),
235 density,
236 true
237 );
238 }
239
250 template <class R = double, class E1, class E2>
251 inline auto histogram(E1&& data, std::size_t bins, E2&& weights, bool density = false)
252 {
253 return detail::histogram_imp<R>(
254 std::forward<E1>(data),
255 histogram_bin_edges(data, weights, bins),
256 std::forward<E2>(weights),
257 density,
258 true
259 );
260 }
261
274 template <class R = double, class E1, class E2, class E3>
275 inline auto histogram(E1&& data, std::size_t bins, E2&& weights, E3 left, E3 right, bool density = false)
276 {
277 return detail::histogram_imp<R>(
278 std::forward<E1>(data),
279 histogram_bin_edges(data, weights, left, right, bins),
280 std::forward<E2>(weights),
281 density,
282 true
283 );
284 }
285
291 {
292 automatic,
293 linspace,
294 logspace,
295 uniform
296 };
297
310 template <class E1, class E2, class E3>
312 E1&& data,
313 E2&& weights,
314 E3 left,
315 E3 right,
316 std::size_t bins = 10,
317 histogram_algorithm mode = histogram_algorithm::automatic
318 )
319 {
320 // counter and return type
321 using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>>;
322 using value_type = typename std::decay_t<E1>::value_type;
323 using weights_type = typename std::decay_t<E2>::value_type;
324
325 // basic checks
326 // - rank
327 XTENSOR_ASSERT(data.dimension() == 1);
328 XTENSOR_ASSERT(weights.dimension() == 1);
329 // - size
330 XTENSOR_ASSERT(weights.size() == data.size());
331 // - bounds
332 XTENSOR_ASSERT(left <= right);
333 // - non-empty
334 XTENSOR_ASSERT(bins > std::size_t(0));
335
336 // act on different modes
337 switch (mode)
338 {
339 // bins of equal width
340 case histogram_algorithm::automatic:
341 {
342 xt::xtensor<value_type, 1> bin_edges = xt::linspace<value_type>(left, right, bins + 1);
343 return bin_edges;
344 }
345
346 // bins of equal width
347 case histogram_algorithm::linspace:
348 {
349 xt::xtensor<value_type, 1> bin_edges = xt::linspace<value_type>(left, right, bins + 1);
350 return bin_edges;
351 }
352
353 // bins of logarithmically increasing size
354 case histogram_algorithm::logspace:
355 {
356 using rhs_value_type = std::conditional_t<xtl::is_integral<value_type>::value, double, value_type>;
357
359 xt::logspace<rhs_value_type>(std::log10(left), std::log10(right), bins + 1)
360 );
361
362 // TODO: replace above with below after 'xsimd' fix
363 // xt::xtensor<value_type,1> bin_edges = xt::logspace<value_type>(
364 // std::log10(left), std::log10(right), bins+1);
365
366 return bin_edges;
367 }
368
369 // same amount of data in each bin
370 case histogram_algorithm::uniform:
371 {
372 // indices that sort "data"
373 auto sorter = xt::argsort(data);
374
375 // histogram: all of equal 'height'
376 // - height
377 weights_type w = xt::sum<weights_type>(weights)[0] / static_cast<weights_type>(bins);
378 // - apply to all bins
380
381 // take cumulative sum, to act as easy look-up
382 count = xt::cumsum(count);
383
384 // edges
385 // - allocate
386 std::vector<size_t> shape = {bins + 1};
387 xt::xtensor<value_type, 1> bin_edges = xtensor<value_type, 1>::from_shape(shape);
388 // - first/last edge
389 bin_edges[0] = left;
390 bin_edges[bins] = right;
391 // - cumulative weight
392 weights_type cum_weight = static_cast<weights_type>(0);
393 // - current bin
394 size_type ibin = 0;
395 // - loop to find interior bin-edges
396 for (size_type i = 0; i < weights.size(); ++i)
397 {
398 if (cum_weight >= count[ibin])
399 {
400 bin_edges[ibin + 1] = data[sorter[i]];
401 ++ibin;
402 }
403 cum_weight += weights[sorter[i]];
404 }
405 return bin_edges;
406 }
407
408 // bins of equal width
409 default:
410 {
411 xt::xtensor<value_type, 1> bin_edges = xt::linspace<value_type>(left, right, bins + 1);
412 return bin_edges;
413 }
414 }
415 }
416
427 template <class E1, class E2>
429 E1&& data,
430 E2&& weights,
431 std::size_t bins = 10,
432 histogram_algorithm mode = histogram_algorithm::automatic
433 )
434 {
435 using value_type = typename std::decay_t<E1>::value_type;
436 std::array<value_type, 2> left_right;
437 left_right = xt::minmax(data)();
438
439 return histogram_bin_edges(
440 std::forward<E1>(data),
441 std::forward<E2>(weights),
442 left_right[0],
443 left_right[1],
444 bins,
445 mode
446 );
447 }
448
458 template <class E1>
459 inline auto
460 histogram_bin_edges(E1&& data, std::size_t bins = 10, histogram_algorithm mode = histogram_algorithm::automatic)
461 {
462 using value_type = typename std::decay_t<E1>::value_type;
463
464 auto n = data.size();
465 std::array<value_type, 2> left_right;
466 left_right = xt::minmax(data)();
467
468 return histogram_bin_edges(
469 std::forward<E1>(data),
471 left_right[0],
472 left_right[1],
473 bins,
474 mode
475 );
476 }
477
489 template <class E1, class E2>
491 E1&& data,
492 E2 left,
493 E2 right,
494 std::size_t bins = 10,
495 histogram_algorithm mode = histogram_algorithm::automatic
496 )
497 {
498 using value_type = typename std::decay_t<E1>::value_type;
499
500 auto n = data.size();
501
502 return histogram_bin_edges(std::forward<E1>(data), xt::ones<value_type>({n}), left, right, bins, mode);
503 }
504
522 template <class E1, class E2, XTL_REQUIRES(is_xexpression<std::decay_t<E2>>)>
523 inline auto bincount(E1&& data, E2&& weights, std::size_t minlength = 0)
524 {
525 using result_value_type = typename std::decay_t<E2>::value_type;
526 using input_value_type = typename std::decay_t<E1>::value_type;
527 using size_type = typename std::decay_t<E1>::size_type;
528
529 static_assert(
530 xtl::is_integral<typename std::decay_t<E1>::value_type>::value,
531 "Bincount data has to be integral type."
532 );
533 XTENSOR_ASSERT(data.dimension() == 1);
534 XTENSOR_ASSERT(weights.dimension() == 1);
535
536 std::array<input_value_type, 2> left_right;
537 left_right = xt::minmax(data)();
538
539 if (left_right[0] < input_value_type(0))
540 {
541 XTENSOR_THROW(std::runtime_error, "Data argument for bincount can only contain positive integers!");
542 }
543
545 {(std::max)(minlength, std::size_t(left_right[1] + 1))}
546 );
547
548 for (size_type i = 0; i < data.size(); ++i)
549 {
550 res(data(i)) += weights(i);
551 }
552
553 return res;
554 }
555
556 template <class E1>
557 inline auto bincount(E1&& data, std::size_t minlength = 0)
558 {
559 return bincount(
560 std::forward<E1>(data),
561 xt::ones<typename std::decay_t<E1>::value_type>(data.shape()),
562 minlength
563 );
564 }
565
575 template <class E>
576 inline xt::xtensor<size_t, 1> bin_items(size_t N, E&& weights)
577 {
578 if (weights.size() <= std::size_t(1))
579 {
581 return n;
582 }
583
584#ifdef XTENSOR_ENABLE_ASSERT
585 using value_type = typename std::decay_t<E>::value_type;
586
587 XTENSOR_ASSERT(xt::all(weights >= static_cast<value_type>(0)));
588 XTENSOR_ASSERT(xt::sum(weights)() > static_cast<value_type>(0));
589#endif
590
591 xt::xtensor<double, 1> P = xt::cast<double>(weights) / static_cast<double>(xt::sum(weights)());
592 xt::xtensor<size_t, 1> n = xt::ceil(static_cast<double>(N) * P);
593
594 if (xt::sum(n)() == N)
595 {
596 return n;
597 }
598
600 xt::xtensor<size_t, 1> sorter = xt::argsort(P);
601 sorter = xt::view(sorter, xt::range(P.size(), _, -1));
602 sorter = xt::view(sorter, xt::range(0, xt::sum(n)(0) - N));
603 xt::view(d, xt::keep(sorter)) = 1;
604 n -= d;
605
606 return n;
607 }
608
618 inline xt::xtensor<size_t, 1> bin_items(size_t N, size_t bins)
619 {
620 return bin_items(N, xt::ones<double>({bins}));
621 }
622}
623
624#endif
size_type size() const noexcept
Returns the number of element in the container.
constexpr const inner_shape_type & shape() const noexcept
Returns the shape of the container.
auto cumsum(E &&e, std::ptrdiff_t axis)
Cumulative sum.
Definition xmath.hpp:2283
auto amax(E &&e, X &&axes, EVS es=EVS())
Maximum element along given axis.
Definition xmath.hpp:782
auto amin(E &&e, X &&axes, EVS es=EVS())
Minimum element along given axis.
Definition xmath.hpp:800
auto cast(E &&e) noexcept -> detail::xfunction_type_t< typename detail::cast< R >::functor, E >
Element-wise static_cast.
auto digitize(E1 &&data, E2 &&bin_edges, bool right=false)
Return the indices of the bins to which each value in input array belongs.
auto histogram_bin_edges(E1 &&data, E2 &&weights, E3 left, E3 right, std::size_t bins=10, histogram_algorithm mode=histogram_algorithm::automatic)
Compute the bin-edges of a histogram of a set of data using different algorithms.
auto histogram(E1 &&data, E2 &&bin_edges, E3 &&weights, bool density=false)
Compute the histogram of a set of data.
histogram_algorithm
Defines different algorithms to be used in "histogram_bin_edges".
bool all(E &&e)
Any.
auto ceil(E &&e) noexcept -> detail::xfunction_type_t< math::ceil_fun, E >
ceil function.
Definition xmath.hpp:1574
auto sum(E &&e, X &&axes, EVS es=EVS())
Sum of elements over given axes.
Definition xmath.hpp:1838
auto minmax(E &&e, EVS es=EVS())
Minimum and maximum among the elements of an array or expression.
Definition xmath.hpp:2231
auto searchsorted(E1 &&a, E2 &&v, bool right=true)
Find indices where elements should be inserted to maintain order.
auto norm(E &&e) noexcept
Calculates the squared magnitude elementwise for the complex numbers in e.
Definition xcomplex.hpp:257
standard mathematical functions for xexpressions
auto range(A start_val, B stop_val)
Select a range from start_val to stop_val (excluded).
Definition xslice.hpp:744
auto ones(S shape) noexcept
Returns an xexpression containing ones of the specified shape.
Definition xbuilder.hpp:46
auto zeros(S shape) noexcept
Returns an xexpression containing zeros of the specified shape.
Definition xbuilder.hpp:66
xt::xtensor< size_t, 1 > bin_items(size_t N, E &&weights)
Get the number of items in each bin, given the fraction of items per bin.
auto linspace(T start, T stop, std::size_t num_samples=50, bool endpoint=true) noexcept
Generates num_samples evenly spaced numbers over given interval.
Definition xbuilder.hpp:460
xtensor_container< uvector< T, A >, N, L > xtensor
Alias template on xtensor_container with default parameters for data container type.
auto keep(T &&indices)
Create a non-contigous slice from a container of indices to keep.
Definition xslice.hpp:386
auto logspace(T start, T stop, std::size_t num_samples, T base=10, bool endpoint=true) noexcept
Generates num_samples numbers evenly spaced on a log scale over given interval.
Definition xbuilder.hpp:481
auto bincount(E1 &&data, E2 &&weights, std::size_t minlength=0)
Count number of occurrences of each value in array of non-negative ints.
auto view(E &&e, S &&... slices)
Constructs and returns a view on the specified xexpression.
Definition xview.hpp:1823