xtensor
Loading...
Searching...
No Matches
xhistogram.hpp
1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
3 * Copyright (c) QuantStack *
4 * *
5 * Distributed under the terms of the BSD 3-Clause License. *
6 * *
7 * The full license is in the file LICENSE, distributed with this software. *
8 ****************************************************************************/
9
14#ifndef XTENSOR_HISTOGRAM_HPP
15#define XTENSOR_HISTOGRAM_HPP
16
17#include "xset_operation.hpp"
18#include "xsort.hpp"
19#include "xtensor.hpp"
20#include "xview.hpp"
21
22using namespace xt::placeholders;
23
24namespace xt
25{
35 template <class E1, class E2>
36 inline auto digitize(E1&& data, E2&& bin_edges, bool right = false)
37 {
38 XTENSOR_ASSERT(bin_edges.dimension() == 1);
39 XTENSOR_ASSERT(bin_edges.size() >= 2);
40 XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
41 XTENSOR_ASSERT(xt::amin(data)[0] >= bin_edges[0]);
42 XTENSOR_ASSERT(xt::amax(data)[0] <= bin_edges[bin_edges.size() - 1]);
43
44 return xt::searchsorted(std::forward<E2>(bin_edges), std::forward<E1>(data), right);
45 }
46
47 namespace detail
48 {
49 template <class R = double, class E1, class E2, class E3>
50 inline auto histogram_imp(E1&& data, E2&& bin_edges, E3&& weights, bool density, bool equal_bins)
51 {
52 using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>, std::decay_t<E3>>;
53 using value_type = typename std::decay_t<E3>::value_type;
54
55 XTENSOR_ASSERT(data.dimension() == 1);
56 XTENSOR_ASSERT(weights.dimension() == 1);
57 XTENSOR_ASSERT(bin_edges.dimension() == 1);
58 XTENSOR_ASSERT(weights.size() == data.size());
59 XTENSOR_ASSERT(bin_edges.size() >= 2);
60 XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
61
62 size_t n_bins = bin_edges.size() - 1;
64
65 if (equal_bins)
66 {
67 std::array<typename std::decay_t<E2>::value_type, 2> bounds = xt::minmax(bin_edges)();
68 auto left = static_cast<double>(bounds[0]);
69 auto right = static_cast<double>(bounds[1]);
70 double norm = 1. / (right - left);
71 for (size_t i = 0; i < data.size(); ++i)
72 {
73 auto v = static_cast<double>(data(i));
74 // left and right are not bounds of data
75 if (v >= left && v < right)
76 {
77 auto i_bin = static_cast<size_t>(static_cast<double>(n_bins) * (v - left) * norm);
78 count(i_bin) += weights(i);
79 }
80 else if (v == right)
81 {
82 count(n_bins - 1) += weights(i);
83 }
84 }
85 }
86 else
87 {
88 auto sorter = xt::argsort(data);
89
90 size_type ibin = 0;
91
92 for (auto& idx : sorter)
93 {
94 auto item = data[idx];
95
96 if (item < bin_edges[0])
97 {
98 continue;
99 }
100
101 if (item > bin_edges[n_bins])
102 {
103 break;
104 }
105
106 while (item >= bin_edges[ibin + 1] && ibin < n_bins - 1)
107 {
108 ++ibin;
109 }
110
111 count[ibin] += weights[idx];
112 }
113 }
114
116
117 if (density)
118 {
119 R n = static_cast<R>(data.size());
120 for (size_type i = 0; i < bin_edges.size() - 1; ++i)
121 {
122 prob[i] /= (static_cast<R>(bin_edges[i + 1] - bin_edges[i]) * n);
123 }
124 }
125
126 return prob;
127 }
128
129 } // detail
130
141 template <class R = double, class E1, class E2, class E3>
142 inline auto histogram(E1&& data, E2&& bin_edges, E3&& weights, bool density = false)
143 {
144 return detail::histogram_imp<R>(
145 std::forward<E1>(data),
146 std::forward<E2>(bin_edges),
147 std::forward<E3>(weights),
148 density,
149 false
150 );
151 }
152
162 template <class R = double, class E1, class E2>
163 inline auto histogram(E1&& data, E2&& bin_edges, bool density = false)
164 {
165 using value_type = typename std::decay_t<E1>::value_type;
166
167 auto n = data.size();
168
169 return detail::histogram_imp<R>(
170 std::forward<E1>(data),
171 std::forward<E2>(bin_edges),
173 density,
174 false
175 );
176 }
177
187 template <class R = double, class E1>
188 inline auto histogram(E1&& data, std::size_t bins = 10, bool density = false)
189 {
190 using value_type = typename std::decay_t<E1>::value_type;
191
192 auto n = data.size();
193
194 return detail::histogram_imp<R>(
195 std::forward<E1>(data),
198 density,
199 true
200 );
201 }
202
214 template <class R = double, class E1, class E2>
215 inline auto histogram(E1&& data, std::size_t bins, E2 left, E2 right, bool density = false)
216 {
217 using value_type = typename std::decay_t<E1>::value_type;
218
219 auto n = data.size();
220
221 return detail::histogram_imp<R>(
222 std::forward<E1>(data),
225 density,
226 true
227 );
228 }
229
240 template <class R = double, class E1, class E2>
241 inline auto histogram(E1&& data, std::size_t bins, E2&& weights, bool density = false)
242 {
243 return detail::histogram_imp<R>(
244 std::forward<E1>(data),
246 std::forward<E2>(weights),
247 density,
248 true
249 );
250 }
251
264 template <class R = double, class E1, class E2, class E3>
265 inline auto histogram(E1&& data, std::size_t bins, E2&& weights, E3 left, E3 right, bool density = false)
266 {
267 return detail::histogram_imp<R>(
268 std::forward<E1>(data),
270 std::forward<E2>(weights),
271 density,
272 true
273 );
274 }
275
281 {
282 automatic,
283 linspace,
284 logspace,
285 uniform
286 };
287
300 template <class E1, class E2, class E3>
302 E1&& data,
303 E2&& weights,
304 E3 left,
305 E3 right,
306 std::size_t bins = 10,
307 histogram_algorithm mode = histogram_algorithm::automatic
308 )
309 {
310 // counter and return type
311 using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>>;
312 using value_type = typename std::decay_t<E1>::value_type;
313 using weights_type = typename std::decay_t<E2>::value_type;
314
315 // basic checks
316 // - rank
317 XTENSOR_ASSERT(data.dimension() == 1);
318 XTENSOR_ASSERT(weights.dimension() == 1);
319 // - size
320 XTENSOR_ASSERT(weights.size() == data.size());
321 // - bounds
322 XTENSOR_ASSERT(left <= right);
323 // - non-empty
324 XTENSOR_ASSERT(bins > std::size_t(0));
325
326 // act on different modes
327 switch (mode)
328 {
329 // bins of equal width
330 case histogram_algorithm::automatic:
331 {
333 return bin_edges;
334 }
335
336 // bins of equal width
337 case histogram_algorithm::linspace:
338 {
340 return bin_edges;
341 }
342
343 // bins of logarithmically increasing size
344 case histogram_algorithm::logspace:
345 {
346 using rhs_value_type = std::conditional_t<xtl::is_integral<value_type>::value, double, value_type>;
347
349 xt::logspace<rhs_value_type>(std::log10(left), std::log10(right), bins + 1)
350 );
351
352 // TODO: replace above with below after 'xsimd' fix
353 // xt::xtensor<value_type,1> bin_edges = xt::logspace<value_type>(
354 // std::log10(left), std::log10(right), bins+1);
355
356 return bin_edges;
357 }
358
359 // same amount of data in each bin
360 case histogram_algorithm::uniform:
361 {
362 // indices that sort "data"
363 auto sorter = xt::argsort(data);
364
365 // histogram: all of equal 'height'
366 // - height
368 // - apply to all bins
370
371 // take cumulative sum, to act as easy look-up
372 count = xt::cumsum(count);
373
374 // edges
375 // - allocate
376 std::vector<size_t> shape = {bins + 1};
378 // - first/last edge
379 bin_edges[0] = left;
381 // - cumulative weight
382 weights_type cum_weight = static_cast<weights_type>(0);
383 // - current bin
384 size_type ibin = 0;
385 // - loop to find interior bin-edges
386 for (size_type i = 0; i < weights.size(); ++i)
387 {
388 if (cum_weight >= count[ibin])
389 {
390 bin_edges[ibin + 1] = data[sorter[i]];
391 ++ibin;
392 }
394 }
395 return bin_edges;
396 }
397
398 // bins of equal width
399 default:
400 {
402 return bin_edges;
403 }
404 }
405 }
406
417 template <class E1, class E2>
419 E1&& data,
420 E2&& weights,
421 std::size_t bins = 10,
422 histogram_algorithm mode = histogram_algorithm::automatic
423 )
424 {
425 using value_type = typename std::decay_t<E1>::value_type;
426 std::array<value_type, 2> left_right;
427 left_right = xt::minmax(data)();
428
429 return histogram_bin_edges(
430 std::forward<E1>(data),
431 std::forward<E2>(weights),
432 left_right[0],
433 left_right[1],
434 bins,
435 mode
436 );
437 }
438
448 template <class E1>
449 inline auto
450 histogram_bin_edges(E1&& data, std::size_t bins = 10, histogram_algorithm mode = histogram_algorithm::automatic)
451 {
452 using value_type = typename std::decay_t<E1>::value_type;
453
454 auto n = data.size();
455 std::array<value_type, 2> left_right;
456 left_right = xt::minmax(data)();
457
458 return histogram_bin_edges(
459 std::forward<E1>(data),
461 left_right[0],
462 left_right[1],
463 bins,
464 mode
465 );
466 }
467
479 template <class E1, class E2>
481 E1&& data,
482 E2 left,
483 E2 right,
484 std::size_t bins = 10,
485 histogram_algorithm mode = histogram_algorithm::automatic
486 )
487 {
488 using value_type = typename std::decay_t<E1>::value_type;
489
490 auto n = data.size();
491
492 return histogram_bin_edges(std::forward<E1>(data), xt::ones<value_type>({n}), left, right, bins, mode);
493 }
494
512 template <class E1, class E2, XTL_REQUIRES(is_xexpression<std::decay_t<E2>>)>
513 inline auto bincount(E1&& data, E2&& weights, std::size_t minlength = 0)
514 {
515 using result_value_type = typename std::decay_t<E2>::value_type;
516 using input_value_type = typename std::decay_t<E1>::value_type;
517 using size_type = typename std::decay_t<E1>::size_type;
518
519 static_assert(
520 xtl::is_integral<typename std::decay_t<E1>::value_type>::value,
521 "Bincount data has to be integral type."
522 );
523 XTENSOR_ASSERT(data.dimension() == 1);
524 XTENSOR_ASSERT(weights.dimension() == 1);
525
526 std::array<input_value_type, 2> left_right;
527 left_right = xt::minmax(data)();
528
529 if (left_right[0] < input_value_type(0))
530 {
531 XTENSOR_THROW(std::runtime_error, "Data argument for bincount can only contain positive integers!");
532 }
533
535 {(std::max)(minlength, std::size_t(left_right[1] + 1))}
536 );
537
538 for (size_type i = 0; i < data.size(); ++i)
539 {
540 res(data(i)) += weights(i);
541 }
542
543 return res;
544 }
545
546 template <class E1>
547 inline auto bincount(E1&& data, std::size_t minlength = 0)
548 {
549 return bincount(
550 std::forward<E1>(data),
551 xt::ones<typename std::decay_t<E1>::value_type>(data.shape()),
553 );
554 }
555
565 template <class E>
567 {
568 if (weights.size() <= std::size_t(1))
569 {
571 return n;
572 }
573
574#ifdef XTENSOR_ENABLE_ASSERT
575 using value_type = typename std::decay_t<E>::value_type;
576
577 XTENSOR_ASSERT(xt::all(weights >= static_cast<value_type>(0)));
578 XTENSOR_ASSERT(xt::sum(weights)() > static_cast<value_type>(0));
579#endif
580
581 xt::xtensor<double, 1> P = xt::cast<double>(weights) / static_cast<double>(xt::sum(weights)());
582 xt::xtensor<size_t, 1> n = xt::ceil(static_cast<double>(N) * P);
583
584 if (xt::sum(n)() == N)
585 {
586 return n;
587 }
588
590 xt::xtensor<size_t, 1> sorter = xt::argsort(P);
591 sorter = xt::view(sorter, xt::range(P.size(), _, -1));
592 sorter = xt::view(sorter, xt::range(0, xt::sum(n)(0) - N));
593 xt::view(d, xt::keep(sorter)) = 1;
594 n -= d;
595
596 return n;
597 }
598
608 inline xt::xtensor<size_t, 1> bin_items(size_t N, size_t bins)
609 {
610 return bin_items(N, xt::ones<double>({bins}));
611 }
612}
613
614#endif
auto cumsum(E &&e, std::ptrdiff_t axis)
Cumulative sum.
Definition xmath.hpp:2286
auto amax(E &&e, X &&axes, EVS es=EVS())
Maximum element along given axis.
Definition xmath.hpp:782
auto amin(E &&e, X &&axes, EVS es=EVS())
Minimum element along given axis.
Definition xmath.hpp:800
auto ceil(E &&e) noexcept -> detail::xfunction_type_t< math::ceil_fun, E >
ceil function.
Definition xmath.hpp:1577
auto sum(E &&e, X &&axes, EVS es=EVS())
Sum of elements over given axes.
Definition xmath.hpp:1841
auto minmax(E &&e, EVS es=EVS())
Minimum and maximum among the elements of an array or expression.
Definition xmath.hpp:2234
auto norm(E &&e) noexcept
Calculates the squared magnitude elementwise for the complex numbers in e.
Definition xcomplex.hpp:257
standard mathematical functions for xexpressions
auto range(A start_val, B stop_val)
Select a range from start_val to stop_val (excluded).
Definition xslice.hpp:818
auto all() noexcept
Returns a slice representing a full dimension, to be used as an argument of view function.
Definition xslice.hpp:234
auto ones(S shape) noexcept
Returns an xexpression containing ones of the specified shape.
Definition xbuilder.hpp:46
auto histogram_bin_edges(E1 &&data, E2 &&weights, E3 left, E3 right, std::size_t bins=10, histogram_algorithm mode=histogram_algorithm::automatic)
Compute the bin-edges of a histogram of a set of data using different algorithms.
xt::xtensor< size_t, 1 > bin_items(size_t N, E &&weights)
Get the number of items in each bin, given the fraction of items per bin.
auto searchsorted(E1 &&a, E2 &&v, bool right=true)
Find indices where elements should be inserted to maintain order.
auto digitize(E1 &&data, E2 &&bin_edges, bool right=false)
Return the indices of the bins to which each value in input array belongs.
auto linspace(T start, T stop, std::size_t num_samples=50, bool endpoint=true) noexcept
Generates num_samples evenly spaced numbers over given interval.
Definition xbuilder.hpp:460
auto histogram(E1 &&data, E2 &&bin_edges, E3 &&weights, bool density=false)
Compute the histogram of a set of data.
detail::disable_integral_keep< T > keep(T &&indices)
Create a non-contigous slice from a container of indices to keep.
Definition xslice.hpp:405
histogram_algorithm
Defines different algorithms to be used in "histogram_bin_edges".
auto logspace(T start, T stop, std::size_t num_samples, T base=10, bool endpoint=true) noexcept
Generates num_samples numbers evenly spaced on a log scale over given interval.
Definition xbuilder.hpp:481
auto bincount(E1 &&data, E2 &&weights, std::size_t minlength=0)
Count number of occurrences of each value in array of non-negative ints.
auto view(E &&e, S &&... slices)
Constructs and returns a view on the specified xexpression.
Definition xview.hpp:1834