// Read-only accessor: returns the view's `minmax` pointer unchanged.
// NOTE(review): getBin() in this file dereferences `minmax` as `minmax->min(dim)`,
// which a `float*` cannot support. The declared return type `const float*` therefore
// looks inconsistent with the member's real type — confirm against the member
// declaration (not visible in this excerpt).
50 ALPAKA_FN_ACC
inline constexpr const float* minMax()
const {
return minmax; }
// Mutable accessor: returns the view's `minmax` pointer unchanged.
// NOTE(review): `minmax` is used elsewhere in this file as `minmax->min(dim)`, so the
// `float*` return type looks inconsistent with the member's type — confirm against the
// member declaration (not visible in this excerpt).
51 ALPAKA_FN_ACC
inline constexpr float* minMax() {
return minmax; }
// Read-only accessor: returns the view's `tilesizes` pointer.
// getBin() indexes this array by dimension (`tilesizes[dim]`) and divides by the entry.
53 ALPAKA_FN_ACC
inline constexpr const float* tileSize()
const {
return tilesizes; }
// Mutable accessor: returns the view's `tilesizes` pointer (indexed by dimension in
// getBin()), allowing the caller to write the entries.
54 ALPAKA_FN_ACC
inline constexpr float* tileSize() {
return tilesizes; }
// Read-only accessor: returns the view's `wrapping` pointer.
// Entries are indexed by dimension and tested as booleans (`wrapping[dim] ? ...`,
// `wrapping[dim] and ...`) by getGlobalBinByBin() and searchBox().
56 ALPAKA_FN_ACC
inline constexpr const uint8_t* wrapped()
const {
return wrapping; }
// Mutable accessor: returns the view's `wrapping` pointer (per-dimension flags tested
// as booleans elsewhere in this file), allowing the caller to write the entries.
57 ALPAKA_FN_ACC
inline constexpr uint8_t* wrapped() {
return wrapping; }
// Maps coordinate `coord` along dimension `dim` to a per-dimension bin index.
// The visible lines compute (value - minmax->min(dim)) / tilesizes[dim], truncate it to
// int, and clamp the result to [0, nperdim - 1].
// NOTE(review): this excerpt omits the declaration of `coord_bin`, the condition that
// selects between the two formulas below, and the return statement. Presumably the
// normalized formula is used for wrapped dimensions — confirm against the full source.
59 ALPAKA_FN_ACC
inline constexpr int getBin(
float coord,
int dim)
const {
// Variant 1: the coordinate is first passed through normalizeCoordinate().
63 static_cast<int>((normalizeCoordinate(coord, dim) - minmax->min(dim)) / tilesizes[dim]);
// Variant 2: the raw coordinate is used directly.
65 coord_bin =
static_cast<int>((coord - minmax->min(dim)) / tilesizes[dim]);
// Clamp so coordinates beyond either end fall into the first or last bin.
69 coord_bin = internal::math::min(coord_bin, nperdim - 1);
70 coord_bin = internal::math::max(coord_bin, 0);
// Combines the per-dimension bins of a point into one linear bin index.
// `coords` must hold at least Ndim values (coords[0] .. coords[Ndim - 1] are read).
// Each dimension except the last contributes nperdim^(Ndim - dim - 1) * getBin(...);
// the last dimension's bin is added unweighted.
// NOTE(review): the declaration of `global_bin` and the return statement are not
// visible in this excerpt.
75 ALPAKA_FN_ACC
inline constexpr int getGlobalBin(
const float* coords)
const {
77 for (
int dim = 0; dim != Ndim - 1; ++dim) {
// NOTE(review): the weight is computed by a float `pow` (nperdim is cast to float);
// the product is therefore floating-point before being accumulated — confirm that
// nperdim^(Ndim-1) stays exactly representable for the sizes used.
78 global_bin += internal::math::pow(
static_cast<float>(nperdim), Ndim - dim - 1) *
79 getBin(coords[dim], dim);
81 global_bin += getBin(coords[Ndim - 1], Ndim - 1);
// Converts an array of per-dimension bin indices into one linear bin index, weighting
// dimension `dim` by nperdim^(Ndim - dim - 1).
// NOTE(review): the return statement and closing braces are not visible in this excerpt.
85 ALPAKA_FN_ACC
inline constexpr int getGlobalBinByBin(
const VecArray<int32_t, Ndim>& Bins)
const {
86 int32_t globalBin = 0;
87 for (
int dim = 0; dim != Ndim; ++dim) {
// For wrapped dimensions the bin is reduced modulo nperdim; otherwise used as-is.
// NOTE(review): C++ `%` keeps the sign of the dividend, so a negative Bins[dim]
// yields a negative bin_i — confirm callers never pass negative bins here.
88 auto bin_i = wrapping[dim] ? (Bins[dim] % nperdim) : Bins[dim];
// Weight is computed with a float `pow`, so the product is floating-point before
// being accumulated into the int32_t total.
89 globalBin += internal::math::pow(
static_cast<float>(nperdim), Ndim - dim - 1) * bin_i;
// Translates a search box given in coordinates into per-dimension bin ranges.
// For every dimension, the lower extreme (`[dim][0]`) and upper extreme (`[dim][1]`)
// are converted with getBin() and stored as a two-element array in `searchbox_bins[dim]`.
// NOTE(review): the body of the `if` below (original lines 100-101) is missing from this
// excerpt, so what happens when a wrapped dimension has infBin > supBin cannot be
// determined here.
94 ALPAKA_FN_ACC
inline void searchBox(
const SearchBoxExtremes<Ndim>& searchbox_extremes,
95 SearchBoxBins<Ndim>& searchbox_bins) {
96 for (
int dim{}; dim != Ndim; ++dim) {
97 auto infBin = getBin(searchbox_extremes[dim][0], dim);
98 auto supBin = getBin(searchbox_extremes[dim][1], dim);
// Special case: wrapped dimension whose lower bin exceeds its upper bin.
99 if (wrapping[dim] and infBin > supBin)
102 searchbox_bins[dim] = nostd::make_array(infBin, supBin);
// Returns a span over the entries of `indexes` that belong to bin `globalBinId`:
// the range starts at offsets[globalBinId] and its length is
// offsets[globalBinId + 1] - offsets[globalBinId].
// No bounds check is performed; offsets[globalBinId + 1] is read, so `offsets` must
// have an entry one past the requested bin.
106 ALPAKA_FN_ACC
inline constexpr clue::Span<int32_t> operator[](int32_t globalBinId) {
107 const auto size = offsets[globalBinId + 1] - offsets[globalBinId];
108 const auto offset = offsets[globalBinId];
109 int32_t* buf_ptr = indexes + offset;
110 return clue::Span<int32_t>{buf_ptr, size};
// Reduces `coord` relative to the extent of dimension `dim`.
// The visible lines subtract a whole number of ranges from `coord`, then compare the
// remainder against minmax->max(dim) and minmax->min(dim).
// NOTE(review): the bodies of both branches and the return statement are not visible in
// this excerpt; presumably they shift the remainder by one range to bring it back into
// [min, max) — confirm against the full source.
113 ALPAKA_FN_HOST_ACC
inline constexpr float normalizeCoordinate(
float coord,
int dim)
const {
114 const float range = minmax->range(dim);
// static_cast<int> truncates toward zero, so `remainder` keeps the sign of `coord`.
115 float remainder = coord -
static_cast<int>(coord / range) * range;
116 if (remainder >= minmax->max(dim))
118 else if (remainder < minmax->min(dim))
// Accumulates the squared per-dimension differences between two points into `dist_sq`.
// Two accumulation forms are visible: one squares the difference after passing it
// through normalizeCoordinate(), the other squares the raw difference.
// NOTE(review): the declaration of `dist_sq`, the condition choosing between the two
// forms (presumably the per-dimension wrapping flag — confirm), and the return value
// (squared distance vs. its square root) are not visible in this excerpt.
123 ALPAKA_FN_ACC
inline float distance(
const std::array<float, Ndim>& coord_i,
124 const std::array<float, Ndim>& coord_j) {
126 for (
int dim = 0; dim != Ndim; ++dim) {
// NOTE(review): normalizeCoordinate() is evaluated twice on identical arguments.
128 dist_sq += normalizeCoordinate(coord_i[dim] - coord_j[dim], dim) *
129 normalizeCoordinate(coord_i[dim] - coord_j[dim], dim);
131 dist_sq += (coord_i[dim] - coord_j[dim]) * (coord_i[dim] - coord_j[dim]);
// Constructor: creates the device buffers for the coordinate extremes, tile sizes and
// wrapping flags on `queue`, derives the number of tiles per dimension from `n_tiles`,
// and fills `m_view` with raw pointers into the owned buffers plus the sizes.
// NOTE(review): several initializer lines (original lines 142, 146, 148 — presumably
// m_assoc and m_ntiles) are not visible in this excerpt.
140 template <concepts::queue TQueue>
141 TilesAlpaka(TQueue& queue, int32_t n_points, int32_t n_tiles)
143 m_minmax{make_device_buffer<CoordinateExtremes<Ndim>>(queue)},
144 m_tilesizes{make_device_buffer<float[Ndim]>(queue)},
145 m_wrapped{make_device_buffer<uint8_t[Ndim]>(queue)},
// Tiles per dimension = truncated Ndim-th root of n_tiles.
// NOTE(review): the root is computed in floating point and then truncated; if the
// result lands just below an exact integer the value would be one too small —
// confirm this cannot happen for the supported n_tiles / Ndim combinations.
147 m_nperdim{
static_cast<int32_t
>(std::pow(n_tiles, 1.f / Ndim))},
// Publish raw pointers and sizes to the view.
149 m_view.indexes = m_assoc.indexes().data();
150 m_view.offsets = m_assoc.offsets().data();
151 m_view.minmax = m_minmax.data();
152 m_view.tilesizes = m_tilesizes.data();
153 m_view.wrapping = m_wrapped.data();
154 m_view.npoints = n_points;
155 m_view.ntiles = m_ntiles;
156 m_view.nperdim = m_nperdim;
// Initializes the underlying association map for `npoints` points and `ntiles` tiles
// on `queue`, then rewrites every field of `m_view` from the owned buffers and the
// arguments.
// NOTE(review): original lines 165-167 are not visible in this excerpt (presumably they
// update m_ntiles / m_nperdim — confirm). `nperdim` is taken as given and is not checked
// against `ntiles` in the visible code.
162 template <concepts::queue TQueue>
163 ALPAKA_FN_HOST
void initialize(int32_t npoints, int32_t ntiles, int32_t nperdim, TQueue& queue) {
164 m_assoc.initialize(npoints, ntiles, queue);
// Re-read the data pointers after the association map has been initialized.
168 m_view.indexes = m_assoc.indexes().data();
169 m_view.offsets = m_assoc.offsets().data();
170 m_view.minmax = m_minmax.data();
171 m_view.tilesizes = m_tilesizes.data();
172 m_view.wrapping = m_wrapped.data();
173 m_view.npoints = npoints;
174 m_view.ntiles = ntiles;
175 m_view.nperdim = nperdim;
// Resets the underlying association map for `npoints` points and `ntiles` tiles, then
// rewrites every field of `m_view` from the owned buffers and the arguments.
// Note the argument order of m_assoc.reset (queue first) differs from
// m_assoc.initialize (queue last).
// NOTE(review): original lines 181-183 are not visible in this excerpt (presumably they
// update m_ntiles / m_nperdim — confirm).
178 template <concepts::queue TQueue>
179 ALPAKA_FN_HOST
void reset(int32_t npoints, int32_t ntiles, int32_t nperdim, TQueue& queue) {
180 m_assoc.reset(queue, npoints, ntiles);
// Re-read the data pointers after the association map has been reset.
184 m_view.indexes = m_assoc.indexes().data();
185 m_view.offsets = m_assoc.offsets().data();
186 m_view.minmax = m_minmax.data();
187 m_view.tilesizes = m_tilesizes.data();
188 m_view.wrapping = m_wrapped.data();
189 m_view.npoints = npoints;
190 m_view.ntiles = ntiles;
191 m_view.nperdim = nperdim;
// Functor state and call operator that compute the linear tile bin of one point.
// NOTE(review): the enclosing struct header, the `tilesView` member, the declaration of
// `coords`, and the return statement are not visible in this excerpt.
// View of the point data read by operator().
195 PointsView pointsView;
// Gathers the Ndim coordinates of point `index` and passes them to
// tilesView.getGlobalBin().
198 ALPAKA_FN_ACC int32_t operator()(int32_t index)
const {
200 for (
auto dim = 0; dim < Ndim; ++dim) {
// Coordinate `dim` of point `index` is read at offset index + dim * pointsView.n.
201 coords[dim] = pointsView.coords[index + dim * pointsView.n];
204 auto bin = tilesView.getGlobalBin(coords);
// Fills the association map: builds a GetGlobalBin functor from the points view and
// this object's tile view, and hands it to m_assoc.fill<TAcc>() together with `size`
// and `queue`.
// NOTE(review): the function signature (original line 210) is not visible in this
// excerpt, so the declarations of `size`, `d_points` and `queue` cannot be confirmed.
// `dev` is obtained but not used in the visible lines.
209 template <concepts::accelerator TAcc, concepts::queue TQueue>
211 auto dev = alpaka::getDev(queue);
212 auto pointsView = d_points.
view();
213 m_assoc.template fill<TAcc>(size,
GetGlobalBin{pointsView, m_view}, queue);
// Host-side accessor for the coordinate-extremes device buffer; returns a
// clue::device_buffer by value.
// NOTE(review): the body is not visible in this excerpt (presumably `return m_minmax;`).
216 ALPAKA_FN_HOST
inline clue::device_buffer<TDev, CoordinateExtremes<Ndim>> minMax()
const {
// Host-side accessor for the per-dimension tile-size device buffer; returns a
// clue::device_buffer by value.
// NOTE(review): the body is not visible in this excerpt (presumably `return m_tilesizes;`).
219 ALPAKA_FN_HOST
inline clue::device_buffer<TDev, float[Ndim]> tileSize()
const {
// Host-side accessor for the per-dimension wrapping-flag device buffer; returns a
// clue::device_buffer by value.
// NOTE(review): the body is not visible in this excerpt (presumably `return m_wrapped;`).
222 ALPAKA_FN_HOST
inline clue::device_buffer<TDev, uint8_t[Ndim]> wrapped()
const {
// Returns the stored tile count (`m_ntiles`).
226 ALPAKA_FN_HOST
inline constexpr auto size()
const {
return m_ntiles; }
// Returns the stored number of tiles per dimension (`m_nperdim`).
228 ALPAKA_FN_HOST
inline constexpr auto nPerDim()
const {
return m_nperdim; }
// Forwards to the association map's extents() and returns its result.
230 ALPAKA_FN_HOST
inline constexpr auto extents()
const {
return m_assoc.extents(); }
// No-op: the body is empty and `queue` is not used.
// NOTE(review): presumably kept for interface compatibility with callers that expect a
// clear(queue) member — confirm, or consider marking the parameter unused.
232 template <concepts::queue TQueue>
233 ALPAKA_FN_HOST
inline constexpr void clear(
const TQueue& queue) {}
// Read-only access to the association map's index buffer (forwards to
// m_assoc.indexes()); returned by const reference.
235 ALPAKA_FN_HOST
const clue::device_buffer<TDev, int32_t[]>& indexes()
const {
236 return m_assoc.indexes();
// Mutable access to the association map's index buffer (forwards to
// m_assoc.indexes()); returned by reference.
238 ALPAKA_FN_HOST clue::device_buffer<TDev, int32_t[]>& indexes() {
return m_assoc.indexes(); }
// Read-only access to the association map's offsets buffer (forwards to
// m_assoc.offsets()); returned by const reference.
239 ALPAKA_FN_HOST
const clue::device_buffer<TDev, int32_t[]>& offsets()
const {
240 return m_assoc.offsets();
// Mutable access to the association map's offsets buffer (forwards to
// m_assoc.offsets()); returned by reference.
242 ALPAKA_FN_HOST clue::device_buffer<TDev, int32_t[]>& offsets() {
return m_assoc.offsets(); }
// Forwards to m_assoc.indexes(dev, assoc_id) and returns the resulting device view by
// value.
// NOTE(review): presumably the view covers the indexes associated with `assoc_id` —
// the semantics are defined by AssociationMap, which is not visible here.
244 ALPAKA_FN_HOST clue::device_view<TDev, int32_t[]> indexes(
const TDev& dev,
size_t assoc_id) {
245 return m_assoc.indexes(dev, assoc_id);
// Association map that owns the `indexes` and `offsets` buffers exposed through m_view.
249 AssociationMap<TDev> m_assoc;
// Device buffer whose data() pointer is published as m_view.minmax.
250 device_buffer<TDev, CoordinateExtremes<Ndim>> m_minmax;
// Device buffer (Ndim floats) whose data() pointer is published as m_view.tilesizes.
251 device_buffer<TDev, float[Ndim]> m_tilesizes;
// Device buffer (Ndim bytes) whose data() pointer is published as m_view.wrapping.
252 device_buffer<TDev, uint8_t[Ndim]> m_wrapped;
// View holding raw pointers into the buffers above plus the point and tile counts;
// it is the object handed to GetGlobalBin in the fill code.
255 TilesAlpakaView<Ndim> m_view;