52 ALPAKA_FN_ACC
inline constexpr const float* minMax()
const {
return minmax; }
53 ALPAKA_FN_ACC
inline constexpr float* minMax() {
return minmax; }
55 ALPAKA_FN_ACC
inline constexpr const float* tileSize()
const {
return tilesizes; }
56 ALPAKA_FN_ACC
inline constexpr float* tileSize() {
return tilesizes; }
58 ALPAKA_FN_ACC
inline constexpr const uint8_t* wrapped()
const {
return wrapping; }
59 ALPAKA_FN_ACC
inline constexpr uint8_t* wrapped() {
return wrapping; }
61 ALPAKA_FN_ACC
inline constexpr int getBin(
float coord,
int dim)
const {
65 static_cast<int>((normalizeCoordinate(coord, dim) - minmax->min(dim)) / tilesizes[dim]);
67 coord_bin =
static_cast<int>((coord - minmax->min(dim)) / tilesizes[dim]);
71 coord_bin = internal::math::min(coord_bin, nperdim - 1);
72 coord_bin = internal::math::max(coord_bin, 0);
77 ALPAKA_FN_ACC
inline constexpr int getGlobalBin(
const float* coords)
const {
79 for (
int dim = 0; dim != Ndim - 1; ++dim) {
80 global_bin += internal::math::pow(
static_cast<float>(nperdim), Ndim - dim - 1) *
81 getBin(coords[dim], dim);
83 global_bin += getBin(coords[Ndim - 1], Ndim - 1);
87 ALPAKA_FN_ACC
inline constexpr int getGlobalBinByBin(
const VecArray<int32_t, Ndim>& Bins)
const {
88 int32_t globalBin = 0;
89 for (
int dim = 0; dim != Ndim; ++dim) {
90 auto bin_i = wrapping[dim] ? (Bins[dim] % nperdim) : Bins[dim];
91 globalBin += internal::math::pow(
static_cast<float>(nperdim), Ndim - dim - 1) * bin_i;
96 ALPAKA_FN_ACC
inline void searchBox(
const SearchBoxExtremes<Ndim>& searchbox_extremes,
97 SearchBoxBins<Ndim>& searchbox_bins) {
98 for (
int dim{}; dim != Ndim; ++dim) {
99 auto infBin = getBin(searchbox_extremes[dim][0], dim);
100 auto supBin = getBin(searchbox_extremes[dim][1], dim);
101 if (wrapping[dim] and infBin > supBin)
104 searchbox_bins[dim] = nostd::make_array(infBin, supBin);
108 ALPAKA_FN_ACC
inline constexpr clue::Span<int32_t> operator[](int32_t globalBinId) {
109 const auto size = offsets[globalBinId + 1] - offsets[globalBinId];
110 const auto offset = offsets[globalBinId];
111 int32_t* buf_ptr = indexes + offset;
112 return clue::Span<int32_t>{buf_ptr, size};
115 ALPAKA_FN_HOST_ACC
inline constexpr float normalizeCoordinate(
float coord,
int dim)
const {
116 const float range = minmax->range(dim);
117 float remainder = coord -
static_cast<int>(coord / range) * range;
118 if (remainder >= minmax->max(dim))
120 else if (remainder < minmax->min(dim))
125 ALPAKA_FN_ACC
inline float distance(
const std::array<float, Ndim>& coord_i,
126 const std::array<float, Ndim>& coord_j) {
128 for (
int dim = 0; dim != Ndim; ++dim) {
130 dist_sq += normalizeCoordinate(coord_i[dim] - coord_j[dim], dim) *
131 normalizeCoordinate(coord_i[dim] - coord_j[dim], dim);
133 dist_sq += (coord_i[dim] - coord_j[dim]) * (coord_i[dim] - coord_j[dim]);
142 template <concepts::queue TQueue>
143 TilesAlpaka(TQueue& queue, int32_t n_points, int32_t n_tiles)
145 m_minmax{clue::make_device_buffer<CoordinateExtremes<Ndim>>(queue)},
146 m_tilesizes{clue::make_device_buffer<float[Ndim]>(queue)},
147 m_wrapped{clue::make_device_buffer<uint8_t[Ndim]>(queue)},
149 m_nperdim{
static_cast<int32_t
>(std::pow(n_tiles, 1.f / Ndim))},
150 m_view{clue::make_device_buffer<TilesAlpakaView<Ndim>>(queue)} {
151 auto host_view = clue::make_host_buffer<TilesAlpakaView<Ndim>>(queue);
152 host_view->indexes = m_assoc.indexes().data();
153 host_view->offsets = m_assoc.offsets().data();
154 host_view->minmax = m_minmax.data();
155 host_view->tilesizes = m_tilesizes.data();
156 host_view->wrapping = m_wrapped.data();
157 host_view->npoints = n_points;
158 host_view->ntiles = m_ntiles;
159 host_view->nperdim = m_nperdim;
161 alpaka::memcpy(queue, m_view, host_view);
166 template <concepts::queue TQueue>
167 ALPAKA_FN_HOST
void initialize(int32_t npoints, int32_t ntiles, int32_t nperdim, TQueue& queue) {
168 m_assoc.initialize(npoints, ntiles, queue);
171 auto host_view = clue::make_host_buffer<TilesAlpakaView<Ndim>>(queue);
172 host_view->indexes = m_assoc.indexes().data();
173 host_view->offsets = m_assoc.offsets().data();
174 host_view->minmax = m_minmax.data();
175 host_view->tilesizes = m_tilesizes.data();
176 host_view->wrapping = m_wrapped.data();
177 host_view->npoints = npoints;
178 host_view->ntiles = ntiles;
179 host_view->nperdim = nperdim;
181 alpaka::memcpy(queue, m_view, host_view);
184 template <concepts::queue TQueue>
185 ALPAKA_FN_HOST
void reset(int32_t npoints, int32_t ntiles, int32_t nperdim, TQueue& queue) {
186 m_assoc.reset(queue, npoints, ntiles);
190 auto host_view = clue::make_host_buffer<TilesAlpakaView<Ndim>>(queue);
191 host_view->indexes = m_assoc.indexes().data();
192 host_view->offsets = m_assoc.offsets().data();
193 host_view->minmax = m_minmax.data();
194 host_view->tilesizes = m_tilesizes.data();
195 host_view->wrapping = m_wrapped.data();
196 host_view->npoints = npoints;
197 host_view->ntiles = ntiles;
198 host_view->nperdim = nperdim;
199 alpaka::memcpy(queue, m_view, host_view);
203 PointsView* pointsView;
206 ALPAKA_FN_ACC int32_t operator()(int32_t index)
const {
208 for (
auto dim = 0; dim < Ndim; ++dim) {
209 coords[dim] = pointsView->coords[index + dim * pointsView->n];
212 auto bin = tilesView->getGlobalBin(coords);
217 template <concepts::accelerator TAcc, concepts::queue TQueue>
219 auto dev = alpaka::getDev(queue);
220 auto pointsView = d_points.view();
221 m_assoc.template fill<TAcc>(size,
GetGlobalBin{pointsView, m_view.data()}, queue);
224 ALPAKA_FN_HOST
inline clue::device_buffer<TDev, CoordinateExtremes<Ndim>> minMax()
const {
227 ALPAKA_FN_HOST
inline clue::device_buffer<TDev, float[Ndim]> tileSize()
const {
230 ALPAKA_FN_HOST
inline clue::device_buffer<TDev, uint8_t[Ndim]> wrapped()
const {
234 ALPAKA_FN_HOST
inline constexpr auto size()
const {
return m_ntiles; }
236 ALPAKA_FN_HOST
inline constexpr auto nPerDim()
const {
return m_nperdim; }
238 template <concepts::queue TQueue>
239 ALPAKA_FN_HOST
inline constexpr void clear(
const TQueue& queue) {}
241 ALPAKA_FN_HOST
const clue::device_buffer<TDev, int32_t[]>& indexes()
const {
242 return m_assoc.indexes();
244 ALPAKA_FN_HOST clue::device_buffer<TDev, int32_t[]>& indexes() {
return m_assoc.indexes(); }
245 ALPAKA_FN_HOST
const clue::device_buffer<TDev, int32_t[]>& offsets()
const {
246 return m_assoc.offsets();
248 ALPAKA_FN_HOST clue::device_buffer<TDev, int32_t[]>& offsets() {
return m_assoc.offsets(); }
250 ALPAKA_FN_HOST clue::device_view<TDev, int32_t[]> indexes(
const TDev& dev,
size_t assoc_id) {
251 return m_assoc.indexes(dev, assoc_id);
255 clue::AssociationMap<TDev> m_assoc;
256 clue::device_buffer<TDev, CoordinateExtremes<Ndim>> m_minmax;
257 clue::device_buffer<TDev, float[Ndim]> m_tilesizes;
258 clue::device_buffer<TDev, uint8_t[Ndim]> m_wrapped;
261 clue::device_buffer<TDev, TilesAlpakaView<Ndim>> m_view;