Project
Loading...
Searching...
No Matches
GPUCommonAlgorithmThrust.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUCOMMONALGORITHMTHRUST_H
16#define GPUCOMMONALGORITHMTHRUST_H
17
18#ifndef GPUCA_GPUCODE_COMPILEKERNELS
19#pragma GCC diagnostic push
20#pragma GCC diagnostic ignored "-Wshadow"
21#include <thrust/sort.h>
22#include <thrust/execution_policy.h>
23#include <thrust/device_ptr.h>
24#pragma GCC diagnostic pop
25
26#include "GPUCommonDef.h"
27#include "GPUCommonHelpers.h"
28
29#ifndef __HIPCC__ // CUDA
30#include <cub/cub.cuh>
31#else // HIP
32#include <hipcub/hipcub.hpp>
33#endif
34#endif // GPUCA_GPUCODE_COMPILEKERNELS
35
36#ifndef __HIPCC__ // CUDA
37#define GPUCA_THRUST_NAMESPACE thrust::cuda
38#define GPUCA_CUB_NAMESPACE cub
39#else // HIP
40#define GPUCA_THRUST_NAMESPACE thrust::hip
41#define GPUCA_CUB_NAMESPACE hipcub
42#endif
43
44namespace o2::gpu
45{
46
47// - Our quicksort and bubble sort implementations are faster
48/*
49template <class T>
50GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end)
51{
52 thrust::device_ptr<T> thrustBegin(begin);
53 thrust::device_ptr<T> thrustEnd(end);
54 thrust::sort(thrust::seq, thrustBegin, thrustEnd);
55}
56
57template <class T, class S>
58GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end, const S& comp)
59{
60 thrust::device_ptr<T> thrustBegin(begin);
61 thrust::device_ptr<T> thrustEnd(end);
62 thrust::sort(thrust::seq, thrustBegin, thrustEnd, comp);
63}
64
65template <class T>
66GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end) // TODO: Try cub::BlockMergeSort
67{
68 if (get_local_id(0) == 0) {
69 sortDeviceDynamic(begin, end);
70 }
71}
72
73template <class T, class S>
74GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp)
75{
76 if (get_local_id(0) == 0) {
77 sortDeviceDynamic(begin, end, comp);
78 }
79}
80
81*/
82
83template <class T>
84GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end)
85{
86 thrust::device_ptr<T> thrustBegin(begin);
87 thrust::device_ptr<T> thrustEnd(end);
88 thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd);
89}
90
91template <class T, class S>
92GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& comp)
93{
94 thrust::device_ptr<T> thrustBegin(begin);
95 thrust::device_ptr<T> thrustEnd(end);
96 thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd, comp);
97}
98
99#ifndef GPUCA_GPUCODE_COMPILEKERNELS
100template <class T, class S>
101GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp)
102{
103 thrust::device_ptr<T> p(begin);
104#if 0 // Use Thrust
106 thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp);
107#else // Use CUB
108 size_t tempSize = 0;
109 void* tempMem = nullptr;
110 GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream]));
111 tempMem = rec->AllocateVolatileDeviceMemory(tempSize);
112 GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream]));
113#endif
114}
115#endif // #ifndef GPUCA_GPUCODE_COMPILEKERNELS
116
117} // namespace o2::gpu
118
119#undef GPUCA_THRUST_NAMESPACE
120#undef GPUCA_CUB_NAMESPACE
121
122#endif
#define GPUChkErrS(x)
ThrustVolatileAllocator getThrustVolatileDeviceAllocator()
void * AllocateVolatileDeviceMemory(size_t size)
GLuint GLuint end
Definition glcorearb.h:469
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLuint GLuint stream
Definition glcorearb.h:1806
GPUhi() void GPUCommonAlgorithm
GPUdi() o2
Definition TrackTRD.h:38
GPUReconstruction * rec