Project
Loading...
Searching...
No Matches
IndexBuilderHelpers.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15#include <arrow/compute/api_aggregate.h>
16#include <arrow/compute/kernel.h>
17#include <arrow/status.h>
18#include <arrow/table.h>
19#include <arrow/util/key_value_metadata.h>
20
21namespace o2::framework
22{
24{
25 throw runtime_error("Cannot build an array");
26}
27
28ChunkedArrayIterator::ChunkedArrayIterator(std::shared_ptr<arrow::ChunkedArray> source)
29 : mSource{source}
30{
32 mCurrent = reinterpret_cast<int const*>(mCurrentArray->values()->data()) + mOffset;
33 mLast = mCurrent + mCurrentArray->length();
34}
35
36SelfIndexColumnBuilder::SelfIndexColumnBuilder(const char* name, arrow::MemoryPool* pool)
37 : mColumnName{name},
38 mArrowType{arrow::int32()}
39{
40 auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder);
41 if (!status.ok()) {
42 throw runtime_error("Cannot create array builder!");
43 }
44}
45
46std::shared_ptr<arrow::Field> SelfIndexColumnBuilder::field() const
47{
48 return std::make_shared<arrow::Field>(mColumnName, mArrowType);
49}
50
51IndexColumnBuilder::IndexColumnBuilder(std::shared_ptr<arrow::ChunkedArray> source, const char* name, int listSize, arrow::MemoryPool* pool)
54 mListSize{listSize},
55 mSourceSize{(size_t)source->length()}
56{
57 switch (mListSize) {
58 case 1: {
59 mValueBuilder = mBuilder.get();
60 mArrowType = arrow::int32();
61 }; break;
62 case 2: {
63 if (preSlice().ok()) {
64 mListBuilder = std::make_unique<arrow::FixedSizeListBuilder>(pool, std::move(mBuilder), mListSize);
65 mValueBuilder = static_cast<arrow::FixedSizeListBuilder*>(mListBuilder.get())->value_builder();
66 mArrowType = arrow::fixed_size_list(arrow::int32(), 2);
67 } else {
68 throw runtime_error("Cannot pre-slice an array");
69 }
70 }; break;
71 case -1: {
72 if (preFind().ok()) {
73 mListBuilder = std::make_unique<arrow::ListBuilder>(pool, std::move(mBuilder));
74 mValueBuilder = static_cast<arrow::ListBuilder*>(mListBuilder.get())->value_builder();
75 mArrowType = arrow::list(arrow::int32());
76 } else {
77 throw runtime_error("Cannot pre-find array groups");
78 }
79 }; break;
80 default:
81 throw runtime_error_f("Invalid list size for index column: %d", mListSize);
82 }
83}
84
85arrow::Status IndexColumnBuilder::preSlice()
86{
87 arrow::Datum value_counts;
88 auto options = arrow::compute::ScalarAggregateOptions::Defaults();
89 ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options));
90 auto pair = static_cast<arrow::StructArray>(value_counts.array());
91 mValuesArrow = std::make_shared<arrow::NumericArray<arrow::Int32Type>>(pair.field(0)->data());
92 mCounts = std::make_shared<arrow::NumericArray<arrow::Int64Type>>(pair.field(1)->data());
93 return arrow::Status::OK();
94}
95
96arrow::Status IndexColumnBuilder::preFind()
97{
98 arrow::Datum max;
99 auto options = arrow::compute::ScalarAggregateOptions::Defaults();
100 ARROW_ASSIGN_OR_RAISE(max, arrow::compute::CallFunction("max", {mSource}, &options));
101 auto maxValue = std::dynamic_pointer_cast<arrow::Int32Scalar>(max.scalar())->value;
102 mIndices.resize(maxValue + 1);
103
104 auto row = 0;
105 for (auto i = 0; i < mSource->length(); ++i) {
106 auto v = valueAt(i);
107 if (v >= 0) {
108 mValues.emplace_back(v);
109 mIndices[v].push_back(row);
110 }
111 ++row;
112 }
113 std::sort(mValues.begin(), mValues.end());
114
115 return arrow::Status::OK();
116}
117
118std::shared_ptr<arrow::ChunkedArray> IndexColumnBuilder::resultSingle() const
119{
120 std::shared_ptr<arrow::Array> array;
121 auto status = static_cast<arrow::Int32Builder*>(mValueBuilder)->Finish(&array);
122 if (!status.ok()) {
123 throw runtime_error("Cannot build an array");
124 }
125 return std::make_shared<arrow::ChunkedArray>(array);
126}
127
128std::shared_ptr<arrow::ChunkedArray> IndexColumnBuilder::resultSlice() const
129{
130 std::shared_ptr<arrow::Array> array;
131 auto status = static_cast<arrow::FixedSizeListBuilder*>(mListBuilder.get())->Finish(&array);
132 if (!status.ok()) {
133 throw runtime_error("Cannot build an array");
134 }
135 return std::make_shared<arrow::ChunkedArray>(array);
136}
137
138std::shared_ptr<arrow::ChunkedArray> IndexColumnBuilder::resultMulti() const
139{
140 std::shared_ptr<arrow::Array> array;
141 auto status = static_cast<arrow::ListBuilder*>(mListBuilder.get())->Finish(&array);
142 if (!status.ok()) {
143 throw runtime_error("Cannot build an array");
144 }
145 return std::make_shared<arrow::ChunkedArray>(array);
146}
147
148bool IndexColumnBuilder::findSingle(int idx)
149{
150 auto count = mSourceSize - mPosition;
151 while (count > 0) {
152 size_t step = count / 2;
153 mPosition += step;
154 if (valueAt(mPosition) <= idx) {
155 count -= step + 1;
156 } else {
157 mPosition -= step;
158 count = step;
159 }
160 }
161
162 if (mPosition < mSourceSize && valueAt(mPosition) < idx) {
163 ++mPosition;
164 }
165
166 return (mPosition < mSourceSize && valueAt(mPosition) == idx);
167}
168
169bool IndexColumnBuilder::findSlice(int idx)
170{
171 auto count = mValuesArrow->length() - mValuePos;
172 while (count > 0) {
173 auto step = count / 2;
174 mValuePos += step;
175 if (mValuesArrow->Value(mValuePos) <= idx) {
176 count -= step + 1;
177 } else {
178 mValuePos -= step;
179 count = step;
180 }
181 }
182
183 if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) <= idx) {
184 ++mPosition;
185 }
186
187 return (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx);
188}
189
190bool IndexColumnBuilder::findMulti(int idx)
191{
192 return (std::find(mValues.begin(), mValues.end(), idx) != mValues.end());
193}
194
195void IndexColumnBuilder::fillSingle(int idx)
196{
197 // entry point
198 if (mPosition < mSourceSize && valueAt(mPosition) == idx) {
199 (void)static_cast<arrow::Int32Builder*>(mValueBuilder)->Append((int)mPosition);
200 } else {
201 (void)static_cast<arrow::Int32Builder*>(mValueBuilder)->Append(-1);
202 }
203}
204
205void IndexColumnBuilder::fillSlice(int idx)
206{
207 int data[2] = {-1, -1};
208 if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx) {
209 for (auto i = 0; i < mValuePos; ++i) {
210 data[0] += mCounts->Value(i);
211 }
212 data[0] += 1;
213 data[1] = data[0] + mCounts->Value(mValuePos) - 1;
214 }
215 (void)static_cast<arrow::FixedSizeListBuilder*>(mListBuilder.get())->AppendValues(1);
216 (void)static_cast<arrow::Int32Builder*>(mValueBuilder)->AppendValues(data, 2);
217}
218
219void IndexColumnBuilder::fillMulti(int idx)
220{
221 (void)static_cast<arrow::ListBuilder*>(mListBuilder.get())->Append();
222 if (std::find(mValues.begin(), mValues.end(), idx) != mValues.end()) {
223 (void)static_cast<arrow::Int32Builder*>(mValueBuilder)->AppendValues(mIndices[idx].data(), mIndices[idx].size());
224 } else {
225 (void)static_cast<arrow::Int32Builder*>(mValueBuilder)->AppendValues(nullptr, 0);
226 }
227}
228
229std::shared_ptr<arrow::Int32Array> ChunkedArrayIterator::getCurrentArray()
230{
231 auto chunk = mSource->chunk(mChunk);
232 mOffset = chunk->offset();
233 return std::static_pointer_cast<arrow::Int32Array>(chunk);
234}
235
237{
238 auto previousArray = getCurrentArray();
239 mFirstIndex += previousArray->length();
240
241 ++mChunk;
242 auto array = getCurrentArray();
243 mCurrent = reinterpret_cast<int const*>(array->values()->data()) + mOffset - mFirstIndex;
244 mLast = mCurrent + array->length() + mFirstIndex;
245}
246
248{
249 auto previousArray = getCurrentArray();
250 mFirstIndex -= previousArray->length();
251
252 --mChunk;
253 auto array = getCurrentArray();
254 mCurrent = reinterpret_cast<int const*>(array->values()->data()) + mOffset - mFirstIndex;
255 mLast = mCurrent + array->length() + mFirstIndex;
256}
257
259{
260 while (O2_BUILTIN_UNLIKELY(mCurrent + pos >= mLast)) {
261 nextChunk();
262 }
264 prevChunk();
265 }
266 return *(mCurrent + pos);
267}
268
269std::shared_ptr<arrow::Table> makeArrowTable(const char* label, std::vector<std::shared_ptr<arrow::ChunkedArray>>&& columns, std::vector<std::shared_ptr<arrow::Field>>&& fields)
270{
271 auto schema = std::make_shared<arrow::Schema>(fields);
272 schema->WithMetadata(
273 std::make_shared<arrow::KeyValueMetadata>(
274 std::vector{std::string{"label"}},
275 std::vector{std::string{label}}));
276 return arrow::Table::Make(schema, columns);
277}
278} // namespace o2::framework
#define O2_BUILTIN_UNLIKELY(x)
int32_t i
uint16_t pos
Definition RawData.h:3
IndexColumnBuilder(std::shared_ptr< arrow::ChunkedArray > source, const char *name, int listSize, arrow::MemoryPool *pool)
GLint GLsizei count
Definition glcorearb.h:399
const GLdouble * v
Definition glcorearb.h:832
GLenum array
Definition glcorearb.h:4274
GLuint const GLchar * name
Definition glcorearb.h:781
GLsizei GLsizei GLchar * source
Definition glcorearb.h:798
GLboolean * data
Definition glcorearb.h:298
GLuint GLsizei GLsizei * length
Definition glcorearb.h:790
GLuint GLsizei const GLchar * label
Definition glcorearb.h:2519
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
RuntimeErrorRef runtime_error(const char *)
RuntimeErrorRef runtime_error_f(const char *,...)
std::shared_ptr< arrow::Table > makeArrowTable(const char *label, std::vector< std::shared_ptr< arrow::ChunkedArray > > &&columns, std::vector< std::shared_ptr< arrow::Field > > &&fields)
value_T step
Definition TrackUtils.h:42
std::shared_ptr< arrow::Int32Array > getCurrentArray()
ChunkedArrayIterator(std::shared_ptr< arrow::ChunkedArray > source)
std::shared_ptr< arrow::Int32Array > mCurrentArray
std::shared_ptr< arrow::ChunkedArray > mSource
SelfIndexColumnBuilder(const char *name, arrow::MemoryPool *pool)
std::unique_ptr< arrow::ArrayBuilder > mBuilder
std::shared_ptr< arrow::DataType > mArrowType
std::shared_ptr< arrow::Field > field() const
constexpr size_t max
std::vector< int > row