20 #ifndef _TPIE_BTREE_SERIALIZED_STORE_H_
21 #define _TPIE_BTREE_SERIALIZED_STORE_H_
24 #include <tpie/btree/base.h>
36 #ifdef TPIE_HAS_SNAPPY
57 class serialized_store {
70 typedef size_t size_type;
72 typedef uint64_t off_t;
86 struct internal_content {
90 static_assert(std::is_trivially_copyable<internal_content>::value,
"must be trivially copyable.");
92 static constexpr
size_t block_size() {
return bs_?bs_:24*1024;}
93 static constexpr
size_t min_internal_size() {
return 1;}
94 static constexpr
size_t max_internal_size() {
return a_ ? a_ : (block_size() -
sizeof(off_t) -
sizeof(
size_t)) /
sizeof(internal_content) ; }
95 static constexpr
size_t min_leaf_size() {
return 1;}
96 static constexpr
size_t max_leaf_size() {
return b_ ? b_ : (block_size() -
sizeof(off_t) -
sizeof(
size_t)) /
sizeof(T);}
98 class serilization_buffer {
100 serilization_buffer() =
default;
101 serilization_buffer(
size_t n) : m_buffer(n) {}
103 void read(
char * buf,
size_t size) {
104 assert(m_index + size <= m_buffer.size());
105 memcpy(buf, m_buffer.data() + m_index, size);
109 void write(
const char * buf,
size_t size) {
110 if (m_index + size > m_buffer.size()) {
111 m_buffer.resize(m_index + size);
113 memcpy(m_buffer.data() + m_index, buf, size);
118 return m_buffer.size();
122 return m_buffer.data();
126 std::vector<char> m_buffer;
130 template <
typename S,
typename N>
131 void serialize(S & s,
const N & i)
const {
134 auto compression_type = m_flags & btree_flags::compression_mask;
136 if (compression_type == btree_flags::compress_none) {
137 serialize(s, i.count);
138 serialize(s, i.values, i.values + i.count);
142 serilization_buffer uncompressed_buffer(
sizeof(i.count) +
sizeof(*i.values) * i.count);
143 serialize(uncompressed_buffer, i.count);
144 serialize(uncompressed_buffer, i.values, i.values + i.count);
146 int32_t uncompressed_size = (int32_t)uncompressed_buffer.size();
148 std::vector<char> compressed_buffer;
149 int32_t compressed_size;
151 switch (compression_type) {
153 case btree_flags::compress_lz4: {
154 auto max_compressed_size = LZ4_compressBound(uncompressed_size);
155 compressed_buffer.resize((
size_t)max_compressed_size);
157 compressed_size = LZ4_compress_default(uncompressed_buffer.data(), compressed_buffer.data(),
158 uncompressed_size, max_compressed_size);
159 if (compressed_size == 0)
160 throw io_exception(
"B-tree compression failed");
166 case compress_zstd: {
167 auto max_compressed_size = ZSTD_compressBound((
size_t)uncompressed_size);
168 compressed_buffer.resize(max_compressed_size);
170 int level = (int)((uint64_t)(m_flags & btree_flags::compression_level_mask) >> 8);
171 if (level == 0) level = 5;
173 size_t r = ZSTD_compress(compressed_buffer.data(), max_compressed_size, uncompressed_buffer.data(), uncompressed_size, level);
175 throw io_exception(
"B-tree compression failed");
177 compressed_size = (int32_t)r;
181 #ifdef TPIE_HAS_SNAPPY
182 case compress_snappy: {
183 auto max_compressed_size = snappy::MaxCompressedLength((
size_t) uncompressed_size);
184 compressed_buffer.resize(max_compressed_size);
186 size_t _compressed_size;
187 snappy::RawCompress(uncompressed_buffer.data(), uncompressed_size,
188 compressed_buffer.data(), &_compressed_size);
190 compressed_size = (int32_t) _compressed_size;
195 throw exception(
"Unknown compression, this code shouldn't be reachable");
198 s.write(
reinterpret_cast<char *
>(&uncompressed_size),
sizeof(uncompressed_size));
199 s.write(
reinterpret_cast<char *
>(&compressed_size),
sizeof(compressed_size));
200 s.write(compressed_buffer.data(), compressed_size);
203 template <
typename D,
typename N>
204 void unserialize(D & d, N & i)
const {
207 auto compression_type = m_flags & btree_flags::compression_mask;
209 if (compression_type == btree_flags::compress_none) {
210 unserialize(d, i.count);
211 unserialize(d, i.values, i.values + i.count);
215 int32_t uncompressed_size, compressed_size;
216 d.read(
reinterpret_cast<char *
>(&uncompressed_size),
sizeof(uncompressed_size));
217 d.read(
reinterpret_cast<char *
>(&compressed_size),
sizeof(compressed_size));
219 std::vector<char> compressed_buffer((
size_t)compressed_size);
220 d.read(compressed_buffer.data(), compressed_size);
222 serilization_buffer uncompressed_buffer((
size_t)uncompressed_size);
224 switch (m_flags & btree_flags::compression_mask) {
227 int r = LZ4_decompress_fast(compressed_buffer.data(), uncompressed_buffer.data(), uncompressed_size);
228 if (r != compressed_size)
229 throw io_exception(
"B-tree decompression failed");
234 case compress_zstd: {
235 size_t r = ZSTD_decompress(uncompressed_buffer.data(), (
size_t)uncompressed_size, compressed_buffer.data(), (
size_t)compressed_size);
236 if (ZSTD_isError(r) || (int32_t)r != uncompressed_size)
237 throw io_exception(
"B-tree decompression failed");
241 #ifdef TPIE_HAS_SNAPPY
242 case compress_snappy: {
243 bool ok = snappy::RawUncompress(compressed_buffer.data(), (
size_t)compressed_size, uncompressed_buffer.data());
245 throw io_exception(
"B-tree decompression failed");
250 throw exception(
"Unknown compression, this code shouldn't be reachable");
253 unserialize(uncompressed_buffer, i.count);
254 unserialize(uncompressed_buffer, i.values, i.values + i.count);
260 internal_content values[max_internal_size()];
266 T values[max_leaf_size()];
270 std::shared_ptr<leaf> ptr;
273 leaf_type() =
default;
274 leaf_type(
const leaf_type &) =
default;
275 leaf_type(leaf_type &&) =
default;
277 leaf_type & operator=(
const leaf_type &) =
default;
278 leaf_type & operator=(leaf_type &&) =
default;
280 leaf_type(off_t offset) {
281 ptr = std::make_shared<leaf>();
282 ptr->my_offset = offset;
285 leaf & operator*() const noexcept {
289 leaf * operator->() const noexcept {
290 return ptr.operator->();
293 void reset() noexcept {
297 explicit operator bool() const noexcept {
301 bool operator==(
const leaf_type & o)
const noexcept {
302 return ptr->my_offset == o->my_offset;
305 bool operator!=(
const leaf_type & o)
const noexcept {
316 static constexpr uint64_t good_magic = 0x8bbd51bfe5e3d477, current_version = 1;
322 off_t metadata_offset;
326 struct header : header_v0 {
330 typedef std::shared_ptr<internal> internal_type;
332 void set_flags(btree_flags flags) {
334 switch (flags & btree_flags::compression_mask) {
335 case btree_flags::compress_lz4:
337 throw exception(
"Can't use a LZ4 compressed B-tree without LZ4 installed");
340 case btree_flags::compress_zstd:
341 #ifndef TPIE_HAS_ZSTD
342 throw exception(
"Can't use a ZSTD compressed B-tree without ZSTD installed");
345 case btree_flags::compress_snappy:
346 #ifndef TPIE_HAS_SNAPPY
347 throw exception(
"Can't use a snappy compressed B-tree without snappy installed");
350 case btree_flags::compress_none:
353 throw exception(
"Unknown compression");
362 explicit serialized_store(
const std::string & path, btree_flags flags=btree_flags::defaults):
363 m_height(0), m_size(0), metadata_offset(0), metadata_size(0), path(path) {
364 f.reset(
new std::fstream());
366 if ((flags & btree_flags::read) == 0) {
367 if ((flags & btree_flags::write) == 0)
368 throw invalid_file_exception(
"Either read or write must be supplied to serialized store");
369 f->open(path, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
371 throw invalid_file_exception(
"Open failed");
372 memset(&h, 0,
sizeof(h));
375 f->write(
reinterpret_cast<char *
>(&h),
sizeof(h));
377 f->open(path, std::ios_base::in | std::ios_base::binary);
379 throw invalid_file_exception(
"Open failed");
380 f->read(
reinterpret_cast<char *
>(&h),
sizeof(header_v0));
382 throw invalid_file_exception(
"Unable to read header");
384 if (h.magic != header::good_magic)
385 throw invalid_file_exception(
"Bad magic");
387 if (h.version == 0) {
388 h.flags = btree_flags::defaults_v0;
389 }
else if (h.version == 1) {
390 f->read(
reinterpret_cast<char *
>(&h.flags),
sizeof(h.flags));
392 throw invalid_file_exception(
"Bad version");
397 metadata_offset = h.metadata_offset;
398 metadata_size = h.metadata_size;
403 root_leaf = leaf_type(h.root);
405 unserialize(*f, *root_leaf);
406 }
else if (m_height > 1) {
407 root_internal = std::make_shared<internal>();
408 root_internal->my_offset = h.root;
410 unserialize(*f, *root_internal);
416 void move(internal_type src,
size_t src_i,
417 internal_type dst,
size_t dst_i) {
418 dst->values[dst_i] = src->values[src_i];
421 void move(leaf_type src,
size_t src_i,
422 leaf_type dst,
size_t dst_i) {
423 dst->values[dst_i] = src->values[src_i];
426 void set(leaf_type dst,
size_t dst_i, T c) {
427 assert(dst == current_leaf);
428 dst->values[dst_i] = c;
431 void set(internal_type node,
size_t i, internal_type c) {
432 assert(node == current_internal);
433 node->values[i].offset = c->my_offset;
436 void set(internal_type node,
size_t i, leaf_type c) {
437 assert(node == current_internal);
438 node->values[i].offset = c->my_offset;
441 const T & get(leaf_type l,
size_t i)
const {
445 size_t count(internal_type node)
const {
449 size_t count(leaf_type node)
const {
453 void set_count(internal_type node,
size_t i) {
457 void set_count(leaf_type node,
size_t i) {
461 leaf_type create_leaf() {
462 assert(!current_internal && !current_leaf);
463 current_leaf = leaf_type((off_t)f->tellp());
466 leaf_type create(leaf_type) {
return create_leaf();}
467 internal_type create_internal() {
468 assert(!current_internal && !current_leaf);
469 current_internal = std::make_shared<internal>();
470 current_internal->my_offset = (stream_size_type)f->tellp();
471 return current_internal;
473 internal_type create(internal_type) {
return create_internal();}
475 void set_root(internal_type node) {root_internal = node;}
476 void set_root(leaf_type node) {root_leaf = node;}
478 internal_type get_root_internal()
const {
479 return root_internal;
482 leaf_type get_root_leaf()
const {
486 internal_type get_child_internal(internal_type node,
size_t i)
const {
487 internal_type child = std::make_shared<internal>();
488 assert(i < node->count);
489 child->my_offset = node->values[i].offset;
490 f->seekg(child->my_offset);
491 unserialize(*f, *child);
495 leaf_type get_child_leaf(internal_type node,
size_t i)
const {
496 leaf_type child = leaf_type(node->values[i].offset);
497 assert(i < node->count);
498 f->seekg(child->my_offset);
499 unserialize(*f, *child);
503 size_t index(off_t my_offset, internal_type node)
const {
504 for (
size_t i=0; i < node->count; ++i)
505 if (node->values[i].offset == my_offset)
return i;
510 size_t index(leaf_type l, internal_type node)
const {
511 return index(l->my_offset, node);
514 size_t index(internal_type i, internal_type node)
const {
515 return index(i->my_offset, node);
518 void set_augment(leaf_type l, internal_type p,
augment_type ag) {
519 size_t idx = index(l->my_offset, p);
520 p->values[idx].augment = ag;
523 void set_augment(internal_type i, internal_type p,
augment_type ag) {
524 size_t idx = index(i->my_offset, p);
525 p->values[idx].augment = ag;
528 const augment_type & augment(internal_type p,
size_t i)
const {
529 return p->values[i].augment;
532 size_t height()
const throw() {
536 void set_height(
size_t height)
throw() {
540 size_t size()
const throw() {
544 void set_size(
size_t size)
throw() {
549 if (current_internal) {
550 assert(!current_leaf);
551 assert((stream_size_type)f->tellp() == current_internal->my_offset);
552 serialize(*f, *current_internal);
553 current_internal.reset();
556 assert((stream_size_type)f->tellp() == current_leaf->my_offset);
557 serialize(*f, *current_leaf);
558 current_leaf.reset();
562 void finalize_build() {
564 assert(!current_internal && !current_leaf);
567 h.magic = header::good_magic;
568 h.version = header::current_version;
571 h.root = root_internal->my_offset;
572 }
else if (root_leaf) {
573 h.root = root_leaf->my_offset;
579 h.metadata_offset = metadata_offset;
580 h.metadata_size = metadata_size;
583 f->write(
reinterpret_cast<char *
>(&h),
sizeof(h));
586 f->open(path, std::ios_base::in | std::ios_base::binary);
588 throw invalid_file_exception(
"Open failed");
591 void set_metadata(
const std::string & data) {
592 assert(!current_internal && !current_leaf);
593 assert(f->is_open());
594 metadata_offset = (stream_size_type)f->tellp();
595 metadata_size = data.size();
596 f->write(data.c_str(), data.size());
599 std::string get_metadata() {
600 assert(f->is_open());
601 if (metadata_offset == 0 || metadata_size == 0)
603 std::string data(metadata_size,
'\0');
604 f->read(&data[0], metadata_size);
610 off_t metadata_offset, metadata_size;
614 std::unique_ptr<std::fstream> f;
615 internal_type current_internal, root_internal;
616 leaf_type current_leaf, root_leaf;
619 friend class ::tpie::btree_node;
622 friend class ::tpie::btree_iterator;
624 template <
typename,
typename>
625 friend class bbits::tree;
627 template <
typename,
typename>
628 friend class bbits::tree_state;
630 template<
typename,
typename>
631 friend class bbits::builder;
633 template <
typename,
bool>
634 friend struct bbits::block_size_getter;