TPIE

11a2c2d
btree_builder.h
1 // -*- mode: c++; tab-width: 4; indent-tabs-mode: t; eval: (progn (c-set-style "stroustrup") (c-set-offset 'innamespace 0)); -*-
2 // vi:set ts=4 sts=4 sw=4 noet :
3 // Copyright 2015 The TPIE development team
4 //
5 // This file is part of TPIE.
6 //
7 // TPIE is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License as published by the
9 // Free Software Foundation, either version 3 of the License, or (at your
10 // option) any later version.
11 //
12 // TPIE is distributed in the hope that it will be useful, but WITHOUT ANY
13 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 // License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with TPIE. If not, see <http://www.gnu.org/licenses/>
19 
20 #ifndef _TPIE_BTREE_BTREE_BUILDER_H_
21 #define _TPIE_BTREE_BTREE_BUILDER_H_
22 
23 #include <tpie/portability.h>
24 #include <tpie/btree/base.h>
25 #include <tpie/btree/node.h>
26 #include <tpie/memory.h>
27 
28 #include <cstddef>
29 #include <deque>
30 #include <vector>
31 
32 namespace tpie {
33 namespace bbits {
34 
35 template <typename T, bool b>
36 struct block_size_getter {
37  static constexpr size_t v() {return 0;}
38 };
39 
40 template <typename T>
41 struct block_size_getter<T, true> {
42  static constexpr size_t v() {return T::block_size();}
43 };
44 
45 template <typename T, typename O>
46 class builder {
47 private:
48  typedef bbits::tree<T, O> tree_type;
49  typedef bbits::tree_state<T, O> state_type;
50 
51  typedef typename state_type::key_type key_type;
52 
53  static const bool is_internal = state_type::is_internal;
54 
55  static const bool is_static = state_type::is_static;
56 
57  static const bool is_serialized = state_type::is_serialized;
58 
59  static_assert(!is_serialized || is_static, "The builder currently only supports static serialized trees");
60 
61  typedef T value_type;
62 
63  typedef typename O::C comp_type;
64 
65  typedef typename O::A augmenter_type;
66 
67  typedef typename tree_type::augment_type augment_type;
68 
69  typedef typename tree_type::size_type size_type;
70 
71  typedef typename state_type::combined_augment combined_augment;
72 
73  typedef typename state_type::store_type store_type;
74 
75  typedef typename state_type::store_type S;
76 
77  typedef typename S::leaf_type leaf_type;
78 
79  typedef typename S::internal_type internal_type;
80 
81  typedef btree_node<state_type> node_type;
82 
83  // Keeps the same information that the parent of a leaf keeps
84  struct leaf_summary {
85  leaf_type leaf;
86  combined_augment augment;
87  };
88 
89  // Keeps the same information that the parent of a node keeps
90  struct internal_summary {
91  internal_type internal;
92  combined_augment augment;
93  };
94 
95  // Construct a leaf from m
96  void construct_leaf(size_t size) {
97  tp_assert(size != 0, "we should not construct an empty leaf");
98  tp_assert(size <= m_items.size(), "we should not construct a leaf with more items then we have");
99  tp_assert(size <= S::max_leaf_size(), "we should not construct a leaf with more items then the max leaf size");
100  leaf_summary leaf;
101  leaf.leaf = m_state.store().create_leaf();
102 
103  m_state.store().set_count(leaf.leaf, size);
104 
105  for(size_t i = 0; i < size; ++i) {
106  m_state.store().set(leaf.leaf, i, m_items.front());
107  m_items.pop_front();
108  }
109 
110  leaf.augment = m_state.m_augmenter(node_type(&m_state, leaf.leaf));
111  m_state.store().flush();
112  m_leaves.push_back(leaf);
113 
114  if (is_serialized) {
115  tp_assert(m_items.empty(), "we should only construct complete leafs when serializing");
116  m_serialized_size = 0;
117  }
118  }
119 
120  void construct_internal_from_leaves(size_t size) {
121  internal_summary internal;
122  internal.internal = m_state.store().create_internal();
123 
124  m_state.store().set_count(internal.internal, size);
125 
126  for(size_t i = 0; i < size; ++i) {
127  leaf_summary child = m_leaves.front();
128  m_state.store().set(internal.internal, i, child.leaf);
129  m_state.store().set_augment(child.leaf, internal.internal, child.augment);
130  m_leaves.pop_front();
131  }
132 
133  internal.augment = m_state.m_augmenter(node_type(&m_state, internal.internal));
134  m_state.store().flush();
135 
136  // push the internal node to the deque of nodes
137  if(m_internal_nodes.size() < 1) m_internal_nodes.push_back(std::deque<internal_summary>());
138  m_internal_nodes[0].push_back(internal);
139  }
140 
141  void construct_internal_from_internal(size_t size, size_t level) {
142  // take nodes from internal_nodes[level] and construct a new node in internal_nodes[level+1]
143  internal_summary internal;
144  internal.internal = m_state.store().create_internal();
145  m_state.store().set_count(internal.internal, size);
146  for(size_t i = 0; i < size; ++i) {
147  internal_summary child = m_internal_nodes[level].front();
148  m_state.store().set(internal.internal, i, child.internal);
149  m_state.store().set_augment(child.internal, internal.internal, child.augment);
150  m_internal_nodes[level].pop_front();
151  }
152  internal.augment = m_state.m_augmenter(node_type(&m_state, internal.internal));
153  m_state.store().flush();
154 
155  // push the internal node to the deque of nodes
156  if(m_internal_nodes.size() < level+2) m_internal_nodes.push_back(std::deque<internal_summary>());
157  m_internal_nodes[level+1].push_back(internal);
158  }
159 
163  static constexpr size_t desired_leaf_size() noexcept {
164  return is_static
165  ? S::max_leaf_size()
166  : ((S::min_leaf_size() + S::max_leaf_size()) / 2);
167  }
168 
172  static constexpr size_t leaf_tipping_point() noexcept {
173  return desired_leaf_size() + S::min_leaf_size();
174  }
175 
179  static constexpr size_t desired_internal_size() noexcept {
180  return is_static
181  ? S::max_internal_size()
182  : ((S::min_internal_size() + S::max_internal_size()) / 2);
183  }
184 
188  static constexpr size_t internal_tipping_point() noexcept {
189  return desired_internal_size() + S::min_internal_size();
190  }
191 
195  void extract_nodes() {
196  construct_leaf(is_serialized?m_items.size() : desired_leaf_size());
197 
198  if(m_leaves.size() < internal_tipping_point()) return;
199  construct_internal_from_leaves(desired_internal_size());
200 
201  // Traverse the levels of the tree and try to construct internal nodes from other internal nodes
202  for(size_t i = 0; i < m_internal_nodes.size(); ++i) {
203  // if it is not possible to construct a node at this level, it is not possible at higher levels either
204  if(m_internal_nodes[i].size() < internal_tipping_point()) return;
205  // we have enough nodes to construct a new node.
206  construct_internal_from_internal(desired_internal_size(), i);
207  }
208  }
209 public:
213  template <typename X=enab>
214  explicit builder(std::string path, comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(),
215  btree_flags flags=btree_flags::defaults, enable<X, !is_internal> =enab() )
216  : m_state(store_type(path, (flags | btree_flags::write) & ~btree_flags::read ), std::move(augmenter), typename state_type::keyextract_type())
217  , m_comp(comp)
218  , m_serialized_size(0)
219  , m_size(0)
220  {}
221 
222  template <typename X=enab>
223  explicit builder(comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(),
224  memory_bucket_ref bucket=memory_bucket_ref(), enable<X, is_internal> =enab() )
225  : m_state(store_type(bucket), std::move(augmenter), typename state_type::keyextract_type())
226  , m_comp(comp)
227  , m_serialized_size(0)
228  , m_size(0)
229  {}
230 
231 
236  void push(value_type v) {
237  ++m_size;
238  if (is_serialized) {
239  size_t s = serialized_size(v);
240  if (m_items.size() == S::max_leaf_size() ||
241  (s + m_serialized_size > block_size_getter<S, is_serialized>::v() && m_serialized_size))
242  extract_nodes();
243  m_items.push_back(v);
244  m_serialized_size += s;
245  } else {
246  m_items.push_back(v);
247 
248  // try to construct nodes from items if possible
249  if(m_items.size() < leaf_tipping_point()) return;
250  extract_nodes();
251  }
252  }
253 
257  tree_type build(const std::string & metadata = std::string()) {
258  m_state.store().set_size(m_size);
259 
260  // finish building the tree by traversing all levels and constructing leaves/nodes
261  // construct one or two leaves if neccesary
262  if(m_items.size() > 0) {
263  if(!is_serialized && m_items.size() > S::max_leaf_size()) // construct two leaves if necessary
264  construct_leaf(m_items.size()/2);
265  construct_leaf(m_items.size()); // construct a leaf with the remaining items
266  }
267 
268  // if there already exists internal nodes and there are leaves left: construct a new internal node(since there is guaranteed to be atleast S::min_internal_size leaves)
269  // if there do not exist internal nodes, then only construct an internal node if there is more than one leaf
270  if((m_internal_nodes.size() == 0 && m_leaves.size() > 1) || (m_internal_nodes.size() > 0 && m_leaves.size() > 0)) {
271  if(m_leaves.size() > 2*S::max_internal_size() ) // construct two nodes if necessary
272  construct_internal_from_leaves(m_leaves.size()/3);
273  if(m_leaves.size() > S::max_internal_size()) // construct two nodes if necessary
274  construct_internal_from_leaves(m_leaves.size()/2);
275  construct_internal_from_leaves(m_leaves.size()); // construct a node from the remaining leaves
276  }
277 
278  for(size_t i = 0; i < m_internal_nodes.size(); ++i) {
279  // if there already exists internal nodes at a higher level and there are nodes at this level left: construct a new internal node at the higher level.
280  // if there do not exist internal nodes at a higher level, then only construct an internal nodes at that level if there are more than one node at this level.
281  if((m_internal_nodes.size() == i+1 && m_internal_nodes[i].size() > 1)
282  || (m_internal_nodes.size() > i+1 && m_internal_nodes[i].size() > 0)) {
283  if(m_internal_nodes[i].size() > 2*S::max_internal_size())
284  construct_internal_from_internal(m_internal_nodes[i].size()/3, i);
285  if(m_internal_nodes[i].size() > S::max_internal_size())
286  construct_internal_from_internal(m_internal_nodes[i].size()/2, i);
287  construct_internal_from_internal(m_internal_nodes[i].size(), i);
288  }
289  }
290 
291  // find the root and set it as such
292 
293  if(m_internal_nodes.size() == 0 && m_leaves.size() == 0) // no items were pushed
294  m_state.store().set_height(0);
295  else {
296  m_state.store().set_height(m_internal_nodes.size() + 1);
297  if(m_leaves.size() == 1)
298  m_state.store().set_root(m_leaves.front().leaf);
299  else
300  m_state.store().set_root(m_internal_nodes.back().front().internal);
301  }
302  if (metadata.size()) {
303  m_state.store().flush();
304  m_state.store().set_metadata(metadata);
305  }
306  m_state.store().finalize_build();
307  return tree_type(std::move(m_state), std::move(m_comp));
308  }
309 
310 private:
311  std::deque<value_type> m_items;
312  std::deque<leaf_summary> m_leaves;
313  std::vector<std::deque<internal_summary>> m_internal_nodes;
314 
315  state_type m_state;
316  comp_type m_comp;
317  size_t m_serialized_size;
318  stream_size_type m_size;
319 };
320 
321 } //namespace bbits
322 
323 template <typename T, typename ... Opts>
324 using btree_builder = bbits::builder<T, typename bbits::OptComp<Opts...>::type>;
325 
326 } //namespace tpie
327 #endif /*_TPIE_BTREE_BTREE_BUILDER_H_*/
portability.h
tpie::bbits::tree
Augmented btree.
Definition: base.h:238
tpie::bbits::tree_state
Definition: base.h:274
tpie::bbits::builder::build
tree_type build(const std::string &metadata=std::string())
Constructs and returns a btree from the value that was pushed to the builder.
Definition: btree_builder.h:257
tpie::bbits::tree::size_type
store_type::size_type size_type
Type of the size.
Definition: btree.h:75
tpie::memory_bucket_ref
Class storring a reference to a memory bucket.
Definition: memory.h:334
tpie::bbits::builder::push
void push(value_type v)
Push a value to the builder.
Definition: btree_builder.h:236
tpie::bbits::enab
Definition: base.h:262
tp_assert
#define tp_assert(condition, message)
Definition: tpie_assert.h:65
tpie::flags
Type safe bitflags over an enumeration T.
Definition: flags.h:73
tpie::bbits::block_size_getter
Definition: base.h:251
tpie::serialized_size
size_t serialized_size(const T &v)
Given a serializable, serialize it and measure its serialized size.
Definition: serialization2.h:354
tpie::bbits::builder::builder
builder(std::string path, comp_type comp=comp_type(), augmenter_type augmenter=augmenter_type(), btree_flags flags=btree_flags::defaults, enable< X, !is_internal >=enab())
Construct a btree builder with the given storage.
Definition: btree_builder.h:214
memory.h
tpie::btree_node
Type that is useful for navigating a btree.
Definition: base.h:221
tpie
Definition: access_type.h:26
tpie::bbits::builder
Augmented btree builder.
Definition: base.h:248