Line data Source code
1 : // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 : //
5 : // BlockBuilder generates blocks where keys are prefix-compressed:
6 : //
7 : // When we store a key, we drop the prefix shared with the previous
8 : // string. This helps reduce the space requirement significantly.
9 : // Furthermore, once every K keys, we do not apply the prefix
10 : // compression and store the entire key. We call this a "restart
11 : // point". The tail end of the block stores the offsets of all of the
12 : // restart points, and can be used to do a binary search when looking
13 : // for a particular key. Values are stored as-is (without compression)
14 : // immediately following the corresponding key.
15 : //
16 : // An entry for a particular key-value pair has the form:
17 : // shared_bytes: varint32
18 : // unshared_bytes: varint32
19 : // value_length: varint32
20 : // key_delta: char[unshared_bytes]
21 : // value: char[value_length]
22 : // shared_bytes == 0 for restart points.
23 : //
24 : // The trailer of the block has the form:
25 : // restarts: uint32[num_restarts]
26 : // num_restarts: uint32
27 : // restarts[i] contains the offset within the block of the ith restart point.
28 :
29 : #include "table/block_builder.h"
30 :
31 : #include <algorithm>
32 : #include <assert.h>
33 : #include "leveldb/comparator.h"
34 : #include "leveldb/table_builder.h"
35 : #include "util/coding.h"
36 :
37 : namespace leveldb {
38 :
39 366 : BlockBuilder::BlockBuilder(const Options* options)
40 : : options_(options),
41 : restarts_(),
42 : counter_(0),
43 1098 : finished_(false) {
44 366 : assert(options->block_restart_interval >= 1);
45 732 : restarts_.push_back(0); // First restart point is at offset 0
46 366 : }
47 :
48 761 : void BlockBuilder::Reset() {
49 761 : buffer_.clear();
50 761 : restarts_.clear();
51 1522 : restarts_.push_back(0); // First restart point is at offset 0
52 761 : counter_ = 0;
53 761 : finished_ = false;
54 761 : last_key_.clear();
55 761 : }
56 :
57 18237 : size_t BlockBuilder::CurrentSizeEstimate() const {
58 36474 : return (buffer_.size() + // Raw data buffer
59 36474 : restarts_.size() * sizeof(uint32_t) + // Restart array
60 18237 : sizeof(uint32_t)); // Restart array length
61 : }
62 :
63 761 : Slice BlockBuilder::Finish() {
64 : // Append restart array
65 5470 : for (size_t i = 0; i < restarts_.size(); i++) {
66 3948 : PutFixed32(&buffer_, restarts_[i]);
67 : }
68 1522 : PutFixed32(&buffer_, restarts_.size());
69 761 : finished_ = true;
70 1522 : return Slice(buffer_);
71 : }
72 :
73 18876 : void BlockBuilder::Add(const Slice& key, const Slice& value) {
74 18876 : Slice last_key_piece(last_key_);
75 18876 : assert(!finished_);
76 18876 : assert(counter_ <= options_->block_restart_interval);
77 36991 : assert(buffer_.empty() // No values yet?
78 18876 : || options_->comparator->Compare(key, last_key_piece) > 0);
79 18876 : size_t shared = 0;
80 18876 : if (counter_ < options_->block_restart_interval) {
81 : // See how much sharing to do with previous string
82 35326 : const size_t min_length = std::min(last_key_piece.size(), key.size());
83 56641 : while ((shared < min_length) && (last_key_piece[shared] == key[shared])) {
84 21315 : shared++;
85 : }
86 : } else {
87 : // Restart compression
88 3639 : restarts_.push_back(buffer_.size());
89 1213 : counter_ = 0;
90 : }
91 18876 : const size_t non_shared = key.size() - shared;
92 :
93 : // Add "<shared><non_shared><value_size>" to buffer_
94 18876 : PutVarint32(&buffer_, shared);
95 18876 : PutVarint32(&buffer_, non_shared);
96 18876 : PutVarint32(&buffer_, value.size());
97 :
98 : // Add string delta to buffer_ followed by value
99 18876 : buffer_.append(key.data() + shared, non_shared);
100 18876 : buffer_.append(value.data(), value.size());
101 :
102 : // Update state
103 18876 : last_key_.resize(shared);
104 18876 : last_key_.append(key.data() + shared, non_shared);
105 37752 : assert(Slice(last_key_) == key);
106 18876 : counter_++;
107 18876 : }
108 :
109 : } // namespace leveldb
|