Line data Source code
1 : // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 :
5 : #include "leveldb/table.h"
6 :
7 : #include "leveldb/cache.h"
8 : #include "leveldb/comparator.h"
9 : #include "leveldb/env.h"
10 : #include "leveldb/filter_policy.h"
11 : #include "leveldb/options.h"
12 : #include "table/block.h"
13 : #include "table/filter_block.h"
14 : #include "table/format.h"
15 : #include "table/two_level_iterator.h"
16 : #include "util/coding.h"
17 :
18 : namespace leveldb {
19 :
20 320 : struct Table::Rep {
21 320 : ~Rep() {
22 160 : delete filter;
23 160 : delete [] filter_data;
24 160 : delete index_block;
25 160 : }
26 :
27 : Options options;
28 : Status status;
29 : RandomAccessFile* file;
30 : uint64_t cache_id;
31 : FilterBlockReader* filter;
32 : const char* filter_data;
33 :
34 : BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
35 : Block* index_block;
36 : };
37 :
38 160 : Status Table::Open(const Options& options,
39 : RandomAccessFile* file,
40 : uint64_t size,
41 : Table** table) {
42 160 : *table = NULL;
43 160 : if (size < Footer::kEncodedLength) {
44 0 : return Status::Corruption("file is too short to be an sstable");
45 : }
46 :
47 : char footer_space[Footer::kEncodedLength];
48 : Slice footer_input;
49 : Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
50 160 : &footer_input, footer_space);
51 160 : if (!s.ok()) return s;
52 :
53 : Footer footer;
54 320 : s = footer.DecodeFrom(&footer_input);
55 160 : if (!s.ok()) return s;
56 :
57 : // Read the index block
58 : BlockContents contents;
59 160 : Block* index_block = NULL;
60 160 : if (s.ok()) {
61 : ReadOptions opt;
62 160 : if (options.paranoid_checks) {
63 160 : opt.verify_checksums = true;
64 : }
65 320 : s = ReadBlock(file, opt, footer.index_handle(), &contents);
66 160 : if (s.ok()) {
67 160 : index_block = new Block(contents);
68 : }
69 : }
70 :
71 160 : if (s.ok()) {
72 : // We've successfully read the footer and the index block: we're
73 : // ready to serve requests.
74 320 : Rep* rep = new Table::Rep;
75 160 : rep->options = options;
76 160 : rep->file = file;
77 160 : rep->metaindex_handle = footer.metaindex_handle();
78 160 : rep->index_block = index_block;
79 160 : rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
80 160 : rep->filter_data = NULL;
81 160 : rep->filter = NULL;
82 320 : *table = new Table(rep);
83 160 : (*table)->ReadMeta(footer);
84 : } else {
85 0 : if (index_block) delete index_block;
86 : }
87 :
88 : return s;
89 : }
90 :
91 160 : void Table::ReadMeta(const Footer& footer) {
92 160 : if (rep_->options.filter_policy == NULL) {
93 0 : return; // Do not need any metadata
94 : }
95 :
96 : // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
97 : // it is an empty block.
98 : ReadOptions opt;
99 160 : if (rep_->options.paranoid_checks) {
100 160 : opt.verify_checksums = true;
101 : }
102 : BlockContents contents;
103 480 : if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
104 : // Do not propagate errors since meta info is not needed for operation
105 : return;
106 : }
107 160 : Block* meta = new Block(contents);
108 :
109 160 : Iterator* iter = meta->NewIterator(BytewiseComparator());
110 320 : std::string key = "filter.";
111 160 : key.append(rep_->options.filter_policy->Name());
112 320 : iter->Seek(key);
113 320 : if (iter->Valid() && iter->key() == Slice(key)) {
114 160 : ReadFilter(iter->value());
115 : }
116 160 : delete iter;
117 160 : delete meta;
118 : }
119 :
120 160 : void Table::ReadFilter(const Slice& filter_handle_value) {
121 160 : Slice v = filter_handle_value;
122 : BlockHandle filter_handle;
123 480 : if (!filter_handle.DecodeFrom(&v).ok()) {
124 0 : return;
125 : }
126 :
127 : // We might want to unify with ReadBlock() if we start
128 : // requiring checksum verification in Table::Open.
129 : ReadOptions opt;
130 160 : if (rep_->options.paranoid_checks) {
131 160 : opt.verify_checksums = true;
132 : }
133 : BlockContents block;
134 480 : if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
135 : return;
136 : }
137 160 : if (block.heap_allocated) {
138 0 : rep_->filter_data = block.data.data(); // Will need to delete later
139 : }
140 160 : rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
141 : }
142 :
143 160 : Table::~Table() {
144 160 : delete rep_;
145 160 : }
146 :
147 12416 : static void DeleteBlock(void* arg, void* ignored) {
148 12416 : delete reinterpret_cast<Block*>(arg);
149 12416 : }
150 :
151 0 : static void DeleteCachedBlock(const Slice& key, void* value) {
152 0 : Block* block = reinterpret_cast<Block*>(value);
153 0 : delete block;
154 0 : }
155 :
156 0 : static void ReleaseBlock(void* arg, void* h) {
157 0 : Cache* cache = reinterpret_cast<Cache*>(arg);
158 0 : Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
159 0 : cache->Release(handle);
160 0 : }
161 :
162 : // Convert an index iterator value (i.e., an encoded BlockHandle)
163 : // into an iterator over the contents of the corresponding block.
164 12416 : Iterator* Table::BlockReader(void* arg,
165 : const ReadOptions& options,
166 : const Slice& index_value) {
167 12416 : Table* table = reinterpret_cast<Table*>(arg);
168 12416 : Cache* block_cache = table->rep_->options.block_cache;
169 12416 : Block* block = NULL;
170 12416 : Cache::Handle* cache_handle = NULL;
171 :
172 : BlockHandle handle;
173 12416 : Slice input = index_value;
174 12416 : Status s = handle.DecodeFrom(&input);
175 : // We intentionally allow extra stuff in index_value so that we
176 : // can add more features in the future.
177 :
178 12416 : if (s.ok()) {
179 : BlockContents contents;
180 12416 : if (block_cache != NULL) {
181 : char cache_key_buffer[16];
182 12416 : EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
183 12416 : EncodeFixed64(cache_key_buffer+8, handle.offset());
184 : Slice key(cache_key_buffer, sizeof(cache_key_buffer));
185 12416 : cache_handle = block_cache->Lookup(key);
186 12416 : if (cache_handle != NULL) {
187 0 : block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
188 : } else {
189 24832 : s = ReadBlock(table->rep_->file, options, handle, &contents);
190 12416 : if (s.ok()) {
191 12416 : block = new Block(contents);
192 12416 : if (contents.cachable && options.fill_cache) {
193 : cache_handle = block_cache->Insert(
194 0 : key, block, block->size(), &DeleteCachedBlock);
195 : }
196 : }
197 : }
198 : } else {
199 0 : s = ReadBlock(table->rep_->file, options, handle, &contents);
200 0 : if (s.ok()) {
201 0 : block = new Block(contents);
202 : }
203 : }
204 : }
205 :
206 : Iterator* iter;
207 12416 : if (block != NULL) {
208 12416 : iter = block->NewIterator(table->rep_->options.comparator);
209 12416 : if (cache_handle == NULL) {
210 12416 : iter->RegisterCleanup(&DeleteBlock, block, NULL);
211 : } else {
212 0 : iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
213 : }
214 : } else {
215 0 : iter = NewErrorIterator(s);
216 : }
217 24832 : return iter;
218 : }
219 :
220 218 : Iterator* Table::NewIterator(const ReadOptions& options) const {
221 : return NewTwoLevelIterator(
222 : rep_->index_block->NewIterator(rep_->options.comparator),
223 218 : &Table::BlockReader, const_cast<Table*>(this), options);
224 : }
225 :
226 16890 : Status Table::InternalGet(const ReadOptions& options, const Slice& k,
227 : void* arg,
228 : void (*saver)(void*, const Slice&, const Slice&)) {
229 : Status s;
230 16890 : Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
231 16890 : iiter->Seek(k);
232 16890 : if (iiter->Valid()) {
233 16890 : Slice handle_value = iiter->value();
234 16890 : FilterBlockReader* filter = rep_->filter;
235 : BlockHandle handle;
236 50670 : if (filter != NULL &&
237 50670 : handle.DecodeFrom(&handle_value).ok() &&
238 16890 : !filter->KeyMayMatch(handle.offset(), k)) {
239 : // Not found
240 : } else {
241 11968 : Iterator* block_iter = BlockReader(this, options, iiter->value());
242 11968 : block_iter->Seek(k);
243 11968 : if (block_iter->Valid()) {
244 11968 : (*saver)(arg, block_iter->key(), block_iter->value());
245 : }
246 23936 : s = block_iter->status();
247 11968 : delete block_iter;
248 : }
249 : }
250 16890 : if (s.ok()) {
251 33780 : s = iiter->status();
252 : }
253 16890 : delete iiter;
254 16890 : return s;
255 : }
256 :
257 :
258 0 : uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
259 : Iterator* index_iter =
260 0 : rep_->index_block->NewIterator(rep_->options.comparator);
261 0 : index_iter->Seek(key);
262 : uint64_t result;
263 0 : if (index_iter->Valid()) {
264 : BlockHandle handle;
265 0 : Slice input = index_iter->value();
266 0 : Status s = handle.DecodeFrom(&input);
267 0 : if (s.ok()) {
268 0 : result = handle.offset();
269 : } else {
270 : // Strange: we can't decode the block handle in the index block.
271 : // We'll just return the offset of the metaindex block, which is
272 : // close to the whole file size for this case.
273 0 : result = rep_->metaindex_handle.offset();
274 : }
275 : } else {
276 : // key is past the last key in the file. Approximate the offset
277 : // by returning the offset of the metaindex block (which is
278 : // right near the end of the file).
279 0 : result = rep_->metaindex_handle.offset();
280 : }
281 0 : delete index_iter;
282 0 : return result;
283 : }
284 :
285 : } // namespace leveldb
|