/root/bitcoin/src/leveldb/db/version_set.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | // |
5 | | // The representation of a DBImpl consists of a set of Versions. The |
6 | | // newest version is called "current". Older versions may be kept |
7 | | // around to provide a consistent view to live iterators. |
8 | | // |
9 | | // Each Version keeps track of a set of Table files per level. The |
10 | | // entire set of versions is maintained in a VersionSet. |
11 | | // |
12 | | // Version,VersionSet are thread-compatible, but require external |
13 | | // synchronization on all accesses. |
14 | | |
15 | | #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ |
16 | | #define STORAGE_LEVELDB_DB_VERSION_SET_H_ |
17 | | |
18 | | #include <map> |
19 | | #include <set> |
20 | | #include <vector> |
21 | | |
22 | | #include "db/dbformat.h" |
23 | | #include "db/version_edit.h" |
24 | | #include "port/port.h" |
25 | | #include "port/thread_annotations.h" |
26 | | |
27 | | namespace leveldb { |
28 | | |
29 | | namespace log { |
30 | | class Writer; |
31 | | } |
32 | | |
33 | | class Compaction; |
34 | | class Iterator; |
35 | | class MemTable; |
36 | | class TableBuilder; |
37 | | class TableCache; |
38 | | class Version; |
39 | | class VersionSet; |
40 | | class WritableFile; |
41 | | |
42 | | // Return the smallest index i such that files[i]->largest >= key. |
43 | | // Return files.size() if there is no such file. |
44 | | // REQUIRES: "files" contains a sorted list of non-overlapping files. |
45 | | int FindFile(const InternalKeyComparator& icmp, |
46 | | const std::vector<FileMetaData*>& files, const Slice& key); |
47 | | |
48 | | // Returns true iff some file in "files" overlaps the user key range |
49 | | // [*smallest,*largest]. |
50 | | // smallest==nullptr represents a key smaller than all keys in the DB. |
51 | | // largest==nullptr represents a key largest than all keys in the DB. |
52 | | // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges |
53 | | // in sorted order. |
54 | | bool SomeFileOverlapsRange(const InternalKeyComparator& icmp, |
55 | | bool disjoint_sorted_files, |
56 | | const std::vector<FileMetaData*>& files, |
57 | | const Slice* smallest_user_key, |
58 | | const Slice* largest_user_key); |
59 | | |
60 | | class Version { |
61 | | public: |
62 | | // Lookup the value for key. If found, store it in *val and |
63 | | // return OK. Else return a non-OK status. Fills *stats. |
64 | | // REQUIRES: lock is not held |
65 | | struct GetStats { |
66 | | FileMetaData* seek_file; |
67 | | int seek_file_level; |
68 | | }; |
69 | | |
70 | | // Append to *iters a sequence of iterators that will |
71 | | // yield the contents of this Version when merged together. |
72 | | // REQUIRES: This version has been saved (see VersionSet::SaveTo) |
73 | | void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters); |
74 | | |
75 | | Status Get(const ReadOptions&, const LookupKey& key, std::string* val, |
76 | | GetStats* stats); |
77 | | |
78 | | // Adds "stats" into the current state. Returns true if a new |
79 | | // compaction may need to be triggered, false otherwise. |
80 | | // REQUIRES: lock is held |
81 | | bool UpdateStats(const GetStats& stats); |
82 | | |
83 | | // Record a sample of bytes read at the specified internal key. |
84 | | // Samples are taken approximately once every config::kReadBytesPeriod |
85 | | // bytes. Returns true if a new compaction may need to be triggered. |
86 | | // REQUIRES: lock is held |
87 | | bool RecordReadSample(Slice key); |
88 | | |
89 | | // Reference count management (so Versions do not disappear out from |
90 | | // under live iterators) |
91 | | void Ref(); |
92 | | void Unref(); |
93 | | |
94 | | void GetOverlappingInputs( |
95 | | int level, |
96 | | const InternalKey* begin, // nullptr means before all keys |
97 | | const InternalKey* end, // nullptr means after all keys |
98 | | std::vector<FileMetaData*>* inputs); |
99 | | |
100 | | // Returns true iff some file in the specified level overlaps |
101 | | // some part of [*smallest_user_key,*largest_user_key]. |
102 | | // smallest_user_key==nullptr represents a key smaller than all the DB's keys. |
103 | | // largest_user_key==nullptr represents a key largest than all the DB's keys. |
104 | | bool OverlapInLevel(int level, const Slice* smallest_user_key, |
105 | | const Slice* largest_user_key); |
106 | | |
107 | | // Return the level at which we should place a new memtable compaction |
108 | | // result that covers the range [smallest_user_key,largest_user_key]. |
109 | | int PickLevelForMemTableOutput(const Slice& smallest_user_key, |
110 | | const Slice& largest_user_key); |
111 | | |
112 | 0 | int NumFiles(int level) const { return files_[level].size(); } |
113 | | |
114 | | // Return a human readable string that describes this version's contents. |
115 | | std::string DebugString() const; |
116 | | |
117 | | private: |
118 | | friend class Compaction; |
119 | | friend class VersionSet; |
120 | | |
121 | | class LevelFileNumIterator; |
122 | | |
123 | | explicit Version(VersionSet* vset) |
124 | 0 | : vset_(vset), |
125 | 0 | next_(this), |
126 | 0 | prev_(this), |
127 | 0 | refs_(0), |
128 | 0 | file_to_compact_(nullptr), |
129 | 0 | file_to_compact_level_(-1), |
130 | 0 | compaction_score_(-1), |
131 | 0 | compaction_level_(-1) {} |
132 | | |
133 | | Version(const Version&) = delete; |
134 | | Version& operator=(const Version&) = delete; |
135 | | |
136 | | ~Version(); |
137 | | |
138 | | Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; |
139 | | |
140 | | // Call func(arg, level, f) for every file that overlaps user_key in |
141 | | // order from newest to oldest. If an invocation of func returns |
142 | | // false, makes no more calls. |
143 | | // |
144 | | // REQUIRES: user portion of internal_key == user_key. |
145 | | void ForEachOverlapping(Slice user_key, Slice internal_key, void* arg, |
146 | | bool (*func)(void*, int, FileMetaData*)); |
147 | | |
148 | | VersionSet* vset_; // VersionSet to which this Version belongs |
149 | | Version* next_; // Next version in linked list |
150 | | Version* prev_; // Previous version in linked list |
151 | | int refs_; // Number of live refs to this version |
152 | | |
153 | | // List of files per level |
154 | | std::vector<FileMetaData*> files_[config::kNumLevels]; |
155 | | |
156 | | // Next file to compact based on seek stats. |
157 | | FileMetaData* file_to_compact_; |
158 | | int file_to_compact_level_; |
159 | | |
160 | | // Level that should be compacted next and its compaction score. |
161 | | // Score < 1 means compaction is not strictly needed. These fields |
162 | | // are initialized by Finalize(). |
163 | | double compaction_score_; |
164 | | int compaction_level_; |
165 | | }; |
166 | | |
167 | | class VersionSet { |
168 | | public: |
169 | | VersionSet(const std::string& dbname, const Options* options, |
170 | | TableCache* table_cache, const InternalKeyComparator*); |
171 | | VersionSet(const VersionSet&) = delete; |
172 | | VersionSet& operator=(const VersionSet&) = delete; |
173 | | |
174 | | ~VersionSet(); |
175 | | |
176 | | // Apply *edit to the current version to form a new descriptor that |
177 | | // is both saved to persistent state and installed as the new |
178 | | // current version. Will release *mu while actually writing to the file. |
179 | | // REQUIRES: *mu is held on entry. |
180 | | // REQUIRES: no other thread concurrently calls LogAndApply() |
181 | | Status LogAndApply(VersionEdit* edit, port::Mutex* mu) |
182 | | EXCLUSIVE_LOCKS_REQUIRED(mu); |
183 | | |
184 | | // Recover the last saved descriptor from persistent storage. |
185 | | Status Recover(bool* save_manifest); |
186 | | |
187 | | // Return the current version. |
188 | 0 | Version* current() const { return current_; } |
189 | | |
190 | | // Return the current manifest file number |
191 | 0 | uint64_t ManifestFileNumber() const { return manifest_file_number_; } |
192 | | |
193 | | // Allocate and return a new file number |
194 | 0 | uint64_t NewFileNumber() { return next_file_number_++; } |
195 | | |
196 | | // Arrange to reuse "file_number" unless a newer file number has |
197 | | // already been allocated. |
198 | | // REQUIRES: "file_number" was returned by a call to NewFileNumber(). |
199 | 0 | void ReuseFileNumber(uint64_t file_number) { |
200 | 0 | if (next_file_number_ == file_number + 1) { |
201 | 0 | next_file_number_ = file_number; |
202 | 0 | } |
203 | 0 | } |
204 | | |
205 | | // Return the number of Table files at the specified level. |
206 | | int NumLevelFiles(int level) const; |
207 | | |
208 | | // Return the combined file size of all files at the specified level. |
209 | | int64_t NumLevelBytes(int level) const; |
210 | | |
211 | | // Return the last sequence number. |
212 | 0 | uint64_t LastSequence() const { return last_sequence_; } |
213 | | |
214 | | // Set the last sequence number to s. |
215 | 0 | void SetLastSequence(uint64_t s) { |
216 | 0 | assert(s >= last_sequence_); |
217 | 0 | last_sequence_ = s; |
218 | 0 | } |
219 | | |
220 | | // Mark the specified file number as used. |
221 | | void MarkFileNumberUsed(uint64_t number); |
222 | | |
223 | | // Return the current log file number. |
224 | 0 | uint64_t LogNumber() const { return log_number_; } |
225 | | |
226 | | // Return the log file number for the log file that is currently |
227 | | // being compacted, or zero if there is no such log file. |
228 | 0 | uint64_t PrevLogNumber() const { return prev_log_number_; } |
229 | | |
230 | | // Pick level and inputs for a new compaction. |
231 | | // Returns nullptr if there is no compaction to be done. |
232 | | // Otherwise returns a pointer to a heap-allocated object that |
233 | | // describes the compaction. Caller should delete the result. |
234 | | Compaction* PickCompaction(); |
235 | | |
236 | | // Return a compaction object for compacting the range [begin,end] in |
237 | | // the specified level. Returns nullptr if there is nothing in that |
238 | | // level that overlaps the specified range. Caller should delete |
239 | | // the result. |
240 | | Compaction* CompactRange(int level, const InternalKey* begin, |
241 | | const InternalKey* end); |
242 | | |
243 | | // Return the maximum overlapping data (in bytes) at next level for any |
244 | | // file at a level >= 1. |
245 | | int64_t MaxNextLevelOverlappingBytes(); |
246 | | |
247 | | // Create an iterator that reads over the compaction inputs for "*c". |
248 | | // The caller should delete the iterator when no longer needed. |
249 | | Iterator* MakeInputIterator(Compaction* c); |
250 | | |
251 | | // Returns true iff some level needs a compaction. |
252 | 0 | bool NeedsCompaction() const { |
253 | 0 | Version* v = current_; |
254 | 0 | return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr); |
255 | 0 | } |
256 | | |
257 | | // Add all files listed in any live version to *live. |
258 | | // May also mutate some internal state. |
259 | | void AddLiveFiles(std::set<uint64_t>* live); |
260 | | |
261 | | // Return the approximate offset in the database of the data for |
262 | | // "key" as of version "v". |
263 | | uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); |
264 | | |
265 | | // Return a human-readable short (single-line) summary of the number |
266 | | // of files per level. Uses *scratch as backing store. |
267 | | struct LevelSummaryStorage { |
268 | | char buffer[100]; |
269 | | }; |
270 | | const char* LevelSummary(LevelSummaryStorage* scratch) const; |
271 | | |
272 | | private: |
273 | | class Builder; |
274 | | |
275 | | friend class Compaction; |
276 | | friend class Version; |
277 | | |
278 | | bool ReuseManifest(const std::string& dscname, const std::string& dscbase); |
279 | | |
280 | | void Finalize(Version* v); |
281 | | |
282 | | void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest, |
283 | | InternalKey* largest); |
284 | | |
285 | | void GetRange2(const std::vector<FileMetaData*>& inputs1, |
286 | | const std::vector<FileMetaData*>& inputs2, |
287 | | InternalKey* smallest, InternalKey* largest); |
288 | | |
289 | | void SetupOtherInputs(Compaction* c); |
290 | | |
291 | | // Save current contents to *log |
292 | | Status WriteSnapshot(log::Writer* log); |
293 | | |
294 | | void AppendVersion(Version* v); |
295 | | |
296 | | Env* const env_; |
297 | | const std::string dbname_; |
298 | | const Options* const options_; |
299 | | TableCache* const table_cache_; |
300 | | const InternalKeyComparator icmp_; |
301 | | uint64_t next_file_number_; |
302 | | uint64_t manifest_file_number_; |
303 | | uint64_t last_sequence_; |
304 | | uint64_t log_number_; |
305 | | uint64_t prev_log_number_; // 0 or backing store for memtable being compacted |
306 | | |
307 | | // Opened lazily |
308 | | WritableFile* descriptor_file_; |
309 | | log::Writer* descriptor_log_; |
310 | | Version dummy_versions_; // Head of circular doubly-linked list of versions. |
311 | | Version* current_; // == dummy_versions_.prev_ |
312 | | |
313 | | // Per-level key at which the next compaction at that level should start. |
314 | | // Either an empty string, or a valid InternalKey. |
315 | | std::string compact_pointer_[config::kNumLevels]; |
316 | | }; |
317 | | |
318 | | // A Compaction encapsulates information about a compaction. |
319 | | class Compaction { |
320 | | public: |
321 | | ~Compaction(); |
322 | | |
323 | | // Return the level that is being compacted. Inputs from "level" |
324 | | // and "level+1" will be merged to produce a set of "level+1" files. |
325 | 0 | int level() const { return level_; } |
326 | | |
327 | | // Return the object that holds the edits to the descriptor done |
328 | | // by this compaction. |
329 | 0 | VersionEdit* edit() { return &edit_; } |
330 | | |
331 | | // "which" must be either 0 or 1 |
332 | 0 | int num_input_files(int which) const { return inputs_[which].size(); } |
333 | | |
334 | | // Return the ith input file at "level()+which" ("which" must be 0 or 1). |
335 | 0 | FileMetaData* input(int which, int i) const { return inputs_[which][i]; } |
336 | | |
337 | | // Maximum size of files to build during this compaction. |
338 | 0 | uint64_t MaxOutputFileSize() const { return max_output_file_size_; } |
339 | | |
340 | | // Is this a trivial compaction that can be implemented by just |
341 | | // moving a single input file to the next level (no merging or splitting) |
342 | | bool IsTrivialMove() const; |
343 | | |
344 | | // Add all inputs to this compaction as delete operations to *edit. |
345 | | void AddInputDeletions(VersionEdit* edit); |
346 | | |
347 | | // Returns true if the information we have available guarantees that |
348 | | // the compaction is producing data in "level+1" for which no data exists |
349 | | // in levels greater than "level+1". |
350 | | bool IsBaseLevelForKey(const Slice& user_key); |
351 | | |
352 | | // Returns true iff we should stop building the current output |
353 | | // before processing "internal_key". |
354 | | bool ShouldStopBefore(const Slice& internal_key); |
355 | | |
356 | | // Release the input version for the compaction, once the compaction |
357 | | // is successful. |
358 | | void ReleaseInputs(); |
359 | | |
360 | | private: |
361 | | friend class Version; |
362 | | friend class VersionSet; |
363 | | |
364 | | Compaction(const Options* options, int level); |
365 | | |
366 | | int level_; |
367 | | uint64_t max_output_file_size_; |
368 | | Version* input_version_; |
369 | | VersionEdit edit_; |
370 | | |
371 | | // Each compaction reads inputs from "level_" and "level_+1" |
372 | | std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs |
373 | | |
374 | | // State used to check for number of overlapping grandparent files |
375 | | // (parent == level_ + 1, grandparent == level_ + 2) |
376 | | std::vector<FileMetaData*> grandparents_; |
377 | | size_t grandparent_index_; // Index in grandparent_starts_ |
378 | | bool seen_key_; // Some output key has been seen |
379 | | int64_t overlapped_bytes_; // Bytes of overlap between current output |
380 | | // and grandparent files |
381 | | |
382 | | // State for implementing IsBaseLevelForKey |
383 | | |
384 | | // level_ptrs_ holds indices into input_version_->levels_: our state |
385 | | // is that we are positioned at one of the file ranges for each |
386 | | // higher level than the ones involved in this compaction (i.e. for |
387 | | // all L >= level_ + 2). |
388 | | size_t level_ptrs_[config::kNumLevels]; |
389 | | }; |
390 | | |
391 | | } // namespace leveldb |
392 | | |
393 | | #endif // STORAGE_LEVELDB_DB_VERSION_SET_H_ |