/root/bitcoin/src/leveldb/util/env_posix.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | |
5 | | #include <dirent.h> |
6 | | #include <fcntl.h> |
7 | | #include <pthread.h> |
8 | | #include <sys/mman.h> |
9 | | #include <sys/resource.h> |
10 | | #include <sys/stat.h> |
11 | | #include <sys/time.h> |
12 | | #include <sys/types.h> |
13 | | #include <unistd.h> |
14 | | |
15 | | #include <atomic> |
16 | | #include <cerrno> |
17 | | #include <cstddef> |
18 | | #include <cstdint> |
19 | | #include <cstdio> |
20 | | #include <cstdlib> |
21 | | #include <cstring> |
22 | | #include <limits> |
23 | | #include <queue> |
24 | | #include <set> |
25 | | #include <string> |
26 | | #include <thread> |
27 | | #include <type_traits> |
28 | | #include <utility> |
29 | | |
30 | | #include "leveldb/env.h" |
31 | | #include "leveldb/slice.h" |
32 | | #include "leveldb/status.h" |
33 | | #include "port/port.h" |
34 | | #include "port/thread_annotations.h" |
35 | | #include "util/env_posix_test_helper.h" |
36 | | #include "util/posix_logger.h" |
37 | | |
38 | | namespace leveldb { |
39 | | |
40 | | namespace { |
41 | | |
// Cap on the number of read-only file descriptors kept open. A negative value
// means "not determined yet"; it is overwritten by
// EnvPosixTestHelper::SetReadOnlyFDLimit() or, lazily, by MaxOpenFiles().
int g_open_read_only_file_limit = -1;

// 64-bit binaries may use up to 4096 mmap regions; 32-bit binaries get none.
constexpr int kDefaultMmapLimit = (sizeof(void*) < 8) ? 0 : 4096;

// Overridable through EnvPosixTestHelper::SetReadOnlyMMapLimit().
int g_mmap_limit = kDefaultMmapLimit;

// Flags OR-ed into every posix open() call made in this file.
#if HAVE_O_CLOEXEC
constexpr int kOpenBaseFlags = O_CLOEXEC;
#else
constexpr int kOpenBaseFlags = 0;
#endif  // HAVE_O_CLOEXEC

// Size, in bytes, of the write buffer embedded in PosixWritableFile.
constexpr size_t kWritableFileBufferSize = 65536;
59 | | |
60 | 0 | Status PosixError(const std::string& context, int error_number) { |
61 | 0 | if (error_number == ENOENT) { |
62 | 0 | return Status::NotFound(context, std::strerror(error_number)); |
63 | 0 | } else { |
64 | 0 | return Status::IOError(context, std::strerror(error_number)); |
65 | 0 | } |
66 | 0 | } |
67 | | |
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not run out of file descriptors or virtual memory, or run into
// kernel performance problems for very large databases.
class Limiter {
 public:
  // Limit maximum number of resources to |max_acquires|.
  //
  // Explicit so that an int is never silently converted into a Limiter.
  explicit Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}

  // Not copyable: each instance tracks one shared budget.
  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Optimistically take a slot; fetch_sub returns the value held before the
    // decrement.
    const int old_acquires_allowed =
        acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);

    if (old_acquires_allowed > 0) return true;

    // No slot was available: undo the decrement so the counter is unchanged.
    acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
    return false;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); }

 private:
  // The number of available resources.
  //
  // This is a counter and is not tied to the invariants of any other class, so
  // it can be operated on safely using std::memory_order_relaxed.
  std::atomic<int> acquires_allowed_;
};
103 | | |
104 | | // Implements sequential read access in a file using read(). |
105 | | // |
106 | | // Instances of this class are thread-friendly but not thread-safe, as required |
107 | | // by the SequentialFile API. |
108 | | class PosixSequentialFile final : public SequentialFile { |
109 | | public: |
110 | | PosixSequentialFile(std::string filename, int fd) |
111 | 0 | : fd_(fd), filename_(filename) {} |
112 | 0 | ~PosixSequentialFile() override { close(fd_); } |
113 | | |
114 | 0 | Status Read(size_t n, Slice* result, char* scratch) override { |
115 | 0 | Status status; |
116 | 0 | while (true) { |
117 | 0 | ::ssize_t read_size = ::read(fd_, scratch, n); |
118 | 0 | if (read_size < 0) { // Read error. |
119 | 0 | if (errno == EINTR) { |
120 | 0 | continue; // Retry |
121 | 0 | } |
122 | 0 | status = PosixError(filename_, errno); |
123 | 0 | break; |
124 | 0 | } |
125 | 0 | *result = Slice(scratch, read_size); |
126 | 0 | break; |
127 | 0 | } |
128 | 0 | return status; |
129 | 0 | } |
130 | | |
131 | 0 | Status Skip(uint64_t n) override { |
132 | 0 | if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) { |
133 | 0 | return PosixError(filename_, errno); |
134 | 0 | } |
135 | 0 | return Status::OK(); |
136 | 0 | } |
137 | | |
138 | 0 | virtual std::string GetName() const override { return filename_; } |
139 | | |
140 | | private: |
141 | | const int fd_; |
142 | | const std::string filename_; |
143 | | }; |
144 | | |
145 | | // Implements random read access in a file using pread(). |
146 | | // |
147 | | // Instances of this class are thread-safe, as required by the RandomAccessFile |
148 | | // API. Instances are immutable and Read() only calls thread-safe library |
149 | | // functions. |
150 | | class PosixRandomAccessFile final : public RandomAccessFile { |
151 | | public: |
152 | | // The new instance takes ownership of |fd|. |fd_limiter| must outlive this |
153 | | // instance, and will be used to determine if . |
154 | | PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter) |
155 | 0 | : has_permanent_fd_(fd_limiter->Acquire()), |
156 | 0 | fd_(has_permanent_fd_ ? fd : -1), |
157 | 0 | fd_limiter_(fd_limiter), |
158 | 0 | filename_(std::move(filename)) { |
159 | 0 | if (!has_permanent_fd_) { |
160 | 0 | assert(fd_ == -1); |
161 | 0 | ::close(fd); // The file will be opened on every read. |
162 | 0 | } |
163 | 0 | } |
164 | | |
165 | 0 | ~PosixRandomAccessFile() override { |
166 | 0 | if (has_permanent_fd_) { |
167 | 0 | assert(fd_ != -1); |
168 | 0 | ::close(fd_); |
169 | 0 | fd_limiter_->Release(); |
170 | 0 | } |
171 | 0 | } |
172 | | |
173 | | Status Read(uint64_t offset, size_t n, Slice* result, |
174 | 0 | char* scratch) const override { |
175 | 0 | int fd = fd_; |
176 | 0 | if (!has_permanent_fd_) { |
177 | 0 | fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags); |
178 | 0 | if (fd < 0) { |
179 | 0 | return PosixError(filename_, errno); |
180 | 0 | } |
181 | 0 | } |
182 | | |
183 | 0 | assert(fd != -1); |
184 | | |
185 | 0 | Status status; |
186 | 0 | ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset)); |
187 | 0 | *result = Slice(scratch, (read_size < 0) ? 0 : read_size); |
188 | 0 | if (read_size < 0) { |
189 | | // An error: return a non-ok status. |
190 | 0 | status = PosixError(filename_, errno); |
191 | 0 | } |
192 | 0 | if (!has_permanent_fd_) { |
193 | | // Close the temporary file descriptor opened earlier. |
194 | 0 | assert(fd != fd_); |
195 | 0 | ::close(fd); |
196 | 0 | } |
197 | 0 | return status; |
198 | 0 | } |
199 | | |
200 | 0 | virtual std::string GetName() const override { return filename_; } |
201 | | |
202 | | private: |
203 | | const bool has_permanent_fd_; // If false, the file is opened on every read. |
204 | | const int fd_; // -1 if has_permanent_fd_ is false. |
205 | | Limiter* const fd_limiter_; |
206 | | const std::string filename_; |
207 | | }; |
208 | | |
209 | | // Implements random read access in a file using mmap(). |
210 | | // |
211 | | // Instances of this class are thread-safe, as required by the RandomAccessFile |
212 | | // API. Instances are immutable and Read() only calls thread-safe library |
213 | | // functions. |
214 | | class PosixMmapReadableFile final : public RandomAccessFile { |
215 | | public: |
216 | | // mmap_base[0, length-1] points to the memory-mapped contents of the file. It |
217 | | // must be the result of a successful call to mmap(). This instances takes |
218 | | // over the ownership of the region. |
219 | | // |
220 | | // |mmap_limiter| must outlive this instance. The caller must have already |
221 | | // aquired the right to use one mmap region, which will be released when this |
222 | | // instance is destroyed. |
223 | | PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length, |
224 | | Limiter* mmap_limiter) |
225 | 0 | : mmap_base_(mmap_base), |
226 | 0 | length_(length), |
227 | 0 | mmap_limiter_(mmap_limiter), |
228 | 0 | filename_(std::move(filename)) {} |
229 | | |
230 | 0 | ~PosixMmapReadableFile() override { |
231 | 0 | ::munmap(static_cast<void*>(mmap_base_), length_); |
232 | 0 | mmap_limiter_->Release(); |
233 | 0 | } |
234 | | |
235 | | Status Read(uint64_t offset, size_t n, Slice* result, |
236 | 0 | char* scratch) const override { |
237 | 0 | if (offset + n > length_) { |
238 | 0 | *result = Slice(); |
239 | 0 | return PosixError(filename_, EINVAL); |
240 | 0 | } |
241 | | |
242 | 0 | *result = Slice(mmap_base_ + offset, n); |
243 | 0 | return Status::OK(); |
244 | 0 | } |
245 | | |
246 | 0 | virtual std::string GetName() const override { return filename_; } |
247 | | |
248 | | private: |
249 | | char* const mmap_base_; |
250 | | const size_t length_; |
251 | | Limiter* const mmap_limiter_; |
252 | | const std::string filename_; |
253 | | }; |
254 | | |
255 | | class PosixWritableFile final : public WritableFile { |
256 | | public: |
257 | | PosixWritableFile(std::string filename, int fd) |
258 | 0 | : pos_(0), |
259 | 0 | fd_(fd), |
260 | 0 | is_manifest_(IsManifest(filename)), |
261 | 0 | filename_(std::move(filename)), |
262 | 0 | dirname_(Dirname(filename_)) {} |
263 | | |
264 | 0 | ~PosixWritableFile() override { |
265 | 0 | if (fd_ >= 0) { |
266 | | // Ignoring any potential errors |
267 | 0 | Close(); |
268 | 0 | } |
269 | 0 | } |
270 | | |
271 | 0 | Status Append(const Slice& data) override { |
272 | 0 | size_t write_size = data.size(); |
273 | 0 | const char* write_data = data.data(); |
274 | | |
275 | | // Fit as much as possible into buffer. |
276 | 0 | size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_); |
277 | 0 | std::memcpy(buf_ + pos_, write_data, copy_size); |
278 | 0 | write_data += copy_size; |
279 | 0 | write_size -= copy_size; |
280 | 0 | pos_ += copy_size; |
281 | 0 | if (write_size == 0) { |
282 | 0 | return Status::OK(); |
283 | 0 | } |
284 | | |
285 | | // Can't fit in buffer, so need to do at least one write. |
286 | 0 | Status status = FlushBuffer(); |
287 | 0 | if (!status.ok()) { |
288 | 0 | return status; |
289 | 0 | } |
290 | | |
291 | | // Small writes go to buffer, large writes are written directly. |
292 | 0 | if (write_size < kWritableFileBufferSize) { |
293 | 0 | std::memcpy(buf_, write_data, write_size); |
294 | 0 | pos_ = write_size; |
295 | 0 | return Status::OK(); |
296 | 0 | } |
297 | 0 | return WriteUnbuffered(write_data, write_size); |
298 | 0 | } |
299 | | |
300 | 0 | Status Close() override { |
301 | 0 | Status status = FlushBuffer(); |
302 | 0 | const int close_result = ::close(fd_); |
303 | 0 | if (close_result < 0 && status.ok()) { |
304 | 0 | status = PosixError(filename_, errno); |
305 | 0 | } |
306 | 0 | fd_ = -1; |
307 | 0 | return status; |
308 | 0 | } |
309 | | |
310 | 0 | Status Flush() override { return FlushBuffer(); } |
311 | | |
312 | 0 | Status Sync() override { |
313 | | // Ensure new files referred to by the manifest are in the filesystem. |
314 | | // |
315 | | // This needs to happen before the manifest file is flushed to disk, to |
316 | | // avoid crashing in a state where the manifest refers to files that are not |
317 | | // yet on disk. |
318 | 0 | Status status = SyncDirIfManifest(); |
319 | 0 | if (!status.ok()) { |
320 | 0 | return status; |
321 | 0 | } |
322 | | |
323 | 0 | status = FlushBuffer(); |
324 | 0 | if (!status.ok()) { |
325 | 0 | return status; |
326 | 0 | } |
327 | | |
328 | 0 | return SyncFd(fd_, filename_, false); |
329 | 0 | } |
330 | | |
331 | | private: |
332 | 0 | Status FlushBuffer() { |
333 | 0 | Status status = WriteUnbuffered(buf_, pos_); |
334 | 0 | pos_ = 0; |
335 | 0 | return status; |
336 | 0 | } |
337 | | |
338 | 0 | Status WriteUnbuffered(const char* data, size_t size) { |
339 | 0 | while (size > 0) { |
340 | 0 | ssize_t write_result = ::write(fd_, data, size); |
341 | 0 | if (write_result < 0) { |
342 | 0 | if (errno == EINTR) { |
343 | 0 | continue; // Retry |
344 | 0 | } |
345 | 0 | return PosixError(filename_, errno); |
346 | 0 | } |
347 | 0 | data += write_result; |
348 | 0 | size -= write_result; |
349 | 0 | } |
350 | 0 | return Status::OK(); |
351 | 0 | } |
352 | | |
353 | 0 | Status SyncDirIfManifest() { |
354 | 0 | Status status; |
355 | 0 | if (!is_manifest_) { |
356 | 0 | return status; |
357 | 0 | } |
358 | | |
359 | 0 | int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags); |
360 | 0 | if (fd < 0) { |
361 | 0 | status = PosixError(dirname_, errno); |
362 | 0 | } else { |
363 | 0 | status = SyncFd(fd, dirname_, true); |
364 | 0 | ::close(fd); |
365 | 0 | } |
366 | 0 | return status; |
367 | 0 | } |
368 | | |
369 | | // Ensures that all the caches associated with the given file descriptor's |
370 | | // data are flushed all the way to durable media, and can withstand power |
371 | | // failures. |
372 | | // |
373 | | // The path argument is only used to populate the description string in the |
374 | | // returned Status if an error occurs. |
375 | 0 | static Status SyncFd(int fd, const std::string& fd_path, bool syncing_dir) { |
376 | | #if HAVE_FULLFSYNC |
377 | | // On macOS and iOS, fsync() doesn't guarantee durability past power |
378 | | // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some |
379 | | // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to |
380 | | // fsync(). |
381 | | if (::fcntl(fd, F_FULLFSYNC) == 0) { |
382 | | return Status::OK(); |
383 | | } |
384 | | #endif // HAVE_FULLFSYNC |
385 | |
|
386 | 0 | #if HAVE_FDATASYNC |
387 | 0 | bool sync_success = ::fdatasync(fd) == 0; |
388 | | #else |
389 | | bool sync_success = ::fsync(fd) == 0; |
390 | | #endif // HAVE_FDATASYNC |
391 | |
|
392 | 0 | if (sync_success) { |
393 | 0 | return Status::OK(); |
394 | 0 | } |
395 | | // Do not crash if filesystem can't fsync directories |
396 | | // (see https://github.com/bitcoin/bitcoin/pull/10000) |
397 | 0 | if (syncing_dir && errno == EINVAL) { |
398 | 0 | return Status::OK(); |
399 | 0 | } |
400 | 0 | return PosixError(fd_path, errno); |
401 | 0 | } |
402 | | |
403 | | // Returns the directory name in a path pointing to a file. |
404 | | // |
405 | | // Returns "." if the path does not contain any directory separator. |
406 | 0 | static std::string Dirname(const std::string& filename) { |
407 | 0 | std::string::size_type separator_pos = filename.rfind('/'); |
408 | 0 | if (separator_pos == std::string::npos) { |
409 | 0 | return std::string("."); |
410 | 0 | } |
411 | | // The filename component should not contain a path separator. If it does, |
412 | | // the splitting was done incorrectly. |
413 | 0 | assert(filename.find('/', separator_pos + 1) == std::string::npos); |
414 | | |
415 | 0 | return filename.substr(0, separator_pos); |
416 | 0 | } |
417 | | |
418 | | // Extracts the file name from a path pointing to a file. |
419 | | // |
420 | | // The returned Slice points to |filename|'s data buffer, so it is only valid |
421 | | // while |filename| is alive and unchanged. |
422 | 0 | static Slice Basename(const std::string& filename) { |
423 | 0 | std::string::size_type separator_pos = filename.rfind('/'); |
424 | 0 | if (separator_pos == std::string::npos) { |
425 | 0 | return Slice(filename); |
426 | 0 | } |
427 | | // The filename component should not contain a path separator. If it does, |
428 | | // the splitting was done incorrectly. |
429 | 0 | assert(filename.find('/', separator_pos + 1) == std::string::npos); |
430 | | |
431 | 0 | return Slice(filename.data() + separator_pos + 1, |
432 | 0 | filename.length() - separator_pos - 1); |
433 | 0 | } |
434 | | |
435 | | // True if the given file is a manifest file. |
436 | 0 | static bool IsManifest(const std::string& filename) { |
437 | 0 | return Basename(filename).starts_with("MANIFEST"); |
438 | 0 | } |
439 | | |
440 | 0 | virtual std::string GetName() const override { return filename_; } |
441 | | |
442 | | // buf_[0, pos_ - 1] contains data to be written to fd_. |
443 | | char buf_[kWritableFileBufferSize]; |
444 | | size_t pos_; |
445 | | int fd_; |
446 | | |
447 | | const bool is_manifest_; // True if the file's name starts with MANIFEST. |
448 | | const std::string filename_; |
449 | | const std::string dirname_; // The directory of filename_. |
450 | | }; |
451 | | |
// Places (|lock| is true) or clears (|lock| is false) a write lock covering
// the whole file referred to by |fd|, using fcntl(F_SETLK). errno is reset to
// 0 before the call. Returns the result of fcntl().
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock request;
  std::memset(&request, 0, sizeof(request));
  if (lock) {
    request.l_type = F_WRLCK;
  } else {
    request.l_type = F_UNLCK;
  }
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0;  // A zero length covers the entire file.

  return ::fcntl(fd, F_SETLK, &request);
}
462 | | |
463 | | // Instances are thread-safe because they are immutable. |
464 | | class PosixFileLock : public FileLock { |
465 | | public: |
466 | | PosixFileLock(int fd, std::string filename) |
467 | 0 | : fd_(fd), filename_(std::move(filename)) {} |
468 | | |
469 | 0 | int fd() const { return fd_; } |
470 | 0 | const std::string& filename() const { return filename_; } |
471 | | |
472 | | private: |
473 | | const int fd_; |
474 | | const std::string filename_; |
475 | | }; |
476 | | |
477 | | // Tracks the files locked by PosixEnv::LockFile(). |
478 | | // |
479 | | // We maintain a separate set instead of relying on fcntl(F_SETLK) because |
480 | | // fcntl(F_SETLK) does not provide any protection against multiple uses from the |
481 | | // same process. |
482 | | // |
483 | | // Instances are thread-safe because all member data is guarded by a mutex. |
484 | | class PosixLockTable { |
485 | | public: |
486 | 0 | bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) { |
487 | 0 | mu_.Lock(); |
488 | 0 | bool succeeded = locked_files_.insert(fname).second; |
489 | 0 | mu_.Unlock(); |
490 | 0 | return succeeded; |
491 | 0 | } |
492 | 0 | void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) { |
493 | 0 | mu_.Lock(); |
494 | 0 | locked_files_.erase(fname); |
495 | 0 | mu_.Unlock(); |
496 | 0 | } |
497 | | |
498 | | private: |
499 | | port::Mutex mu_; |
500 | | std::set<std::string> locked_files_ GUARDED_BY(mu_); |
501 | | }; |
502 | | |
503 | | class PosixEnv : public Env { |
504 | | public: |
505 | | PosixEnv(); |
506 | 0 | ~PosixEnv() override { |
507 | 0 | static const char msg[] = |
508 | 0 | "PosixEnv singleton destroyed. Unsupported behavior!\n"; |
509 | 0 | std::fwrite(msg, 1, sizeof(msg), stderr); |
510 | 0 | std::abort(); |
511 | 0 | } |
512 | | |
513 | | Status NewSequentialFile(const std::string& filename, |
514 | 0 | SequentialFile** result) override { |
515 | 0 | int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); |
516 | 0 | if (fd < 0) { |
517 | 0 | *result = nullptr; |
518 | 0 | return PosixError(filename, errno); |
519 | 0 | } |
520 | | |
521 | 0 | *result = new PosixSequentialFile(filename, fd); |
522 | 0 | return Status::OK(); |
523 | 0 | } |
524 | | |
525 | | Status NewRandomAccessFile(const std::string& filename, |
526 | 0 | RandomAccessFile** result) override { |
527 | 0 | *result = nullptr; |
528 | 0 | int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); |
529 | 0 | if (fd < 0) { |
530 | 0 | return PosixError(filename, errno); |
531 | 0 | } |
532 | | |
533 | 0 | if (!mmap_limiter_.Acquire()) { |
534 | 0 | *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_); |
535 | 0 | return Status::OK(); |
536 | 0 | } |
537 | | |
538 | 0 | uint64_t file_size; |
539 | 0 | Status status = GetFileSize(filename, &file_size); |
540 | 0 | if (status.ok()) { |
541 | 0 | void* mmap_base = |
542 | 0 | ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); |
543 | 0 | if (mmap_base != MAP_FAILED) { |
544 | 0 | *result = new PosixMmapReadableFile(filename, |
545 | 0 | reinterpret_cast<char*>(mmap_base), |
546 | 0 | file_size, &mmap_limiter_); |
547 | 0 | } else { |
548 | 0 | status = PosixError(filename, errno); |
549 | 0 | } |
550 | 0 | } |
551 | 0 | ::close(fd); |
552 | 0 | if (!status.ok()) { |
553 | 0 | mmap_limiter_.Release(); |
554 | 0 | } |
555 | 0 | return status; |
556 | 0 | } |
557 | | |
558 | | Status NewWritableFile(const std::string& filename, |
559 | 0 | WritableFile** result) override { |
560 | 0 | int fd = ::open(filename.c_str(), |
561 | 0 | O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
562 | 0 | if (fd < 0) { |
563 | 0 | *result = nullptr; |
564 | 0 | return PosixError(filename, errno); |
565 | 0 | } |
566 | | |
567 | 0 | *result = new PosixWritableFile(filename, fd); |
568 | 0 | return Status::OK(); |
569 | 0 | } |
570 | | |
571 | | Status NewAppendableFile(const std::string& filename, |
572 | 0 | WritableFile** result) override { |
573 | 0 | int fd = ::open(filename.c_str(), |
574 | 0 | O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
575 | 0 | if (fd < 0) { |
576 | 0 | *result = nullptr; |
577 | 0 | return PosixError(filename, errno); |
578 | 0 | } |
579 | | |
580 | 0 | *result = new PosixWritableFile(filename, fd); |
581 | 0 | return Status::OK(); |
582 | 0 | } |
583 | | |
584 | 0 | bool FileExists(const std::string& filename) override { |
585 | 0 | return ::access(filename.c_str(), F_OK) == 0; |
586 | 0 | } |
587 | | |
588 | | Status GetChildren(const std::string& directory_path, |
589 | 0 | std::vector<std::string>* result) override { |
590 | 0 | result->clear(); |
591 | 0 | ::DIR* dir = ::opendir(directory_path.c_str()); |
592 | 0 | if (dir == nullptr) { |
593 | 0 | return PosixError(directory_path, errno); |
594 | 0 | } |
595 | 0 | struct ::dirent* entry; |
596 | 0 | while ((entry = ::readdir(dir)) != nullptr) { |
597 | 0 | result->emplace_back(entry->d_name); |
598 | 0 | } |
599 | 0 | ::closedir(dir); |
600 | 0 | return Status::OK(); |
601 | 0 | } |
602 | | |
603 | 0 | Status DeleteFile(const std::string& filename) override { |
604 | 0 | if (::unlink(filename.c_str()) != 0) { |
605 | 0 | return PosixError(filename, errno); |
606 | 0 | } |
607 | 0 | return Status::OK(); |
608 | 0 | } |
609 | | |
610 | 0 | Status CreateDir(const std::string& dirname) override { |
611 | 0 | if (::mkdir(dirname.c_str(), 0755) != 0) { |
612 | 0 | return PosixError(dirname, errno); |
613 | 0 | } |
614 | 0 | return Status::OK(); |
615 | 0 | } |
616 | | |
617 | 0 | Status DeleteDir(const std::string& dirname) override { |
618 | 0 | if (::rmdir(dirname.c_str()) != 0) { |
619 | 0 | return PosixError(dirname, errno); |
620 | 0 | } |
621 | 0 | return Status::OK(); |
622 | 0 | } |
623 | | |
624 | 0 | Status GetFileSize(const std::string& filename, uint64_t* size) override { |
625 | 0 | struct ::stat file_stat; |
626 | 0 | if (::stat(filename.c_str(), &file_stat) != 0) { |
627 | 0 | *size = 0; |
628 | 0 | return PosixError(filename, errno); |
629 | 0 | } |
630 | 0 | *size = file_stat.st_size; |
631 | 0 | return Status::OK(); |
632 | 0 | } |
633 | | |
634 | 0 | Status RenameFile(const std::string& from, const std::string& to) override { |
635 | 0 | if (std::rename(from.c_str(), to.c_str()) != 0) { |
636 | 0 | return PosixError(from, errno); |
637 | 0 | } |
638 | 0 | return Status::OK(); |
639 | 0 | } |
640 | | |
641 | 0 | Status LockFile(const std::string& filename, FileLock** lock) override { |
642 | 0 | *lock = nullptr; |
643 | |
|
644 | 0 | int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644); |
645 | 0 | if (fd < 0) { |
646 | 0 | return PosixError(filename, errno); |
647 | 0 | } |
648 | | |
649 | 0 | if (!locks_.Insert(filename)) { |
650 | 0 | ::close(fd); |
651 | 0 | return Status::IOError("lock " + filename, "already held by process"); |
652 | 0 | } |
653 | | |
654 | 0 | if (LockOrUnlock(fd, true) == -1) { |
655 | 0 | int lock_errno = errno; |
656 | 0 | ::close(fd); |
657 | 0 | locks_.Remove(filename); |
658 | 0 | return PosixError("lock " + filename, lock_errno); |
659 | 0 | } |
660 | | |
661 | 0 | *lock = new PosixFileLock(fd, filename); |
662 | 0 | return Status::OK(); |
663 | 0 | } |
664 | | |
665 | 0 | Status UnlockFile(FileLock* lock) override { |
666 | 0 | PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock); |
667 | 0 | if (LockOrUnlock(posix_file_lock->fd(), false) == -1) { |
668 | 0 | return PosixError("unlock " + posix_file_lock->filename(), errno); |
669 | 0 | } |
670 | 0 | locks_.Remove(posix_file_lock->filename()); |
671 | 0 | ::close(posix_file_lock->fd()); |
672 | 0 | delete posix_file_lock; |
673 | 0 | return Status::OK(); |
674 | 0 | } |
675 | | |
676 | | void Schedule(void (*background_work_function)(void* background_work_arg), |
677 | | void* background_work_arg) override; |
678 | | |
679 | | void StartThread(void (*thread_main)(void* thread_main_arg), |
680 | 0 | void* thread_main_arg) override { |
681 | 0 | std::thread new_thread(thread_main, thread_main_arg); |
682 | 0 | new_thread.detach(); |
683 | 0 | } |
684 | | |
685 | 0 | Status GetTestDirectory(std::string* result) override { |
686 | 0 | const char* env = std::getenv("TEST_TMPDIR"); |
687 | 0 | if (env && env[0] != '\0') { |
688 | 0 | *result = env; |
689 | 0 | } else { |
690 | 0 | char buf[100]; |
691 | 0 | std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", |
692 | 0 | static_cast<int>(::geteuid())); |
693 | 0 | *result = buf; |
694 | 0 | } |
695 | | |
696 | | // The CreateDir status is ignored because the directory may already exist. |
697 | 0 | CreateDir(*result); |
698 | |
|
699 | 0 | return Status::OK(); |
700 | 0 | } |
701 | | |
702 | 0 | Status NewLogger(const std::string& filename, Logger** result) override { |
703 | 0 | int fd = ::open(filename.c_str(), |
704 | 0 | O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
705 | 0 | if (fd < 0) { |
706 | 0 | *result = nullptr; |
707 | 0 | return PosixError(filename, errno); |
708 | 0 | } |
709 | | |
710 | 0 | std::FILE* fp = ::fdopen(fd, "w"); |
711 | 0 | if (fp == nullptr) { |
712 | 0 | ::close(fd); |
713 | 0 | *result = nullptr; |
714 | 0 | return PosixError(filename, errno); |
715 | 0 | } else { |
716 | 0 | *result = new PosixLogger(fp); |
717 | 0 | return Status::OK(); |
718 | 0 | } |
719 | 0 | } |
720 | | |
721 | 0 | uint64_t NowMicros() override { |
722 | 0 | static constexpr uint64_t kUsecondsPerSecond = 1000000; |
723 | 0 | struct ::timeval tv; |
724 | 0 | ::gettimeofday(&tv, nullptr); |
725 | 0 | return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec; |
726 | 0 | } |
727 | | |
728 | 0 | void SleepForMicroseconds(int micros) override { |
729 | 0 | std::this_thread::sleep_for(std::chrono::microseconds(micros)); |
730 | 0 | } |
731 | | |
732 | | private: |
733 | | void BackgroundThreadMain(); |
734 | | |
735 | 0 | static void BackgroundThreadEntryPoint(PosixEnv* env) { |
736 | 0 | env->BackgroundThreadMain(); |
737 | 0 | } |
738 | | |
739 | | // Stores the work item data in a Schedule() call. |
740 | | // |
741 | | // Instances are constructed on the thread calling Schedule() and used on the |
742 | | // background thread. |
743 | | // |
744 | | // This structure is thread-safe beacuse it is immutable. |
745 | | struct BackgroundWorkItem { |
746 | | explicit BackgroundWorkItem(void (*function)(void* arg), void* arg) |
747 | 0 | : function(function), arg(arg) {} |
748 | | |
749 | | void (*const function)(void*); |
750 | | void* const arg; |
751 | | }; |
752 | | |
753 | | port::Mutex background_work_mutex_; |
754 | | port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_); |
755 | | bool started_background_thread_ GUARDED_BY(background_work_mutex_); |
756 | | |
757 | | std::queue<BackgroundWorkItem> background_work_queue_ |
758 | | GUARDED_BY(background_work_mutex_); |
759 | | |
760 | | PosixLockTable locks_; // Thread-safe. |
761 | | Limiter mmap_limiter_; // Thread-safe. |
762 | | Limiter fd_limiter_; // Thread-safe. |
763 | | }; |
764 | | |
765 | | // Return the maximum number of concurrent mmaps. |
766 | 0 | int MaxMmaps() { return g_mmap_limit; } |
767 | | |
768 | | // Return the maximum number of read-only files to keep open. |
769 | 0 | int MaxOpenFiles() { |
770 | 0 | if (g_open_read_only_file_limit >= 0) { |
771 | 0 | return g_open_read_only_file_limit; |
772 | 0 | } |
773 | 0 | struct ::rlimit rlim; |
774 | 0 | if (::getrlimit(RLIMIT_NOFILE, &rlim)) { |
775 | | // getrlimit failed, fallback to hard-coded default. |
776 | 0 | g_open_read_only_file_limit = 50; |
777 | 0 | } else if (rlim.rlim_cur == RLIM_INFINITY) { |
778 | 0 | g_open_read_only_file_limit = std::numeric_limits<int>::max(); |
779 | 0 | } else { |
780 | | // Allow use of 20% of available file descriptors for read-only files. |
781 | 0 | g_open_read_only_file_limit = rlim.rlim_cur / 5; |
782 | 0 | } |
783 | 0 | return g_open_read_only_file_limit; |
784 | 0 | } |
785 | | |
786 | | } // namespace |
787 | | |
788 | | PosixEnv::PosixEnv() |
789 | 0 | : background_work_cv_(&background_work_mutex_), |
790 | 0 | started_background_thread_(false), |
791 | 0 | mmap_limiter_(MaxMmaps()), |
792 | 0 | fd_limiter_(MaxOpenFiles()) {} |
793 | | |
794 | | void PosixEnv::Schedule( |
795 | | void (*background_work_function)(void* background_work_arg), |
796 | 0 | void* background_work_arg) { |
797 | 0 | background_work_mutex_.Lock(); |
798 | | |
799 | | // Start the background thread, if we haven't done so already. |
800 | 0 | if (!started_background_thread_) { |
801 | 0 | started_background_thread_ = true; |
802 | 0 | std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this); |
803 | 0 | background_thread.detach(); |
804 | 0 | } |
805 | | |
806 | | // If the queue is empty, the background thread may be waiting for work. |
807 | 0 | if (background_work_queue_.empty()) { |
808 | 0 | background_work_cv_.Signal(); |
809 | 0 | } |
810 | |
|
811 | 0 | background_work_queue_.emplace(background_work_function, background_work_arg); |
812 | 0 | background_work_mutex_.Unlock(); |
813 | 0 | } |
814 | | |
815 | 0 | void PosixEnv::BackgroundThreadMain() { |
816 | 0 | while (true) { |
817 | 0 | background_work_mutex_.Lock(); |
818 | | |
819 | | // Wait until there is work to be done. |
820 | 0 | while (background_work_queue_.empty()) { |
821 | 0 | background_work_cv_.Wait(); |
822 | 0 | } |
823 | |
|
824 | 0 | assert(!background_work_queue_.empty()); |
825 | 0 | auto background_work_function = background_work_queue_.front().function; |
826 | 0 | void* background_work_arg = background_work_queue_.front().arg; |
827 | 0 | background_work_queue_.pop(); |
828 | |
|
829 | 0 | background_work_mutex_.Unlock(); |
830 | 0 | background_work_function(background_work_arg); |
831 | 0 | } |
832 | 0 | } |
833 | | |
834 | | namespace { |
835 | | |
836 | | // Wraps an Env instance whose destructor is never created. |
837 | | // |
838 | | // Intended usage: |
839 | | // using PlatformSingletonEnv = SingletonEnv<PlatformEnv>; |
840 | | // void ConfigurePosixEnv(int param) { |
841 | | // PlatformSingletonEnv::AssertEnvNotInitialized(); |
842 | | // // set global configuration flags. |
843 | | // } |
844 | | // Env* Env::Default() { |
845 | | // static PlatformSingletonEnv default_env; |
846 | | // return default_env.env(); |
847 | | // } |
848 | | template <typename EnvType> |
849 | | class SingletonEnv { |
850 | | public: |
851 | 0 | SingletonEnv() { |
852 | 0 | #if !defined(NDEBUG) |
853 | 0 | env_initialized_.store(true, std::memory_order_relaxed); |
854 | 0 | #endif // !defined(NDEBUG) |
855 | 0 | static_assert(sizeof(env_storage_) >= sizeof(EnvType), |
856 | 0 | "env_storage_ will not fit the Env"); |
857 | 0 | static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType), |
858 | 0 | "env_storage_ does not meet the Env's alignment needs"); |
859 | 0 | new (&env_storage_) EnvType(); |
860 | 0 | } |
861 | | ~SingletonEnv() = default; |
862 | | |
863 | | SingletonEnv(const SingletonEnv&) = delete; |
864 | | SingletonEnv& operator=(const SingletonEnv&) = delete; |
865 | | |
866 | 0 | Env* env() { return reinterpret_cast<Env*>(&env_storage_); } |
867 | | |
868 | 0 | static void AssertEnvNotInitialized() { |
869 | 0 | #if !defined(NDEBUG) |
870 | 0 | assert(!env_initialized_.load(std::memory_order_relaxed)); |
871 | 0 | #endif // !defined(NDEBUG) |
872 | 0 | } |
873 | | |
874 | | private: |
875 | | typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type |
876 | | env_storage_; |
877 | | #if !defined(NDEBUG) |
878 | | static std::atomic<bool> env_initialized_; |
879 | | #endif // !defined(NDEBUG) |
880 | | }; |
881 | | |
882 | | #if !defined(NDEBUG) |
883 | | template <typename EnvType> |
884 | | std::atomic<bool> SingletonEnv<EnvType>::env_initialized_; |
885 | | #endif // !defined(NDEBUG) |
886 | | |
887 | | using PosixDefaultEnv = SingletonEnv<PosixEnv>; |
888 | | |
889 | | } // namespace |
890 | | |
891 | 0 | void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) { |
892 | 0 | PosixDefaultEnv::AssertEnvNotInitialized(); |
893 | 0 | g_open_read_only_file_limit = limit; |
894 | 0 | } |
895 | | |
896 | 0 | void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) { |
897 | 0 | PosixDefaultEnv::AssertEnvNotInitialized(); |
898 | 0 | g_mmap_limit = limit; |
899 | 0 | } |
900 | | |
901 | 0 | Env* Env::Default() { |
902 | 0 | static PosixDefaultEnv env_container; |
903 | 0 | return env_container.env(); |
904 | 0 | } |
905 | | |
906 | | } // namespace leveldb |