Coverage Report

Created: 2025-02-21 14:37

/root/bitcoin/src/leveldb/util/env_posix.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5
#include <dirent.h>
6
#include <fcntl.h>
7
#include <pthread.h>
8
#include <sys/mman.h>
9
#include <sys/resource.h>
10
#include <sys/stat.h>
11
#include <sys/time.h>
12
#include <sys/types.h>
13
#include <unistd.h>
14
15
#include <atomic>
16
#include <cerrno>
17
#include <cstddef>
18
#include <cstdint>
19
#include <cstdio>
20
#include <cstdlib>
21
#include <cstring>
22
#include <limits>
23
#include <queue>
24
#include <set>
25
#include <string>
26
#include <thread>
27
#include <type_traits>
28
#include <utility>
29
30
#include "leveldb/env.h"
31
#include "leveldb/slice.h"
32
#include "leveldb/status.h"
33
#include "port/port.h"
34
#include "port/thread_annotations.h"
35
#include "util/env_posix_test_helper.h"
36
#include "util/posix_logger.h"
37
38
namespace leveldb {
39
40
namespace {
41
42
// Cap on the number of read-only file descriptors kept open. A negative value
// means the cap has not been computed yet. Set by
// EnvPosixTestHelper::SetReadOnlyFDLimit() and MaxOpenFiles().
int g_open_read_only_file_limit = -1;

// Default number of mmap regions: 4096 when pointers are at least 8 bytes
// wide (64-bit binaries), none otherwise.
constexpr int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 4096 : 0;

// Current mmap region cap. Can be overridden using
// EnvPosixTestHelper::SetReadOnlyMMapLimit().
int g_mmap_limit = kDefaultMmapLimit;

// Flags OR-ed into every open() call made in this file.
#if HAVE_O_CLOEXEC
constexpr int kOpenBaseFlags = O_CLOEXEC;
#else
constexpr int kOpenBaseFlags = 0;
#endif  // HAVE_O_CLOEXEC

// Size, in bytes, of the write buffer embedded in PosixWritableFile.
constexpr size_t kWritableFileBufferSize = 65536;
59
60
0
// Translates an errno value into a Status.
//
// ENOENT maps to Status::NotFound; every other value maps to Status::IOError.
// |context| is forwarded as the first message and the strerror() text for
// |error_number| as the second.
Status PosixError(const std::string& context, int error_number) {
  const char* const description = std::strerror(error_number);
  if (error_number != ENOENT) {
    return Status::IOError(context, description);
  }
  return Status::NotFound(context, description);
}
67
68
// Helper class to limit resource usage to avoid exhaustion.
//
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not run out of file descriptors or virtual memory, or run into
// kernel performance problems for very large databases.
class Limiter {
 public:
  // Limit maximum number of resources to |max_acquires|.
  Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}

  // Not copyable: the counter represents a single shared budget.
  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Optimistically take one slot; fetch_sub returns the value held before
    // the decrement.
    const int previously_available =
        acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
    if (previously_available > 0) return true;

    // No slot was available: undo the decrement so the counter is unchanged.
    acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
    return false;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); }

 private:
  // The number of available resources.
  //
  // This is a counter and is not tied to the invariants of any other class, so
  // it can be operated on safely using std::memory_order_relaxed.
  std::atomic<int> acquires_allowed_;
};
103
104
// Implements sequential read access in a file using read().
105
//
106
// Instances of this class are thread-friendly but not thread-safe, as required
107
// by the SequentialFile API.
108
class PosixSequentialFile final : public SequentialFile {
109
 public:
110
  PosixSequentialFile(std::string filename, int fd)
111
0
      : fd_(fd), filename_(filename) {}
112
0
  ~PosixSequentialFile() override { close(fd_); }
113
114
0
  Status Read(size_t n, Slice* result, char* scratch) override {
115
0
    Status status;
116
0
    while (true) {
117
0
      ::ssize_t read_size = ::read(fd_, scratch, n);
118
0
      if (read_size < 0) {  // Read error.
119
0
        if (errno == EINTR) {
120
0
          continue;  // Retry
121
0
        }
122
0
        status = PosixError(filename_, errno);
123
0
        break;
124
0
      }
125
0
      *result = Slice(scratch, read_size);
126
0
      break;
127
0
    }
128
0
    return status;
129
0
  }
130
131
0
  Status Skip(uint64_t n) override {
132
0
    if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
133
0
      return PosixError(filename_, errno);
134
0
    }
135
0
    return Status::OK();
136
0
  }
137
138
0
  virtual std::string GetName() const override { return filename_; }
139
140
 private:
141
  const int fd_;
142
  const std::string filename_;
143
};
144
145
// Implements random read access in a file using pread().
146
//
147
// Instances of this class are thread-safe, as required by the RandomAccessFile
148
// API. Instances are immutable and Read() only calls thread-safe library
149
// functions.
150
class PosixRandomAccessFile final : public RandomAccessFile {
151
 public:
152
  // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
153
  // instance, and will be used to determine if .
154
  PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
155
0
      : has_permanent_fd_(fd_limiter->Acquire()),
156
0
        fd_(has_permanent_fd_ ? fd : -1),
157
0
        fd_limiter_(fd_limiter),
158
0
        filename_(std::move(filename)) {
159
0
    if (!has_permanent_fd_) {
160
0
      assert(fd_ == -1);
161
0
      ::close(fd);  // The file will be opened on every read.
162
0
    }
163
0
  }
164
165
0
  ~PosixRandomAccessFile() override {
166
0
    if (has_permanent_fd_) {
167
0
      assert(fd_ != -1);
168
0
      ::close(fd_);
169
0
      fd_limiter_->Release();
170
0
    }
171
0
  }
172
173
  Status Read(uint64_t offset, size_t n, Slice* result,
174
0
              char* scratch) const override {
175
0
    int fd = fd_;
176
0
    if (!has_permanent_fd_) {
177
0
      fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
178
0
      if (fd < 0) {
179
0
        return PosixError(filename_, errno);
180
0
      }
181
0
    }
182
183
0
    assert(fd != -1);
184
185
0
    Status status;
186
0
    ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
187
0
    *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
188
0
    if (read_size < 0) {
189
      // An error: return a non-ok status.
190
0
      status = PosixError(filename_, errno);
191
0
    }
192
0
    if (!has_permanent_fd_) {
193
      // Close the temporary file descriptor opened earlier.
194
0
      assert(fd != fd_);
195
0
      ::close(fd);
196
0
    }
197
0
    return status;
198
0
  }
199
200
0
  virtual std::string GetName() const override { return filename_; }
201
202
 private:
203
  const bool has_permanent_fd_;  // If false, the file is opened on every read.
204
  const int fd_;                 // -1 if has_permanent_fd_ is false.
205
  Limiter* const fd_limiter_;
206
  const std::string filename_;
207
};
208
209
// Implements random read access in a file using mmap().
210
//
211
// Instances of this class are thread-safe, as required by the RandomAccessFile
212
// API. Instances are immutable and Read() only calls thread-safe library
213
// functions.
214
class PosixMmapReadableFile final : public RandomAccessFile {
215
 public:
216
  // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
217
  // must be the result of a successful call to mmap(). This instances takes
218
  // over the ownership of the region.
219
  //
220
  // |mmap_limiter| must outlive this instance. The caller must have already
221
  // aquired the right to use one mmap region, which will be released when this
222
  // instance is destroyed.
223
  PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
224
                        Limiter* mmap_limiter)
225
0
      : mmap_base_(mmap_base),
226
0
        length_(length),
227
0
        mmap_limiter_(mmap_limiter),
228
0
        filename_(std::move(filename)) {}
229
230
0
  ~PosixMmapReadableFile() override {
231
0
    ::munmap(static_cast<void*>(mmap_base_), length_);
232
0
    mmap_limiter_->Release();
233
0
  }
234
235
  Status Read(uint64_t offset, size_t n, Slice* result,
236
0
              char* scratch) const override {
237
0
    if (offset + n > length_) {
238
0
      *result = Slice();
239
0
      return PosixError(filename_, EINVAL);
240
0
    }
241
242
0
    *result = Slice(mmap_base_ + offset, n);
243
0
    return Status::OK();
244
0
  }
245
246
0
  virtual std::string GetName() const override { return filename_; }
247
248
 private:
249
  char* const mmap_base_;
250
  const size_t length_;
251
  Limiter* const mmap_limiter_;
252
  const std::string filename_;
253
};
254
255
class PosixWritableFile final : public WritableFile {
256
 public:
257
  PosixWritableFile(std::string filename, int fd)
258
0
      : pos_(0),
259
0
        fd_(fd),
260
0
        is_manifest_(IsManifest(filename)),
261
0
        filename_(std::move(filename)),
262
0
        dirname_(Dirname(filename_)) {}
263
264
0
  ~PosixWritableFile() override {
265
0
    if (fd_ >= 0) {
266
      // Ignoring any potential errors
267
0
      Close();
268
0
    }
269
0
  }
270
271
0
  Status Append(const Slice& data) override {
272
0
    size_t write_size = data.size();
273
0
    const char* write_data = data.data();
274
275
    // Fit as much as possible into buffer.
276
0
    size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
277
0
    std::memcpy(buf_ + pos_, write_data, copy_size);
278
0
    write_data += copy_size;
279
0
    write_size -= copy_size;
280
0
    pos_ += copy_size;
281
0
    if (write_size == 0) {
282
0
      return Status::OK();
283
0
    }
284
285
    // Can't fit in buffer, so need to do at least one write.
286
0
    Status status = FlushBuffer();
287
0
    if (!status.ok()) {
288
0
      return status;
289
0
    }
290
291
    // Small writes go to buffer, large writes are written directly.
292
0
    if (write_size < kWritableFileBufferSize) {
293
0
      std::memcpy(buf_, write_data, write_size);
294
0
      pos_ = write_size;
295
0
      return Status::OK();
296
0
    }
297
0
    return WriteUnbuffered(write_data, write_size);
298
0
  }
299
300
0
  Status Close() override {
301
0
    Status status = FlushBuffer();
302
0
    const int close_result = ::close(fd_);
303
0
    if (close_result < 0 && status.ok()) {
304
0
      status = PosixError(filename_, errno);
305
0
    }
306
0
    fd_ = -1;
307
0
    return status;
308
0
  }
309
310
0
  Status Flush() override { return FlushBuffer(); }
311
312
0
  Status Sync() override {
313
    // Ensure new files referred to by the manifest are in the filesystem.
314
    //
315
    // This needs to happen before the manifest file is flushed to disk, to
316
    // avoid crashing in a state where the manifest refers to files that are not
317
    // yet on disk.
318
0
    Status status = SyncDirIfManifest();
319
0
    if (!status.ok()) {
320
0
      return status;
321
0
    }
322
323
0
    status = FlushBuffer();
324
0
    if (!status.ok()) {
325
0
      return status;
326
0
    }
327
328
0
    return SyncFd(fd_, filename_, false);
329
0
  }
330
331
 private:
332
0
  Status FlushBuffer() {
333
0
    Status status = WriteUnbuffered(buf_, pos_);
334
0
    pos_ = 0;
335
0
    return status;
336
0
  }
337
338
0
  Status WriteUnbuffered(const char* data, size_t size) {
339
0
    while (size > 0) {
340
0
      ssize_t write_result = ::write(fd_, data, size);
341
0
      if (write_result < 0) {
342
0
        if (errno == EINTR) {
343
0
          continue;  // Retry
344
0
        }
345
0
        return PosixError(filename_, errno);
346
0
      }
347
0
      data += write_result;
348
0
      size -= write_result;
349
0
    }
350
0
    return Status::OK();
351
0
  }
352
353
0
  Status SyncDirIfManifest() {
354
0
    Status status;
355
0
    if (!is_manifest_) {
356
0
      return status;
357
0
    }
358
359
0
    int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
360
0
    if (fd < 0) {
361
0
      status = PosixError(dirname_, errno);
362
0
    } else {
363
0
      status = SyncFd(fd, dirname_, true);
364
0
      ::close(fd);
365
0
    }
366
0
    return status;
367
0
  }
368
369
  // Ensures that all the caches associated with the given file descriptor's
370
  // data are flushed all the way to durable media, and can withstand power
371
  // failures.
372
  //
373
  // The path argument is only used to populate the description string in the
374
  // returned Status if an error occurs.
375
0
  static Status SyncFd(int fd, const std::string& fd_path, bool syncing_dir) {
376
#if HAVE_FULLFSYNC
377
    // On macOS and iOS, fsync() doesn't guarantee durability past power
378
    // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
379
    // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
380
    // fsync().
381
    if (::fcntl(fd, F_FULLFSYNC) == 0) {
382
      return Status::OK();
383
    }
384
#endif  // HAVE_FULLFSYNC
385
386
0
#if HAVE_FDATASYNC
387
0
    bool sync_success = ::fdatasync(fd) == 0;
388
#else
389
    bool sync_success = ::fsync(fd) == 0;
390
#endif  // HAVE_FDATASYNC
391
392
0
    if (sync_success) {
393
0
      return Status::OK();
394
0
    }
395
    // Do not crash if filesystem can't fsync directories
396
    // (see https://github.com/bitcoin/bitcoin/pull/10000)
397
0
    if (syncing_dir && errno == EINVAL) {
398
0
      return Status::OK();
399
0
    }
400
0
    return PosixError(fd_path, errno);
401
0
  }
402
403
  // Returns the directory name in a path pointing to a file.
404
  //
405
  // Returns "." if the path does not contain any directory separator.
406
0
  static std::string Dirname(const std::string& filename) {
407
0
    std::string::size_type separator_pos = filename.rfind('/');
408
0
    if (separator_pos == std::string::npos) {
409
0
      return std::string(".");
410
0
    }
411
    // The filename component should not contain a path separator. If it does,
412
    // the splitting was done incorrectly.
413
0
    assert(filename.find('/', separator_pos + 1) == std::string::npos);
414
415
0
    return filename.substr(0, separator_pos);
416
0
  }
417
418
  // Extracts the file name from a path pointing to a file.
419
  //
420
  // The returned Slice points to |filename|'s data buffer, so it is only valid
421
  // while |filename| is alive and unchanged.
422
0
  static Slice Basename(const std::string& filename) {
423
0
    std::string::size_type separator_pos = filename.rfind('/');
424
0
    if (separator_pos == std::string::npos) {
425
0
      return Slice(filename);
426
0
    }
427
    // The filename component should not contain a path separator. If it does,
428
    // the splitting was done incorrectly.
429
0
    assert(filename.find('/', separator_pos + 1) == std::string::npos);
430
431
0
    return Slice(filename.data() + separator_pos + 1,
432
0
                 filename.length() - separator_pos - 1);
433
0
  }
434
435
  // True if the given file is a manifest file.
436
0
  static bool IsManifest(const std::string& filename) {
437
0
    return Basename(filename).starts_with("MANIFEST");
438
0
  }
439
440
0
  virtual std::string GetName() const override { return filename_; }
441
442
  // buf_[0, pos_ - 1] contains data to be written to fd_.
443
  char buf_[kWritableFileBufferSize];
444
  size_t pos_;
445
  int fd_;
446
447
  const bool is_manifest_;  // True if the file's name starts with MANIFEST.
448
  const std::string filename_;
449
  const std::string dirname_;  // The directory of filename_.
450
};
451
452
0
// Places (|lock| == true) or removes (|lock| == false) a write lock covering
// the entire file referred to by |fd|, using a non-blocking fcntl(F_SETLK)
// request. errno is reset to 0 before the call. Returns the fcntl() result.
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock request;
  std::memset(&request, 0, sizeof(request));
  if (lock) {
    request.l_type = F_WRLCK;
  } else {
    request.l_type = F_UNLCK;
  }
  // Start at offset 0 with length 0, i.e. the whole file.
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0;

  return ::fcntl(fd, F_SETLK, &request);
}
462
463
// Instances are thread-safe because they are immutable.
464
class PosixFileLock : public FileLock {
465
 public:
466
  PosixFileLock(int fd, std::string filename)
467
0
      : fd_(fd), filename_(std::move(filename)) {}
468
469
0
  int fd() const { return fd_; }
470
0
  const std::string& filename() const { return filename_; }
471
472
 private:
473
  const int fd_;
474
  const std::string filename_;
475
};
476
477
// Tracks the files locked by PosixEnv::LockFile().
478
//
479
// We maintain a separate set instead of relying on fcntl(F_SETLK) because
480
// fcntl(F_SETLK) does not provide any protection against multiple uses from the
481
// same process.
482
//
483
// Instances are thread-safe because all member data is guarded by a mutex.
484
class PosixLockTable {
485
 public:
486
0
  bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
487
0
    mu_.Lock();
488
0
    bool succeeded = locked_files_.insert(fname).second;
489
0
    mu_.Unlock();
490
0
    return succeeded;
491
0
  }
492
0
  void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
493
0
    mu_.Lock();
494
0
    locked_files_.erase(fname);
495
0
    mu_.Unlock();
496
0
  }
497
498
 private:
499
  port::Mutex mu_;
500
  std::set<std::string> locked_files_ GUARDED_BY(mu_);
501
};
502
503
class PosixEnv : public Env {
504
 public:
505
  PosixEnv();
506
0
  ~PosixEnv() override {
507
0
    static const char msg[] =
508
0
        "PosixEnv singleton destroyed. Unsupported behavior!\n";
509
0
    std::fwrite(msg, 1, sizeof(msg), stderr);
510
0
    std::abort();
511
0
  }
512
513
  Status NewSequentialFile(const std::string& filename,
514
0
                           SequentialFile** result) override {
515
0
    int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
516
0
    if (fd < 0) {
517
0
      *result = nullptr;
518
0
      return PosixError(filename, errno);
519
0
    }
520
521
0
    *result = new PosixSequentialFile(filename, fd);
522
0
    return Status::OK();
523
0
  }
524
525
  Status NewRandomAccessFile(const std::string& filename,
526
0
                             RandomAccessFile** result) override {
527
0
    *result = nullptr;
528
0
    int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
529
0
    if (fd < 0) {
530
0
      return PosixError(filename, errno);
531
0
    }
532
533
0
    if (!mmap_limiter_.Acquire()) {
534
0
      *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
535
0
      return Status::OK();
536
0
    }
537
538
0
    uint64_t file_size;
539
0
    Status status = GetFileSize(filename, &file_size);
540
0
    if (status.ok()) {
541
0
      void* mmap_base =
542
0
          ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
543
0
      if (mmap_base != MAP_FAILED) {
544
0
        *result = new PosixMmapReadableFile(filename,
545
0
                                            reinterpret_cast<char*>(mmap_base),
546
0
                                            file_size, &mmap_limiter_);
547
0
      } else {
548
0
        status = PosixError(filename, errno);
549
0
      }
550
0
    }
551
0
    ::close(fd);
552
0
    if (!status.ok()) {
553
0
      mmap_limiter_.Release();
554
0
    }
555
0
    return status;
556
0
  }
557
558
  Status NewWritableFile(const std::string& filename,
559
0
                         WritableFile** result) override {
560
0
    int fd = ::open(filename.c_str(),
561
0
                    O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
562
0
    if (fd < 0) {
563
0
      *result = nullptr;
564
0
      return PosixError(filename, errno);
565
0
    }
566
567
0
    *result = new PosixWritableFile(filename, fd);
568
0
    return Status::OK();
569
0
  }
570
571
  Status NewAppendableFile(const std::string& filename,
572
0
                           WritableFile** result) override {
573
0
    int fd = ::open(filename.c_str(),
574
0
                    O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
575
0
    if (fd < 0) {
576
0
      *result = nullptr;
577
0
      return PosixError(filename, errno);
578
0
    }
579
580
0
    *result = new PosixWritableFile(filename, fd);
581
0
    return Status::OK();
582
0
  }
583
584
0
  bool FileExists(const std::string& filename) override {
585
0
    return ::access(filename.c_str(), F_OK) == 0;
586
0
  }
587
588
  Status GetChildren(const std::string& directory_path,
589
0
                     std::vector<std::string>* result) override {
590
0
    result->clear();
591
0
    ::DIR* dir = ::opendir(directory_path.c_str());
592
0
    if (dir == nullptr) {
593
0
      return PosixError(directory_path, errno);
594
0
    }
595
0
    struct ::dirent* entry;
596
0
    while ((entry = ::readdir(dir)) != nullptr) {
597
0
      result->emplace_back(entry->d_name);
598
0
    }
599
0
    ::closedir(dir);
600
0
    return Status::OK();
601
0
  }
602
603
0
  Status DeleteFile(const std::string& filename) override {
604
0
    if (::unlink(filename.c_str()) != 0) {
605
0
      return PosixError(filename, errno);
606
0
    }
607
0
    return Status::OK();
608
0
  }
609
610
0
  Status CreateDir(const std::string& dirname) override {
611
0
    if (::mkdir(dirname.c_str(), 0755) != 0) {
612
0
      return PosixError(dirname, errno);
613
0
    }
614
0
    return Status::OK();
615
0
  }
616
617
0
  Status DeleteDir(const std::string& dirname) override {
618
0
    if (::rmdir(dirname.c_str()) != 0) {
619
0
      return PosixError(dirname, errno);
620
0
    }
621
0
    return Status::OK();
622
0
  }
623
624
0
  Status GetFileSize(const std::string& filename, uint64_t* size) override {
625
0
    struct ::stat file_stat;
626
0
    if (::stat(filename.c_str(), &file_stat) != 0) {
627
0
      *size = 0;
628
0
      return PosixError(filename, errno);
629
0
    }
630
0
    *size = file_stat.st_size;
631
0
    return Status::OK();
632
0
  }
633
634
0
  Status RenameFile(const std::string& from, const std::string& to) override {
635
0
    if (std::rename(from.c_str(), to.c_str()) != 0) {
636
0
      return PosixError(from, errno);
637
0
    }
638
0
    return Status::OK();
639
0
  }
640
641
0
  Status LockFile(const std::string& filename, FileLock** lock) override {
642
0
    *lock = nullptr;
643
644
0
    int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644);
645
0
    if (fd < 0) {
646
0
      return PosixError(filename, errno);
647
0
    }
648
649
0
    if (!locks_.Insert(filename)) {
650
0
      ::close(fd);
651
0
      return Status::IOError("lock " + filename, "already held by process");
652
0
    }
653
654
0
    if (LockOrUnlock(fd, true) == -1) {
655
0
      int lock_errno = errno;
656
0
      ::close(fd);
657
0
      locks_.Remove(filename);
658
0
      return PosixError("lock " + filename, lock_errno);
659
0
    }
660
661
0
    *lock = new PosixFileLock(fd, filename);
662
0
    return Status::OK();
663
0
  }
664
665
0
  Status UnlockFile(FileLock* lock) override {
666
0
    PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
667
0
    if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
668
0
      return PosixError("unlock " + posix_file_lock->filename(), errno);
669
0
    }
670
0
    locks_.Remove(posix_file_lock->filename());
671
0
    ::close(posix_file_lock->fd());
672
0
    delete posix_file_lock;
673
0
    return Status::OK();
674
0
  }
675
676
  void Schedule(void (*background_work_function)(void* background_work_arg),
677
                void* background_work_arg) override;
678
679
  void StartThread(void (*thread_main)(void* thread_main_arg),
680
0
                   void* thread_main_arg) override {
681
0
    std::thread new_thread(thread_main, thread_main_arg);
682
0
    new_thread.detach();
683
0
  }
684
685
0
  Status GetTestDirectory(std::string* result) override {
686
0
    const char* env = std::getenv("TEST_TMPDIR");
687
0
    if (env && env[0] != '\0') {
688
0
      *result = env;
689
0
    } else {
690
0
      char buf[100];
691
0
      std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
692
0
                    static_cast<int>(::geteuid()));
693
0
      *result = buf;
694
0
    }
695
696
    // The CreateDir status is ignored because the directory may already exist.
697
0
    CreateDir(*result);
698
699
0
    return Status::OK();
700
0
  }
701
702
0
  Status NewLogger(const std::string& filename, Logger** result) override {
703
0
    int fd = ::open(filename.c_str(),
704
0
                    O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
705
0
    if (fd < 0) {
706
0
      *result = nullptr;
707
0
      return PosixError(filename, errno);
708
0
    }
709
710
0
    std::FILE* fp = ::fdopen(fd, "w");
711
0
    if (fp == nullptr) {
712
0
      ::close(fd);
713
0
      *result = nullptr;
714
0
      return PosixError(filename, errno);
715
0
    } else {
716
0
      *result = new PosixLogger(fp);
717
0
      return Status::OK();
718
0
    }
719
0
  }
720
721
0
  uint64_t NowMicros() override {
722
0
    static constexpr uint64_t kUsecondsPerSecond = 1000000;
723
0
    struct ::timeval tv;
724
0
    ::gettimeofday(&tv, nullptr);
725
0
    return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
726
0
  }
727
728
0
  void SleepForMicroseconds(int micros) override {
729
0
    std::this_thread::sleep_for(std::chrono::microseconds(micros));
730
0
  }
731
732
 private:
733
  void BackgroundThreadMain();
734
735
0
  static void BackgroundThreadEntryPoint(PosixEnv* env) {
736
0
    env->BackgroundThreadMain();
737
0
  }
738
739
  // Stores the work item data in a Schedule() call.
740
  //
741
  // Instances are constructed on the thread calling Schedule() and used on the
742
  // background thread.
743
  //
744
  // This structure is thread-safe beacuse it is immutable.
745
  struct BackgroundWorkItem {
746
    explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
747
0
        : function(function), arg(arg) {}
748
749
    void (*const function)(void*);
750
    void* const arg;
751
  };
752
753
  port::Mutex background_work_mutex_;
754
  port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
755
  bool started_background_thread_ GUARDED_BY(background_work_mutex_);
756
757
  std::queue<BackgroundWorkItem> background_work_queue_
758
      GUARDED_BY(background_work_mutex_);
759
760
  PosixLockTable locks_;  // Thread-safe.
761
  Limiter mmap_limiter_;  // Thread-safe.
762
  Limiter fd_limiter_;    // Thread-safe.
763
};
764
765
// Return the maximum number of concurrent mmaps.
766
0
int MaxMmaps() { return g_mmap_limit; }
767
768
// Return the maximum number of read-only files to keep open.
769
0
int MaxOpenFiles() {
770
0
  if (g_open_read_only_file_limit >= 0) {
771
0
    return g_open_read_only_file_limit;
772
0
  }
773
0
  struct ::rlimit rlim;
774
0
  if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
775
    // getrlimit failed, fallback to hard-coded default.
776
0
    g_open_read_only_file_limit = 50;
777
0
  } else if (rlim.rlim_cur == RLIM_INFINITY) {
778
0
    g_open_read_only_file_limit = std::numeric_limits<int>::max();
779
0
  } else {
780
    // Allow use of 20% of available file descriptors for read-only files.
781
0
    g_open_read_only_file_limit = rlim.rlim_cur / 5;
782
0
  }
783
0
  return g_open_read_only_file_limit;
784
0
}
785
786
}  // namespace
787
788
// The background thread is not started here; Schedule() starts it on first
// use. Both limiters take their budgets from the file-level limits that are
// in effect at construction time.
PosixEnv::PosixEnv()
    : background_work_cv_(&background_work_mutex_),
      started_background_thread_(false),
      mmap_limiter_(MaxMmaps()),
      fd_limiter_(MaxOpenFiles()) {
}
793
794
void PosixEnv::Schedule(
795
    void (*background_work_function)(void* background_work_arg),
796
0
    void* background_work_arg) {
797
0
  background_work_mutex_.Lock();
798
799
  // Start the background thread, if we haven't done so already.
800
0
  if (!started_background_thread_) {
801
0
    started_background_thread_ = true;
802
0
    std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
803
0
    background_thread.detach();
804
0
  }
805
806
  // If the queue is empty, the background thread may be waiting for work.
807
0
  if (background_work_queue_.empty()) {
808
0
    background_work_cv_.Signal();
809
0
  }
810
811
0
  background_work_queue_.emplace(background_work_function, background_work_arg);
812
0
  background_work_mutex_.Unlock();
813
0
}
814
815
0
void PosixEnv::BackgroundThreadMain() {
816
0
  while (true) {
817
0
    background_work_mutex_.Lock();
818
819
    // Wait until there is work to be done.
820
0
    while (background_work_queue_.empty()) {
821
0
      background_work_cv_.Wait();
822
0
    }
823
824
0
    assert(!background_work_queue_.empty());
825
0
    auto background_work_function = background_work_queue_.front().function;
826
0
    void* background_work_arg = background_work_queue_.front().arg;
827
0
    background_work_queue_.pop();
828
829
0
    background_work_mutex_.Unlock();
830
0
    background_work_function(background_work_arg);
831
0
  }
832
0
}
833
834
namespace {
835
836
// Wraps an Env instance whose destructor is never created.
837
//
838
// Intended usage:
839
//   using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
840
//   void ConfigurePosixEnv(int param) {
841
//     PlatformSingletonEnv::AssertEnvNotInitialized();
842
//     // set global configuration flags.
843
//   }
844
//   Env* Env::Default() {
845
//     static PlatformSingletonEnv default_env;
846
//     return default_env.env();
847
//   }
848
template <typename EnvType>
849
class SingletonEnv {
850
 public:
851
0
  SingletonEnv() {
852
0
#if !defined(NDEBUG)
853
0
    env_initialized_.store(true, std::memory_order_relaxed);
854
0
#endif  // !defined(NDEBUG)
855
0
    static_assert(sizeof(env_storage_) >= sizeof(EnvType),
856
0
                  "env_storage_ will not fit the Env");
857
0
    static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType),
858
0
                  "env_storage_ does not meet the Env's alignment needs");
859
0
    new (&env_storage_) EnvType();
860
0
  }
861
  ~SingletonEnv() = default;
862
863
  SingletonEnv(const SingletonEnv&) = delete;
864
  SingletonEnv& operator=(const SingletonEnv&) = delete;
865
866
0
  Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
867
868
0
  static void AssertEnvNotInitialized() {
869
0
#if !defined(NDEBUG)
870
0
    assert(!env_initialized_.load(std::memory_order_relaxed));
871
0
#endif  // !defined(NDEBUG)
872
0
  }
873
874
 private:
875
  typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type
876
      env_storage_;
877
#if !defined(NDEBUG)
878
  static std::atomic<bool> env_initialized_;
879
#endif  // !defined(NDEBUG)
880
};
881
882
#if !defined(NDEBUG)
883
template <typename EnvType>
884
std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
885
#endif  // !defined(NDEBUG)
886
887
using PosixDefaultEnv = SingletonEnv<PosixEnv>;
888
889
}  // namespace
890
891
0
void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
892
0
  PosixDefaultEnv::AssertEnvNotInitialized();
893
0
  g_open_read_only_file_limit = limit;
894
0
}
895
896
0
void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
897
0
  PosixDefaultEnv::AssertEnvNotInitialized();
898
0
  g_mmap_limit = limit;
899
0
}
900
901
0
// Returns the process-wide POSIX environment, constructing it on the first
// call. The environment lives in a function-local static wrapper.
Env* Env::Default() {
  static PosixDefaultEnv default_env;
  return default_env.env();
}
905
906
}  // namespace leveldb