Skip to content

Commit a53ed91

Browse files
pdillingerfacebook-github-bot
authored andcommitted
Fix/improve temperature handling for file ingestion (facebook#12402)
Summary: Partly following up on leftovers from facebook#12388 In terms of public API: * Make it clear that IngestExternalFileArg::file_temperature is just a hint for opening the existing file, though it was previously used for both copy-from temp hint and copy-to temp, which was bizarre. * Specify how IngestExternalFile assigns temperature to file ingested into DB. (See details in comments.) This approach is not perfect in terms of matching how the DB assigns temperatures, but was the simplest way to get close. The key complication for matching DB temperature assignments is that ingestion files are copied (to a destination temp) before their target level is determined (in general). * Add a temperature option to SstFileWriter::Open so that files intended for ingestion can be initially written to a chosen temperature. * Note that "fail_if_not_bottommost_level" is obsolete/confusing use of "bottommost" In terms of the implementation, there was a similar bit of oddness with the internal CopyFile API, which only took one temperature, ambiguously applicable to the source, destination, or both. This is also fixed. Eventual suggested follow-up: * Before copying files for ingestion, determine a tentative level assignment to use for destination temperature, and keep that even if final level assignment happens to be different at commit time (rare). * More temperature handling for CreateColumnFamilyWithImport and Checkpoints. Pull Request resolved: facebook#12402 Test Plan: Deeply revamped ExternalSSTFileBasicTest.IngestWithTemperature to test the new changes. Previously this test was insufficient because it was only looking at temperatures according to the DB manifest. Incorporating FileTemperatureTestFS allows us to also test the temperatures in the storage layer. Used macros instead of functions for better tracing to critical source location on test failures. Some enhancements to FileTemperatureTestFS in the process of developing the revamped test. Reviewed By: jowlyzhang Differential Revision: D54442794 Pulled By: pdillinger fbshipit-source-id: 41d9d0afdc073e6a983304c10bbc07c70cc7e995
1 parent 3412195 commit a53ed91

14 files changed

+287
-148
lines changed

db/db_test_util.h

+37-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include "db/db_impl/db_impl.h"
2626
#include "file/filename.h"
27+
#include "options/options_helper.h"
2728
#include "rocksdb/advanced_options.h"
2829
#include "rocksdb/cache.h"
2930
#include "rocksdb/compaction_filter.h"
@@ -729,7 +730,11 @@ class FileTemperatureTestFS : public FileSystemWrapper {
729730
if (e != current_sst_file_temperatures_.end() &&
730731
e->second != opts.temperature) {
731732
result->reset();
732-
return IOStatus::PathNotFound("Temperature mismatch on " + fname);
733+
return IOStatus::PathNotFound(
734+
"Read requested temperature " +
735+
temperature_to_string[opts.temperature] +
736+
" but stored with temperature " +
737+
temperature_to_string[e->second] + " for " + fname);
733738
}
734739
}
735740
*result = WrapWithTemperature<FSSequentialFileOwnerWrapper>(
@@ -758,7 +763,11 @@ class FileTemperatureTestFS : public FileSystemWrapper {
758763
if (e != current_sst_file_temperatures_.end() &&
759764
e->second != opts.temperature) {
760765
result->reset();
761-
return IOStatus::PathNotFound("Temperature mismatch on " + fname);
766+
return IOStatus::PathNotFound(
767+
"Read requested temperature " +
768+
temperature_to_string[opts.temperature] +
769+
" but stored with temperature " +
770+
temperature_to_string[e->second] + " for " + fname);
762771
}
763772
}
764773
*result = WrapWithTemperature<FSRandomAccessFileOwnerWrapper>(
@@ -792,11 +801,37 @@ class FileTemperatureTestFS : public FileSystemWrapper {
792801
return target()->NewWritableFile(fname, opts, result, dbg);
793802
}
794803

804+
IOStatus DeleteFile(const std::string& fname, const IOOptions& options,
805+
IODebugContext* dbg) override {
806+
IOStatus ios = target()->DeleteFile(fname, options, dbg);
807+
if (ios.ok()) {
808+
uint64_t number;
809+
FileType type;
810+
if (ParseFileName(GetFileName(fname), &number, &type) &&
811+
type == kTableFile) {
812+
MutexLock lock(&mu_);
813+
current_sst_file_temperatures_.erase(number);
814+
}
815+
}
816+
return ios;
817+
}
818+
795819
void CopyCurrentSstFileTemperatures(std::map<uint64_t, Temperature>* out) {
796820
MutexLock lock(&mu_);
797821
*out = current_sst_file_temperatures_;
798822
}
799823

824+
size_t CountCurrentSstFilesWithTemperature(Temperature temp) {
825+
MutexLock lock(&mu_);
826+
size_t count = 0;
827+
for (const auto& e : current_sst_file_temperatures_) {
828+
if (e.second == temp) {
829+
++count;
830+
}
831+
}
832+
return count;
833+
}
834+
800835
void OverrideSstFileTemperature(uint64_t number, Temperature temp) {
801836
MutexLock lock(&mu_);
802837
current_sst_file_temperatures_[number] = temp;

db/external_sst_file_basic_test.cc

+152-86
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "rocksdb/sst_file_writer.h"
1515
#include "test_util/testharness.h"
1616
#include "test_util/testutil.h"
17+
#include "util/defer.h"
1718
#include "util/random.h"
1819
#include "utilities/fault_injection_env.h"
1920

@@ -1861,100 +1862,165 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) {
18611862
}
18621863

18631864
TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) {
1864-
Options options = CurrentOptions();
1865-
const ImmutableCFOptions ioptions(options);
1866-
options.last_level_temperature = Temperature::kWarm;
1867-
SstFileWriter sst_file_writer(EnvOptions(), options);
1868-
options.level0_file_num_compaction_trigger = 2;
1869-
Reopen(options);
1865+
// Rather than doubling the running time of this test, this boolean
1866+
// field gets a random starting value and then alternates between
1867+
// true and false.
1868+
bool alternate_hint = Random::GetTLSInstance()->OneIn(2);
1869+
Destroy(CurrentOptions());
18701870

1871-
auto size = GetSstSizeHelper(Temperature::kUnknown);
1872-
ASSERT_EQ(size, 0);
1873-
size = GetSstSizeHelper(Temperature::kWarm);
1874-
ASSERT_EQ(size, 0);
1875-
size = GetSstSizeHelper(Temperature::kHot);
1876-
ASSERT_EQ(size, 0);
1871+
for (std::string mode : {"ingest_behind", "fail_if_not", "neither"}) {
1872+
SCOPED_TRACE("Mode: " + mode);
18771873

1878-
// create file01.sst (1000 => 1099) and ingest it
1879-
std::string file1 = sst_files_dir_ + "file01.sst";
1880-
ASSERT_OK(sst_file_writer.Open(file1));
1881-
for (int k = 1000; k < 1100; k++) {
1882-
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
1874+
Options options = CurrentOptions();
1875+
1876+
auto test_fs =
1877+
std::make_shared<FileTemperatureTestFS>(options.env->GetFileSystem());
1878+
std::unique_ptr<Env> env(new CompositeEnvWrapper(options.env, test_fs));
1879+
options.env = env.get();
1880+
1881+
const ImmutableCFOptions ioptions(options);
1882+
options.last_level_temperature = Temperature::kCold;
1883+
options.default_write_temperature = Temperature::kHot;
1884+
SstFileWriter sst_file_writer(EnvOptions(), options);
1885+
options.level0_file_num_compaction_trigger = 2;
1886+
options.allow_ingest_behind = (mode == "ingest_behind");
1887+
Reopen(options);
1888+
Defer destroyer([&]() { Destroy(options); });
1889+
1890+
#define VERIFY_SST_COUNT(temp, expected_count_in_db, \
1891+
expected_count_outside_db) \
1892+
{ \
1893+
/* Partially verify against FileSystem */ \
1894+
ASSERT_EQ( \
1895+
test_fs->CountCurrentSstFilesWithTemperature(temp), \
1896+
size_t{expected_count_in_db} + size_t{expected_count_outside_db}); \
1897+
/* Partially verify against DB manifest */ \
1898+
if (expected_count_in_db == 0) { \
1899+
ASSERT_EQ(GetSstSizeHelper(temp), 0); \
1900+
} else { \
1901+
ASSERT_GE(GetSstSizeHelper(temp), 1); \
1902+
} \
18831903
}
1884-
ExternalSstFileInfo file1_info;
1885-
Status s = sst_file_writer.Finish(&file1_info);
1886-
ASSERT_OK(s);
1887-
ASSERT_EQ(file1_info.file_path, file1);
1888-
ASSERT_EQ(file1_info.num_entries, 100);
1889-
ASSERT_EQ(file1_info.smallest_key, Key(1000));
1890-
ASSERT_EQ(file1_info.largest_key, Key(1099));
18911904

1892-
std::vector<std::string> files;
1893-
std::vector<std::string> files_checksums;
1894-
std::vector<std::string> files_checksum_func_names;
1895-
Temperature file_temperature = Temperature::kWarm;
1896-
1897-
files.push_back(file1);
1898-
IngestExternalFileOptions in_opts;
1899-
in_opts.move_files = false;
1900-
in_opts.snapshot_consistency = true;
1901-
in_opts.allow_global_seqno = false;
1902-
in_opts.allow_blocking_flush = false;
1903-
in_opts.write_global_seqno = true;
1904-
in_opts.verify_file_checksum = false;
1905-
IngestExternalFileArg arg;
1906-
arg.column_family = db_->DefaultColumnFamily();
1907-
arg.external_files = files;
1908-
arg.options = in_opts;
1909-
arg.files_checksums = files_checksums;
1910-
arg.files_checksum_func_names = files_checksum_func_names;
1911-
arg.file_temperature = file_temperature;
1912-
s = db_->IngestExternalFiles({arg});
1913-
ASSERT_OK(s);
1905+
size_t ex_unknown_in_db = 0;
1906+
size_t ex_hot_in_db = 0;
1907+
size_t ex_warm_in_db = 0;
1908+
size_t ex_cold_in_db = 0;
1909+
size_t ex_unknown_outside_db = 0;
1910+
size_t ex_hot_outside_db = 0;
1911+
size_t ex_warm_outside_db = 0;
1912+
size_t ex_cold_outside_db = 0;
1913+
#define VERIFY_SST_COUNTS() \
1914+
{ \
1915+
VERIFY_SST_COUNT(Temperature::kUnknown, ex_unknown_in_db, \
1916+
ex_unknown_outside_db); \
1917+
VERIFY_SST_COUNT(Temperature::kHot, ex_hot_in_db, ex_hot_outside_db); \
1918+
VERIFY_SST_COUNT(Temperature::kWarm, ex_warm_in_db, ex_warm_outside_db); \
1919+
VERIFY_SST_COUNT(Temperature::kCold, ex_cold_in_db, ex_cold_outside_db); \
1920+
}
19141921

1915-
// check the temperature of the file being ingested
1916-
ColumnFamilyMetaData metadata;
1917-
db_->GetColumnFamilyMetaData(&metadata);
1918-
ASSERT_EQ(1, metadata.file_count);
1919-
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
1920-
size = GetSstSizeHelper(Temperature::kUnknown);
1921-
ASSERT_EQ(size, 0);
1922-
size = GetSstSizeHelper(Temperature::kWarm);
1923-
ASSERT_GT(size, 1);
1922+
// Create sst file, using a name recognized by FileTemperatureTestFS and
1923+
// specified temperature
1924+
std::string file1 = sst_files_dir_ + "9000000.sst";
1925+
ASSERT_OK(sst_file_writer.Open(file1, Temperature::kWarm));
1926+
for (int k = 1000; k < 1100; k++) {
1927+
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
1928+
}
1929+
ExternalSstFileInfo file1_info;
1930+
Status s = sst_file_writer.Finish(&file1_info);
1931+
ASSERT_OK(s);
19241932

1925-
// non-bottommost file still has unknown temperature
1926-
ASSERT_OK(Put("foo", "bar"));
1927-
ASSERT_OK(Put("bar", "bar"));
1928-
ASSERT_OK(Flush());
1929-
db_->GetColumnFamilyMetaData(&metadata);
1930-
ASSERT_EQ(2, metadata.file_count);
1931-
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
1932-
size = GetSstSizeHelper(Temperature::kUnknown);
1933-
ASSERT_GT(size, 0);
1934-
size = GetSstSizeHelper(Temperature::kWarm);
1935-
ASSERT_GT(size, 0);
1933+
ex_warm_outside_db++;
1934+
VERIFY_SST_COUNTS();
1935+
1936+
ASSERT_EQ(file1_info.file_path, file1);
1937+
ASSERT_EQ(file1_info.num_entries, 100);
1938+
ASSERT_EQ(file1_info.smallest_key, Key(1000));
1939+
ASSERT_EQ(file1_info.largest_key, Key(1099));
1940+
1941+
std::vector<std::string> files;
1942+
std::vector<std::string> files_checksums;
1943+
std::vector<std::string> files_checksum_func_names;
1944+
1945+
files.push_back(file1);
1946+
IngestExternalFileOptions in_opts;
1947+
in_opts.move_files = false;
1948+
in_opts.snapshot_consistency = true;
1949+
in_opts.allow_global_seqno = false;
1950+
in_opts.allow_blocking_flush = false;
1951+
in_opts.write_global_seqno = true;
1952+
in_opts.verify_file_checksum = false;
1953+
in_opts.ingest_behind = (mode == "ingest_behind");
1954+
in_opts.fail_if_not_bottommost_level = (mode == "fail_if_not");
1955+
IngestExternalFileArg arg;
1956+
arg.column_family = db_->DefaultColumnFamily();
1957+
arg.external_files = files;
1958+
arg.options = in_opts;
1959+
arg.files_checksums = files_checksums;
1960+
arg.files_checksum_func_names = files_checksum_func_names;
1961+
if ((alternate_hint = !alternate_hint)) {
1962+
// Provide correct hint (for optimal file open performance)
1963+
arg.file_temperature = Temperature::kWarm;
1964+
} else {
1965+
// No hint (also works because ingestion will read the temperature
1966+
// according to storage)
1967+
arg.file_temperature = Temperature::kUnknown;
1968+
}
1969+
s = db_->IngestExternalFiles({arg});
1970+
ASSERT_OK(s);
19361971

1937-
// reopen and check the information is persisted
1938-
Reopen(options);
1939-
db_->GetColumnFamilyMetaData(&metadata);
1940-
ASSERT_EQ(2, metadata.file_count);
1941-
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
1942-
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
1943-
size = GetSstSizeHelper(Temperature::kUnknown);
1944-
ASSERT_GT(size, 0);
1945-
size = GetSstSizeHelper(Temperature::kWarm);
1946-
ASSERT_GT(size, 0);
1972+
// check the temperature of the file ingested (copied)
1973+
ColumnFamilyMetaData metadata;
1974+
db_->GetColumnFamilyMetaData(&metadata);
1975+
ASSERT_EQ(1, metadata.file_count);
19471976

1948-
// check other non-exist temperatures
1949-
size = GetSstSizeHelper(Temperature::kHot);
1950-
ASSERT_EQ(size, 0);
1951-
size = GetSstSizeHelper(Temperature::kCold);
1952-
ASSERT_EQ(size, 0);
1953-
std::string prop;
1954-
ASSERT_TRUE(dbfull()->GetProperty(
1955-
DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
1956-
&prop));
1957-
ASSERT_EQ(std::atoi(prop.c_str()), 0);
1977+
if (mode != "neither") {
1978+
ASSERT_EQ(Temperature::kCold, metadata.levels[6].files[0].temperature);
1979+
ex_cold_in_db++;
1980+
} else {
1981+
// Currently, we are only able to use last_level_temperature for ingestion
1982+
// when using an ingestion option that guarantees ingestion to last level.
1983+
ASSERT_EQ(Temperature::kHot, metadata.levels[6].files[0].temperature);
1984+
ex_hot_in_db++;
1985+
}
1986+
VERIFY_SST_COUNTS();
1987+
1988+
// non-bottommost file still has kHot temperature
1989+
ASSERT_OK(Put("foo", "bar"));
1990+
ASSERT_OK(Put("bar", "bar"));
1991+
ASSERT_OK(Flush());
1992+
db_->GetColumnFamilyMetaData(&metadata);
1993+
ASSERT_EQ(2, metadata.file_count);
1994+
ASSERT_EQ(Temperature::kHot, metadata.levels[0].files[0].temperature);
1995+
1996+
ex_hot_in_db++;
1997+
VERIFY_SST_COUNTS();
1998+
1999+
// reopen and check the information is persisted
2000+
Reopen(options);
2001+
db_->GetColumnFamilyMetaData(&metadata);
2002+
ASSERT_EQ(2, metadata.file_count);
2003+
ASSERT_EQ(Temperature::kHot, metadata.levels[0].files[0].temperature);
2004+
if (mode != "neither") {
2005+
ASSERT_EQ(Temperature::kCold, metadata.levels[6].files[0].temperature);
2006+
} else {
2007+
ASSERT_EQ(Temperature::kHot, metadata.levels[6].files[0].temperature);
2008+
}
2009+
2010+
// (no change)
2011+
VERIFY_SST_COUNTS();
2012+
2013+
// check invalid temperature with DB property. Not sure why the original
2014+
// author is testing this case, but perhaps so that downgrading DB with
2015+
// new GetProperty code using a new Temperature will report something
2016+
// reasonable and not an error.
2017+
std::string prop;
2018+
ASSERT_TRUE(dbfull()->GetProperty(
2019+
DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
2020+
&prop));
2021+
ASSERT_EQ(std::atoi(prop.c_str()), 0);
2022+
#undef VERIFY_SST_COUNT
2023+
}
19582024
}
19592025

19602026
TEST_F(ExternalSSTFileBasicTest, FailIfNotBottommostLevel) {

0 commit comments

Comments
 (0)