Skip to content

Commit

Permalink
[Backport 2.6] [#9898] docdb: Fix queries on system.partitions when u…
Browse files Browse the repository at this point in the history
…nable to resolve some addresses

Summary:
Fixing two issues:
1. YQL system.partitions vtable performs host name resolution to determine the replica addresses. If this resolution fails, then we fail to create the vtable and return the dns resolution error. Fixing this by only including hosts that we are able to resolve in the `replica_addresses` column of `system.partitions`, and logging a warning in case we cannot.
2. Fixing issue where IsMultiMaster would incorrectly return false due to being unable to resolve one of the master addresses in `full_master_server_addrs[0]`. Fixed by parsing `full_master_addrs[0]` earlier and returning true if we have multiple addresses there, and only fallback to resolving if there is only one address and it may resolve to multiple endpoints.

Original commit / diff : e796408 / D13254

Test Plan:
```
 ybd --cxx_test cql-test --gtest_filter CqlThreeMastersTest.HostnameResolutionFailureInYqlPartitionsTable

ybd --cxx_test client-test --gtest_filter ClientTestWithThreeMasters.IsMultiMasterWithFailingHostnameResolution
```

Reviewers: bogdan, sergei

Reviewed By: sergei

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D13350
  • Loading branch information
hulien22 committed Oct 8, 2021
1 parent 334eba6 commit 9136dce
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 11 deletions.
22 changes: 18 additions & 4 deletions src/yb/client/client-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2344,11 +2344,25 @@ bool YBClient::Data::IsMultiMaster() {
if (full_master_server_addrs_.size() > 1) {
return true;
}
// For single entry case, check if it is a list of host/ports.
std::vector<Endpoint> addrs;
const auto status = ParseAddressList(full_master_server_addrs_[0],

// For single entry case, first check if it is a list of hosts/ports.
std::vector<HostPort> host_ports;
auto status = HostPort::ParseStrings(full_master_server_addrs_[0],
yb::master::kMasterDefaultPort,
&addrs);
&host_ports);
if (!status.ok()) {
// Will fail ResolveAddresses as well, so log error and return false early.
LOG(WARNING) << "Failure parsing address list: " << full_master_server_addrs_[0]
<< ": " << status;
return false;
}
if (host_ports.size() > 1) {
return true;
}

// If we only have one HostPort, check if it resolves to multiple endpoints.
std::vector<Endpoint> addrs;
status = host_ports[0].ResolveAddresses(&addrs);
return status.ok() && (addrs.size() > 1);
}

Expand Down
64 changes: 61 additions & 3 deletions src/yb/client/client-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ DECLARE_int32(min_backoff_ms_exponent);
DECLARE_int32(max_backoff_ms_exponent);
DECLARE_bool(TEST_force_master_lookup_all_tablets);
DECLARE_double(TEST_simulate_lookup_timeout_probability);
DECLARE_string(TEST_fail_to_fast_resolve_address);

METRIC_DECLARE_counter(rpcs_queue_overflow);

Expand Down Expand Up @@ -178,13 +179,12 @@ class ClientTest: public YBMiniClusterTestBase<MiniCluster> {
// Start minicluster and wait for tablet servers to connect to master.
auto opts = MiniClusterOptions();
opts.num_tablet_servers = 3;
opts.num_masters = NumMasters();
cluster_.reset(new MiniCluster(env_.get(), opts));
ASSERT_OK(cluster_->Start());

// Connect to the cluster.
client_ = ASSERT_RESULT(YBClientBuilder()
.add_master_server_addr(yb::ToString(cluster_->mini_master()->bound_rpc_addr()))
.Build());
ASSERT_OK(InitClient());

// Create a keyspace;
ASSERT_OK(client_->CreateNamespace(kKeyspaceName));
Expand All @@ -207,6 +207,17 @@ class ClientTest: public YBMiniClusterTestBase<MiniCluster> {
static const YBTableName kTable2Name;
static const YBTableName kTable3Name;

virtual int NumMasters() {
return 1;
}

virtual Status InitClient() {
client_ = VERIFY_RESULT(YBClientBuilder()
.add_master_server_addr(yb::ToString(cluster_->mini_master()->bound_rpc_addr()))
.Build());
return Status::OK();
}

string GetFirstTabletId(YBTable* table) {
GetTableLocationsRequestPB req;
GetTableLocationsResponsePB resp;
Expand Down Expand Up @@ -2659,5 +2670,52 @@ TEST_F_EX(ClientTest, YB_DISABLE_TEST_IN_TSAN(EmptiedBatcherFlush), ClientTestWi
thread_holder.JoinAll();
}

class ClientTestWithThreeMasters : public ClientTest {
protected:
int NumMasters() override {
return 3;
}

Status InitClient() override {
// Connect to the cluster using hostnames.
string master_addrs;
for (int i = 1; i <= NumMasters(); ++i) {
// TEST_RpcAddress is 1-indexed, but mini_master is 0-indexed.
master_addrs += server::TEST_RpcAddress(i, server::Private::kFalse) +
":" + yb::ToString(cluster_->mini_master(i - 1)->bound_rpc_addr().port());
if (i < NumMasters()) {
master_addrs += ",";
}
}

client_ = VERIFY_RESULT(YBClientBuilder()
.add_master_server_addr(master_addrs)
.Build());

return Status::OK();
}
};

TEST_F_EX(ClientTest, IsMultiMasterWithFailingHostnameResolution, ClientTestWithThreeMasters) {
google::FlagSaver flag_saver;
// TEST_RpcAddress is 1-indexed.
string hostname = server::TEST_RpcAddress(cluster_->LeaderMasterIdx() + 1,
server::Private::kFalse);

// Shutdown the master leader.
cluster_->leader_mini_master()->Shutdown();

// Fail resolution of the old leader master's hostname.
FLAGS_TEST_fail_to_fast_resolve_address = hostname;
LOG(INFO) << "Setting FLAGS_TEST_fail_to_fast_resolve_address to: "
<< FLAGS_TEST_fail_to_fast_resolve_address;

// Make a client request to the leader master, since that master is no longer the leader, we will
// check that we have a MultiMaster setup. That check should not fail even though one of the
// master addresses currently doesn't resolve. Thus, we should be able to find the new master
// leader and complete the request.
ASSERT_RESULT(client_->ListTables());
}

} // namespace client
} // namespace yb
55 changes: 55 additions & 0 deletions src/yb/integration-tests/cql-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include "yb/consensus/raft_consensus.h"

#include "yb/master/mini_master.h"

#include "yb/util/random_util.h"
#include "yb/util/test_macros.h"
#include "yb/util/test_util.h"
Expand All @@ -24,6 +26,9 @@ using namespace std::literals;
DECLARE_int64(cql_processors_limit);
DECLARE_int32(client_read_write_timeout_ms);

DECLARE_string(TEST_fail_to_fast_resolve_address);
DECLARE_int32(partitions_vtable_cache_refresh_secs);

namespace yb {

class CqlTest : public CqlTestBase {
Expand Down Expand Up @@ -217,4 +222,54 @@ TEST_F(CqlTest, RecreateTableWithInserts) {
}
}

class CqlThreeMastersTest : public CqlTest {
public:
void SetUp() override {
FLAGS_partitions_vtable_cache_refresh_secs = 0;
CqlTest::SetUp();
}

int num_masters() override {
return 3;
}
};

Status CheckNumAddressesInYqlPartitionsTable(CassandraSession* session, int expected_num_addrs) {
const int kReplicaAddressesIndex = 5;
auto result = VERIFY_RESULT(session->ExecuteWithResult("SELECT * FROM system.partitions"));
auto iterator = result.CreateIterator();
while (iterator.Next()) {
auto replica_addresses = iterator.Row().Value(kReplicaAddressesIndex).ToString();
int num_addrs = 0;
if (replica_addresses.size() > std::strlen("{}")) {
num_addrs = std::count(replica_addresses.begin(), replica_addresses.end(), ',') + 1;
}

EXPECT_EQ(expected_num_addrs, num_addrs);
}
return Status::OK();
}

TEST_F(CqlThreeMastersTest, HostnameResolutionFailureInYqlPartitionsTable) {
google::FlagSaver flag_saver;
auto session = ASSERT_RESULT(EstablishSession(driver_.get()));
ASSERT_OK(CheckNumAddressesInYqlPartitionsTable(&session, 3));

// TEST_RpcAddress is 1-indexed.
string hostname = server::TEST_RpcAddress(cluster_->LeaderMasterIdx() + 1,
server::Private::kFalse);

// Shutdown the master leader.
cluster_->leader_mini_master()->Shutdown();

// Fail resolution of the old leader master's hostname.
FLAGS_TEST_fail_to_fast_resolve_address = hostname;
LOG(INFO) << "Setting FLAGS_TEST_fail_to_fast_resolve_address to: "
<< FLAGS_TEST_fail_to_fast_resolve_address;

// Assert that a new call will succeed, but will be missing the shutdown master address.
ASSERT_OK(CheckNumAddressesInYqlPartitionsTable(&session, 2));
}


} // namespace yb
3 changes: 2 additions & 1 deletion src/yb/integration-tests/cql_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ void CqlTestBase::SetUp() {
YBMiniClusterTestBase<MiniCluster>::SetUp();

MiniClusterOptions options;
options.num_tablet_servers = 3;
options.num_masters = num_masters();
options.num_tablet_servers = num_tablet_servers();
cluster_ = std::make_unique<MiniCluster>(env_.get(), options);
ASSERT_OK(cluster_->Start());

Expand Down
11 changes: 11 additions & 0 deletions src/yb/integration-tests/cql_test_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@ namespace yb {

class CqlTestBase : public MiniClusterTestWithClient<MiniCluster> {
public:
static constexpr auto kDefaultNumMasters = 1;
static constexpr auto kDefaultNumTabletServers = 3;

virtual int num_masters() {
return kDefaultNumMasters;
}

virtual int num_tablet_servers() {
return kDefaultNumTabletServers;
}

void SetUp() override;

protected:
Expand Down
15 changes: 12 additions & 3 deletions src/yb/master/yql_partitions_vtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,12 @@ Status YQLPartitionsVTable::GenerateAndCacheData() const {
std::unordered_map<std::string, InetAddress> dns_results;

for (auto& p : dns_lookups) {
dns_results.emplace(p.first, InetAddress(VERIFY_RESULT(p.second.get())));
const auto res = p.second.get();
if (!res.ok()) {
YB_LOG_EVERY_N_SECS(WARNING, 30) << "Unable to resolve host: " << res;
} else {
dns_results.emplace(p.first, InetAddress(res.get()));
}
}

// Reserve upfront memory, as we're likely to need to insert a row for each tablet.
Expand Down Expand Up @@ -162,9 +167,13 @@ Status YQLPartitionsVTable::GenerateAndCacheData() const {
QLMapValuePB *map_value = replica_addresses.mutable_map_value();
for (const auto& replica : data.locations->replicas()) {
auto host = DesiredHostPort(replica.ts_info(), CloudInfoPB()).host();
QLValue::set_inetaddress_value(dns_results[host], map_value->add_keys());

map_value->add_values()->set_string_value(consensus::RaftPeerPB::Role_Name(replica.role()));
// In case of resolution failure, we may not find the host in dns_results.
const auto addr = dns_results.find(host);
if (addr != dns_results.end()) {
QLValue::set_inetaddress_value(addr->second, map_value->add_keys());
map_value->add_values()->set_string_value(consensus::RaftPeerPB::Role_Name(replica.role()));
}
}
RETURN_NOT_OK(SetColumnValue(kReplicaAddresses, replica_addresses, &row));
}
Expand Down
7 changes: 7 additions & 0 deletions src/yb/util/net/net_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include "yb/util/debug/trace_event.h"
#include "yb/util/errno.h"
#include "yb/util/faststring.h"
#include "yb/util/flag_tags.h"
#include "yb/util/env.h"
#include "yb/util/env_util.h"
#include "yb/util/memory/memory.h"
Expand Down Expand Up @@ -89,6 +90,9 @@ DEFINE_string(
"prefer external IPv4 "
"addresses first. Other options include ipv6_external,ipv6_non_link_local");

DEFINE_test_flag(string, fail_to_fast_resolve_address, "",
"A hostname to fail to fast resolve for tests.");

namespace yb {

namespace {
Expand Down Expand Up @@ -637,6 +641,9 @@ boost::optional<IpAddress> TryFastResolve(const std::string& host) {
// For testing purpose we resolve A.B.C.D.ip.yugabyte to A.B.C.D.
static const std::string kYbIpSuffix = ".ip.yugabyte";
if (boost::ends_with(host, kYbIpSuffix)) {
if (PREDICT_FALSE(host == FLAGS_TEST_fail_to_fast_resolve_address)) {
return boost::none;
}
boost::system::error_code ec;
auto address = IpAddress::from_string(
host.substr(0, host.length() - kYbIpSuffix.length()), ec);
Expand Down

0 comments on commit 9136dce

Please sign in to comment.