diff --git a/src/yb/client/client-internal.cc b/src/yb/client/client-internal.cc index 02e870ff2093..3a7ecf5ae7f1 100644 --- a/src/yb/client/client-internal.cc +++ b/src/yb/client/client-internal.cc @@ -2344,11 +2344,25 @@ bool YBClient::Data::IsMultiMaster() { if (full_master_server_addrs_.size() > 1) { return true; } - // For single entry case, check if it is a list of host/ports. - std::vector addrs; - const auto status = ParseAddressList(full_master_server_addrs_[0], + + // For single entry case, first check if it is a list of hosts/ports. + std::vector host_ports; + auto status = HostPort::ParseStrings(full_master_server_addrs_[0], yb::master::kMasterDefaultPort, - &addrs); + &host_ports); + if (!status.ok()) { + // Will fail ResolveAddresses as well, so log error and return false early. + LOG(WARNING) << "Failure parsing address list: " << full_master_server_addrs_[0] + << ": " << status; + return false; + } + if (host_ports.size() > 1) { + return true; + } + + // If we only have one HostPort, check if it resolves to multiple endpoints. + std::vector addrs; + status = host_ports[0].ResolveAddresses(&addrs); return status.ok() && (addrs.size() > 1); } diff --git a/src/yb/client/client-test.cc b/src/yb/client/client-test.cc index 7ee0c50d2bc3..c5e1f0ca8d06 100644 --- a/src/yb/client/client-test.cc +++ b/src/yb/client/client-test.cc @@ -115,6 +115,7 @@ DECLARE_int32(min_backoff_ms_exponent); DECLARE_int32(max_backoff_ms_exponent); DECLARE_bool(TEST_force_master_lookup_all_tablets); DECLARE_double(TEST_simulate_lookup_timeout_probability); +DECLARE_string(TEST_fail_to_fast_resolve_address); METRIC_DECLARE_counter(rpcs_queue_overflow); @@ -178,13 +179,12 @@ class ClientTest: public YBMiniClusterTestBase { // Start minicluster and wait for tablet servers to connect to master. auto opts = MiniClusterOptions(); opts.num_tablet_servers = 3; + opts.num_masters = NumMasters(); cluster_.reset(new MiniCluster(env_.get(), opts)); ASSERT_OK(cluster_->Start()); // Connect to the cluster. - client_ = ASSERT_RESULT(YBClientBuilder() - .add_master_server_addr(yb::ToString(cluster_->mini_master()->bound_rpc_addr())) - .Build()); + ASSERT_OK(InitClient()); // Create a keyspace; ASSERT_OK(client_->CreateNamespace(kKeyspaceName)); @@ -207,6 +207,17 @@ class ClientTest: public YBMiniClusterTestBase { static const YBTableName kTable2Name; static const YBTableName kTable3Name; + virtual int NumMasters() { + return 1; + } + + virtual Status InitClient() { + client_ = VERIFY_RESULT(YBClientBuilder() + .add_master_server_addr(yb::ToString(cluster_->mini_master()->bound_rpc_addr())) + .Build()); + return Status::OK(); + } + string GetFirstTabletId(YBTable* table) { GetTableLocationsRequestPB req; GetTableLocationsResponsePB resp; @@ -2659,5 +2670,52 @@ TEST_F_EX(ClientTest, YB_DISABLE_TEST_IN_TSAN(EmptiedBatcherFlush), ClientTestWi thread_holder.JoinAll(); } +class ClientTestWithThreeMasters : public ClientTest { + protected: + int NumMasters() override { + return 3; + } + + Status InitClient() override { + // Connect to the cluster using hostnames. + string master_addrs; + for (int i = 1; i <= NumMasters(); ++i) { + // TEST_RpcAddress is 1-indexed, but mini_master is 0-indexed. + master_addrs += server::TEST_RpcAddress(i, server::Private::kFalse) + + ":" + yb::ToString(cluster_->mini_master(i - 1)->bound_rpc_addr().port()); + if (i < NumMasters()) { + master_addrs += ","; + } + } + + client_ = VERIFY_RESULT(YBClientBuilder() + .add_master_server_addr(master_addrs) + .Build()); + + return Status::OK(); + } +}; + +TEST_F_EX(ClientTest, IsMultiMasterWithFailingHostnameResolution, ClientTestWithThreeMasters) { + google::FlagSaver flag_saver; + // TEST_RpcAddress is 1-indexed. + string hostname = server::TEST_RpcAddress(cluster_->LeaderMasterIdx() + 1, + server::Private::kFalse); + + // Shutdown the master leader. + cluster_->leader_mini_master()->Shutdown(); + + // Fail resolution of the old leader master's hostname. + FLAGS_TEST_fail_to_fast_resolve_address = hostname; + LOG(INFO) << "Setting FLAGS_TEST_fail_to_fast_resolve_address to: " + << FLAGS_TEST_fail_to_fast_resolve_address; + + // Make a client request to the leader master, since that master is no longer the leader, we will + // check that we have a MultiMaster setup. That check should not fail even though one of the + // master addresses currently doesn't resolve. Thus, we should be able to find the new master + // leader and complete the request. + ASSERT_RESULT(client_->ListTables()); +} + } // namespace client } // namespace yb diff --git a/src/yb/integration-tests/cql-test.cc b/src/yb/integration-tests/cql-test.cc index b4736e8ccff0..00f0d25ce6e9 100644 --- a/src/yb/integration-tests/cql-test.cc +++ b/src/yb/integration-tests/cql-test.cc @@ -15,6 +15,8 @@ #include "yb/consensus/raft_consensus.h" +#include "yb/master/mini_master.h" + #include "yb/util/random_util.h" #include "yb/util/test_macros.h" #include "yb/util/test_util.h" @@ -24,6 +26,9 @@ using namespace std::literals; DECLARE_int64(cql_processors_limit); DECLARE_int32(client_read_write_timeout_ms); +DECLARE_string(TEST_fail_to_fast_resolve_address); +DECLARE_int32(partitions_vtable_cache_refresh_secs); + namespace yb { class CqlTest : public CqlTestBase { @@ -217,4 +222,54 @@ TEST_F(CqlTest, RecreateTableWithInserts) { } } +class CqlThreeMastersTest : public CqlTest { + public: + void SetUp() override { + FLAGS_partitions_vtable_cache_refresh_secs = 0; + CqlTest::SetUp(); + } + + int num_masters() override { + return 3; + } +}; + +Status CheckNumAddressesInYqlPartitionsTable(CassandraSession* session, int expected_num_addrs) { + const int kReplicaAddressesIndex = 5; + auto result = VERIFY_RESULT(session->ExecuteWithResult("SELECT * FROM system.partitions")); + auto iterator = result.CreateIterator(); + while (iterator.Next()) { + auto replica_addresses = iterator.Row().Value(kReplicaAddressesIndex).ToString(); + int num_addrs = 0; + if (replica_addresses.size() > std::strlen("{}")) { + num_addrs = std::count(replica_addresses.begin(), replica_addresses.end(), ',') + 1; + } + + EXPECT_EQ(expected_num_addrs, num_addrs); + } + return Status::OK(); +} + +TEST_F(CqlThreeMastersTest, HostnameResolutionFailureInYqlPartitionsTable) { + google::FlagSaver flag_saver; + auto session = ASSERT_RESULT(EstablishSession(driver_.get())); + ASSERT_OK(CheckNumAddressesInYqlPartitionsTable(&session, 3)); + + // TEST_RpcAddress is 1-indexed. + string hostname = server::TEST_RpcAddress(cluster_->LeaderMasterIdx() + 1, + server::Private::kFalse); + + // Shutdown the master leader. + cluster_->leader_mini_master()->Shutdown(); + + // Fail resolution of the old leader master's hostname. + FLAGS_TEST_fail_to_fast_resolve_address = hostname; + LOG(INFO) << "Setting FLAGS_TEST_fail_to_fast_resolve_address to: " + << FLAGS_TEST_fail_to_fast_resolve_address; + + // Assert that a new call will succeed, but will be missing the shutdown master address. + ASSERT_OK(CheckNumAddressesInYqlPartitionsTable(&session, 2)); +} + + } // namespace yb diff --git a/src/yb/integration-tests/cql_test_base.cc b/src/yb/integration-tests/cql_test_base.cc index 0661dff43c59..401a663ccf84 100644 --- a/src/yb/integration-tests/cql_test_base.cc +++ b/src/yb/integration-tests/cql_test_base.cc @@ -24,7 +24,8 @@ void CqlTestBase::SetUp() { YBMiniClusterTestBase::SetUp(); MiniClusterOptions options; - options.num_tablet_servers = 3; + options.num_masters = num_masters(); + options.num_tablet_servers = num_tablet_servers(); cluster_ = std::make_unique(env_.get(), options); ASSERT_OK(cluster_->Start()); diff --git a/src/yb/integration-tests/cql_test_base.h b/src/yb/integration-tests/cql_test_base.h index 290adc0fc1a9..c5a99921a6b5 100644 --- a/src/yb/integration-tests/cql_test_base.h +++ b/src/yb/integration-tests/cql_test_base.h @@ -24,6 +24,17 @@ namespace yb { class CqlTestBase : public MiniClusterTestWithClient { public: + static constexpr auto kDefaultNumMasters = 1; + static constexpr auto kDefaultNumTabletServers = 3; + + virtual int num_masters() { + return kDefaultNumMasters; + } + + virtual int num_tablet_servers() { + return kDefaultNumTabletServers; + } + void SetUp() override; protected: diff --git a/src/yb/master/yql_partitions_vtable.cc b/src/yb/master/yql_partitions_vtable.cc index 1b90268092cf..77993ec19a37 100644 --- a/src/yb/master/yql_partitions_vtable.cc +++ b/src/yb/master/yql_partitions_vtable.cc @@ -133,7 +133,12 @@ Status YQLPartitionsVTable::GenerateAndCacheData() const { std::unordered_map dns_results; for (auto& p : dns_lookups) { - dns_results.emplace(p.first, InetAddress(VERIFY_RESULT(p.second.get()))); + const auto res = p.second.get(); + if (!res.ok()) { + YB_LOG_EVERY_N_SECS(WARNING, 30) << "Unable to resolve host: " << res; + } else { + dns_results.emplace(p.first, InetAddress(res.get())); + } } // Reserve upfront memory, as we're likely to need to insert a row for each tablet. @@ -162,9 +167,13 @@ Status YQLPartitionsVTable::GenerateAndCacheData() const { QLMapValuePB *map_value = replica_addresses.mutable_map_value(); for (const auto& replica : data.locations->replicas()) { auto host = DesiredHostPort(replica.ts_info(), CloudInfoPB()).host(); - QLValue::set_inetaddress_value(dns_results[host], map_value->add_keys()); - map_value->add_values()->set_string_value(consensus::RaftPeerPB::Role_Name(replica.role())); + // In case of resolution failure, we may not find the host in dns_results. + const auto addr = dns_results.find(host); + if (addr != dns_results.end()) { + QLValue::set_inetaddress_value(addr->second, map_value->add_keys()); + map_value->add_values()->set_string_value(consensus::RaftPeerPB::Role_Name(replica.role())); + } } RETURN_NOT_OK(SetColumnValue(kReplicaAddresses, replica_addresses, &row)); } diff --git a/src/yb/util/net/net_util.cc b/src/yb/util/net/net_util.cc index f078bf58a961..2182b727bbf5 100644 --- a/src/yb/util/net/net_util.cc +++ b/src/yb/util/net/net_util.cc @@ -59,6 +59,7 @@ #include "yb/util/debug/trace_event.h" #include "yb/util/errno.h" #include "yb/util/faststring.h" +#include "yb/util/flag_tags.h" #include "yb/util/env.h" #include "yb/util/env_util.h" #include "yb/util/memory/memory.h" @@ -89,6 +90,9 @@ DEFINE_string( "prefer external IPv4 " "addresses first. Other options include ipv6_external,ipv6_non_link_local"); +DEFINE_test_flag(string, fail_to_fast_resolve_address, "", + "A hostname to fail to fast resolve for tests."); + namespace yb { namespace { @@ -637,6 +641,9 @@ boost::optional TryFastResolve(const std::string& host) { // For testing purpose we resolve A.B.C.D.ip.yugabyte to A.B.C.D. static const std::string kYbIpSuffix = ".ip.yugabyte"; if (boost::ends_with(host, kYbIpSuffix)) { + if (PREDICT_FALSE(host == FLAGS_TEST_fail_to_fast_resolve_address)) { + return boost::none; + } boost::system::error_code ec; auto address = IpAddress::from_string( host.substr(0, host.length() - kYbIpSuffix.length()), ec);