Commit 87d4dc84 authored by hrishi18pathak's avatar hrishi18pathak

make auto discoverability of nodes synchronous, actually fix issue 300

parent 6b317395
......@@ -1203,114 +1203,153 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
}
const CommandFlags flags = CommandFlags.NoRedirect | CommandFlags.HighPriority;
var available = new Task<bool>[endpoints.Count];
var servers = new ServerEndPoint[available.Length];
List<ServerEndPoint> masters = new List<ServerEndPoint>(endpoints.Count);
bool useTieBreakers = !string.IsNullOrWhiteSpace(configuration.TieBreaker);
var tieBreakers = useTieBreakers ? new Task<string>[endpoints.Count] : null;
RedisKey tieBreakerKey = useTieBreakers ? (RedisKey)configuration.TieBreaker : default(RedisKey);
for (int i = 0; i < available.Length; i++)
ServerEndPoint[] servers = null;
Task<string>[] tieBreakers = null;
bool encounteredConnectedServer = false;
Stopwatch watch = null;
int iterCount = first ? 2 : 1;
// this is fix for https://github.com/StackExchange/StackExchange.Redis/issues/300
// auto discoverability of cluster nodes is made synchronous.
// we try to connect to endpoints specified inside the user provided configuration
// and when we encounter one such endpoint to which we are able to successfully connect,
// we get the list of cluster nodes from this endpoint and try to proactively connect
// to these nodes instead of relying on auto configure
for (int iter = 0; iter < iterCount; ++iter)
{
Trace("Testing: " + Format.ToString(endpoints[i]));
var server = GetServerEndPoint(endpoints[i]);
//server.ReportNextFailure();
servers[i] = server;
if (reconfigureAll && server.IsConnected)
if (endpoints == null) break;
var available = new Task<bool>[endpoints.Count];
tieBreakers = useTieBreakers ? new Task<string>[endpoints.Count] : null;
servers = new ServerEndPoint[available.Length];
RedisKey tieBreakerKey = useTieBreakers ? (RedisKey)configuration.TieBreaker : default(RedisKey);
for (int i = 0; i < available.Length; i++)
{
LogLocked(log, "Refreshing {0}...", Format.ToString(server.EndPoint));
// note that these will be processed synchronously *BEFORE* the tracer is processed,
// so we know that the configuration will be up to date if we see the tracer
server.AutoConfigure(null);
}
available[i] = server.SendTracer(log);
if (useTieBreakers)
{
LogLocked(log, "Requesting tie-break from {0} > {1}...", Format.ToString(server.EndPoint), configuration.TieBreaker);
Message msg = Message.Create(0, flags, RedisCommand.GET, tieBreakerKey);
msg.SetInternalCall();
msg = LoggingMessage.Create(log, msg);
tieBreakers[i] = server.QueueDirectAsync(msg, ResultProcessor.String);
Trace("Testing: " + Format.ToString(endpoints[i]));
var server = GetServerEndPoint(endpoints[i]);
//server.ReportNextFailure();
servers[i] = server;
if (reconfigureAll && server.IsConnected)
{
LogLocked(log, "Refreshing {0}...", Format.ToString(server.EndPoint));
// note that these will be processed synchronously *BEFORE* the tracer is processed,
// so we know that the configuration will be up to date if we see the tracer
server.AutoConfigure(null);
}
available[i] = server.SendTracer(log);
if (useTieBreakers)
{
LogLocked(log, "Requesting tie-break from {0} > {1}...", Format.ToString(server.EndPoint), configuration.TieBreaker);
Message msg = Message.Create(0, flags, RedisCommand.GET, tieBreakerKey);
msg.SetInternalCall();
msg = LoggingMessage.Create(log, msg);
tieBreakers[i] = server.QueueDirectAsync(msg, ResultProcessor.String);
}
}
}
LogLocked(log, "Allowing endpoints {0} to respond...", TimeSpan.FromMilliseconds(configuration.ConnectTimeout));
Trace("Allowing endpoints " + TimeSpan.FromMilliseconds(configuration.ConnectTimeout) + " to respond...");
await WaitAllIgnoreErrorsAsync(available, configuration.ConnectTimeout, log).ForAwait();
List<ServerEndPoint> masters = new List<ServerEndPoint>(available.Length);
watch = watch ?? Stopwatch.StartNew();
var remaining = configuration.ConnectTimeout - checked((int)watch.ElapsedMilliseconds);
LogLocked(log, "Allowing endpoints {0} to respond...", TimeSpan.FromMilliseconds(remaining));
Trace("Allowing endpoints " + TimeSpan.FromMilliseconds(remaining) + " to respond...");
await WaitAllIgnoreErrorsAsync(available, remaining, log).ForAwait();
for (int i = 0; i < available.Length; i++)
{
var task = available[i];
Trace(Format.ToString(endpoints[i]) + ": " + task.Status);
if (task.IsFaulted)
EndPointCollection updatedEndpointCollection = null;
for (int i = 0; i < available.Length; i++)
{
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
var aex = task.Exception;
foreach (var ex in aex.InnerExceptions)
var task = available[i];
Trace(Format.ToString(endpoints[i]) + ": " + task.Status);
if (task.IsFaulted)
{
LogLocked(log, "{0} faulted: {1}", Format.ToString(endpoints[i]), ex.Message);
failureMessage = ex.Message;
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
var aex = task.Exception;
foreach (var ex in aex.InnerExceptions)
{
LogLocked(log, "{0} faulted: {1}", Format.ToString(endpoints[i]), ex.Message);
failureMessage = ex.Message;
}
}
}
else if (task.IsCanceled)
{
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} was canceled", Format.ToString(endpoints[i]));
}
else if (task.IsCompleted)
{
var server = servers[i];
if (task.Result)
else if (task.IsCanceled)
{
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} was canceled", Format.ToString(endpoints[i]));
}
else if (task.IsCompleted)
{
servers[i].ClearUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} returned with success", Format.ToString(endpoints[i]));
UpdateClusterConfigIfNeeded(server, log);
// count the server types
switch (server.ServerType)
var server = servers[i];
if (task.Result)
{
case ServerType.Twemproxy:
case ServerType.Standalone:
standaloneCount++;
break;
case ServerType.Sentinel:
sentinelCount++;
break;
case ServerType.Cluster:
clusterCount++;
break;
}
servers[i].ClearUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} returned with success", Format.ToString(endpoints[i]));
if (!encounteredConnectedServer)
{
// we have encountered a connected server for the first time.
// so we will get list of other nodes from this server using "CLUSTER NODES" command
// and try to connect to these other nodes in the next iteration
encounteredConnectedServer = true;
updatedEndpointCollection = GetEndpointsFromClusterNodes(server, log);
}
// count the server types
switch (server.ServerType)
{
case ServerType.Twemproxy:
case ServerType.Standalone:
standaloneCount++;
break;
case ServerType.Sentinel:
sentinelCount++;
break;
case ServerType.Cluster:
clusterCount++;
break;
}
// set the server UnselectableFlags and update masters list
switch (server.ServerType)
// set the server UnselectableFlags and update masters list
switch (server.ServerType)
{
case ServerType.Twemproxy:
case ServerType.Sentinel:
case ServerType.Standalone:
case ServerType.Cluster:
servers[i].ClearUnselectable(UnselectableFlags.ServerType);
if (server.IsSlave)
{
servers[i].ClearUnselectable(UnselectableFlags.RedundantMaster);
}
else
{
masters.Add(server);
}
break;
default:
servers[i].SetUnselectable(UnselectableFlags.ServerType);
break;
}
}
else
{
case ServerType.Twemproxy:
case ServerType.Sentinel:
case ServerType.Standalone:
case ServerType.Cluster:
servers[i].ClearUnselectable(UnselectableFlags.ServerType);
if (server.IsSlave)
{
servers[i].ClearUnselectable(UnselectableFlags.RedundantMaster);
}
else
{
masters.Add(server);
}
break;
default:
servers[i].SetUnselectable(UnselectableFlags.ServerType);
break;
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} returned, but incorrectly", Format.ToString(endpoints[i]));
}
}
else
{
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} returned, but incorrectly", Format.ToString(endpoints[i]));
LogLocked(log, "{0} did not respond", Format.ToString(endpoints[i]));
}
}
if (encounteredConnectedServer)
{
endpoints = updatedEndpointCollection;
}
else
{
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} did not respond", Format.ToString(endpoints[i]));
break; // will be retried by the outer do while loop
}
}
......@@ -1419,33 +1458,23 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
}
}
private void UpdateClusterConfigIfNeeded(ServerEndPoint server, TextWriter log)
private EndPointCollection GetEndpointsFromClusterNodes(ServerEndPoint server, TextWriter log)
{
var message = Message.Create(-1, CommandFlags.None, RedisCommand.CLUSTER, RedisLiterals.NODES);
ClusterConfiguration clusterConfig = null;
try
try
{
clusterConfig = this.ExecuteSyncImpl(message, ResultProcessor.ClusterNodes, server);
}
catch (Exception ex)
{
if (ex.Message.Contains("ERR This instance has cluster support disabled"))
{
LogLocked(log, "Cluster support disabled. Continuing without updating cluster config...");
return;
}
LogLocked(log, "Encountered error while updating cluster config: " + ex.Message);
return new EndPointCollection(clusterConfig.Nodes.Select(node => node.EndPoint).ToList());
}
if (clusterConfig != null)
catch (Exception ex)
{
this.UpdateClusterRange(clusterConfig);
LogLocked(log, "Updated cluster config");
LogLocked(log, "Encountered error while updating cluster config: " + ex.Message);
return null;
}
}
private void ResetAllNonConnected()
{
var snapshot = serverSnapshot;
......
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Net;
......@@ -9,6 +10,14 @@ namespace StackExchange.Redis
/// </summary>
public sealed class EndPointCollection : Collection<EndPoint>
{
public EndPointCollection() : base()
{
}
public EndPointCollection(IList<EndPoint> endpoints) : base(endpoints)
{
}
/// <summary>
/// Format an endpoint
/// </summary>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment