Commit cf91c206 authored by Jon Cole's avatar Jon Cole
parents 0cd9148c 2bc2fb8f
......@@ -243,7 +243,7 @@ private static RedisConnection GetOldStyleConnection(string host, int port, bool
protected static TimeSpan RunConcurrent(Action work, int threads, int timeout = 10000, [CallerMemberName] string caller = null)
{
if (work == null) throw new ArgumentNullException("work");
if (threads < 1) throw new ArgumentOutOfRangeException("theads");
if (threads < 1) throw new ArgumentOutOfRangeException("threads");
if(string.IsNullOrWhiteSpace(caller)) caller = Me();
Stopwatch watch = null;
ManualResetEvent allDone = new ManualResetEvent(false);
......
......@@ -172,6 +172,7 @@ public sealed class ClusterConfiguration
private readonly ServerSelectionStrategy serverSelectionStrategy;
internal ClusterConfiguration(ServerSelectionStrategy serverSelectionStrategy, string nodes, EndPoint origin)
{
// Beware: Any exception thrown here will wreak silent havoc like inability to connect to cluster nodes or non returning calls
this.serverSelectionStrategy = serverSelectionStrategy;
this.origin = origin;
using (var reader = new StringReader(nodes))
......@@ -180,12 +181,39 @@ internal ClusterConfiguration(ServerSelectionStrategy serverSelectionStrategy, s
while ((line = reader.ReadLine()) != null)
{
if (string.IsNullOrWhiteSpace(line)) continue;
var node = new ClusterNode(this, line, origin);
// Be resilient to ":0 {master,slave},fail,noaddr" nodes
if (node.IsNoAddr)
continue;
if (nodeLookup.ContainsKey(node.EndPoint))
{
// Deal with conflicting node entries for the same endpoint
// This can happen in dynamic environments when a node goes down and a new one is created
// to replace it.
if (!node.IsConnected)
{
// The node we're trying to add is probably about to become stale. Ignore it.
continue;
}
else if (!nodeLookup[node.EndPoint].IsConnected)
{
// The node we registered previously is probably stale. Replace it with a known good node.
nodeLookup[node.EndPoint] = node;
}
else
{
// We have conflicting connected nodes. There's nothing much we can do other than
// wait for the cluster state to converge and refresh on the next pass.
// The same is true if we have multiple disconnected nodes.
}
}
else
{
nodeLookup.Add(node.EndPoint, node);
}
}
}
}
/// <summary>
/// Gets all nodes contained in the configuration
......@@ -262,6 +290,10 @@ public sealed class ClusterNode : IEquatable<ClusterNode>, IComparable<ClusterN
private readonly bool isSlave;
private readonly bool isNoAddr;
private readonly bool isConnected;
private readonly string nodeId, parentNodeId, raw;
private readonly IList<SlotRange> slots;
......@@ -275,22 +307,18 @@ public sealed class ClusterNode : IEquatable<ClusterNode>, IComparable<ClusterN
internal ClusterNode() { }
internal ClusterNode(ClusterConfiguration configuration, string raw, EndPoint origin)
{
// http://redis.io/commands/cluster-nodes
this.configuration = configuration;
this.raw = raw;
var parts = raw.Split(StringSplits.Space);
var flags = parts[2].Split(StringSplits.Comma);
if (flags.Contains("myself"))
{
endpoint = origin;
}
else
{
endpoint = Format.TryParseEndPoint(parts[1]);
}
nodeId = parts[0];
isSlave = flags.Contains("slave");
isNoAddr = flags.Contains("noaddr");
parentNodeId = string.IsNullOrWhiteSpace(parts[3]) ? null : parts[3];
List<SlotRange> slots = null;
......@@ -305,6 +333,7 @@ internal ClusterNode(ClusterConfiguration configuration, string raw, EndPoint or
}
}
this.slots = slots == null ? NoSlots : slots.AsReadOnly();
this.isConnected = parts[7] == "connected"; // Can be "connected" or "disconnected"
}
/// <summary>
/// Gets all child nodes of the current node
......@@ -339,6 +368,16 @@ public IList<ClusterNode> Children
/// </summary>
public bool IsSlave { get { return isSlave; } }
/// <summary>
/// Gets whether this node is flagged as noaddr
/// </summary>
public bool IsNoAddr { get { return isNoAddr; } }
/// <summary>
/// Gets the node's connection status
/// </summary>
public bool IsConnected { get { return isConnected; } }
/// <summary>
/// Gets the unique node-id of the current node
/// </summary>
......
......@@ -33,7 +33,7 @@ private static readonly CommandMap
RedisCommand.SCRIPT,
RedisCommand.AUTH, RedisCommand.ECHO, RedisCommand.PING, RedisCommand.QUIT, RedisCommand.SELECT,
RedisCommand.ECHO, RedisCommand.PING, RedisCommand.QUIT, RedisCommand.SELECT,
RedisCommand.BGREWRITEAOF, RedisCommand.BGSAVE, RedisCommand.CLIENT, RedisCommand.CLUSTER, RedisCommand.CONFIG, RedisCommand.DBSIZE,
RedisCommand.DEBUG, RedisCommand.FLUSHALL, RedisCommand.FLUSHDB, RedisCommand.INFO, RedisCommand.LASTSAVE, RedisCommand.MONITOR, RedisCommand.SAVE,
......
......@@ -1203,11 +1203,31 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
}
const CommandFlags flags = CommandFlags.NoRedirect | CommandFlags.HighPriority;
var available = new Task<bool>[endpoints.Count];
var servers = new ServerEndPoint[available.Length];
List<ServerEndPoint> masters = new List<ServerEndPoint>(endpoints.Count);
bool useTieBreakers = !string.IsNullOrWhiteSpace(configuration.TieBreaker);
var tieBreakers = useTieBreakers ? new Task<string>[endpoints.Count] : null;
ServerEndPoint[] servers = null;
Task<string>[] tieBreakers = null;
bool encounteredConnectedClusterServer = false;
Stopwatch watch = null;
int iterCount = first ? 2 : 1;
// this is fix for https://github.com/StackExchange/StackExchange.Redis/issues/300
// auto discoverability of cluster nodes is made synchronous.
// we try to connect to endpoints specified inside the user provided configuration
// and when we encounter one such endpoint to which we are able to successfully connect,
// we get the list of cluster nodes from this endpoint and try to proactively connect
// to these nodes instead of relying on auto configure
for (int iter = 0; iter < iterCount; ++iter)
{
if (endpoints == null) break;
var available = new Task<bool>[endpoints.Count];
tieBreakers = useTieBreakers ? new Task<string>[endpoints.Count] : null;
servers = new ServerEndPoint[available.Length];
RedisKey tieBreakerKey = useTieBreakers ? (RedisKey)configuration.TieBreaker : default(RedisKey);
for (int i = 0; i < available.Length; i++)
{
Trace("Testing: " + Format.ToString(endpoints[i]));
......@@ -1232,11 +1252,13 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
}
}
LogLocked(log, "Allowing endpoints {0} to respond...", TimeSpan.FromMilliseconds(configuration.ConnectTimeout));
Trace("Allowing endpoints " + TimeSpan.FromMilliseconds(configuration.ConnectTimeout) + " to respond...");
await WaitAllIgnoreErrorsAsync(available, configuration.ConnectTimeout, log).ForAwait();
List<ServerEndPoint> masters = new List<ServerEndPoint>(available.Length);
watch = watch ?? Stopwatch.StartNew();
var remaining = configuration.ConnectTimeout - checked((int)watch.ElapsedMilliseconds);
LogLocked(log, "Allowing endpoints {0} to respond...", TimeSpan.FromMilliseconds(remaining));
Trace("Allowing endpoints " + TimeSpan.FromMilliseconds(remaining) + " to respond...");
await WaitAllIgnoreErrorsAsync(available, remaining, log).ForAwait();
EndPointCollection updatedClusterEndpointCollection = null;
for (int i = 0; i < available.Length; i++)
{
var task = available[i];
......@@ -1279,6 +1301,15 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
break;
}
if (clusterCount > 0 && !encounteredConnectedClusterServer)
{
// we have encountered a connected server with clustertype for the first time.
// so we will get list of other nodes from this server using "CLUSTER NODES" command
// and try to connect to these other nodes in the next iteration
encounteredConnectedClusterServer = true;
updatedClusterEndpointCollection = GetEndpointsFromClusterNodes(server, log);
}
// set the server UnselectableFlags and update masters list
switch (server.ServerType)
{
......@@ -1314,6 +1345,16 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
}
}
if (encounteredConnectedClusterServer)
{
endpoints = updatedClusterEndpointCollection;
}
else
{
break; // we do not want to repeat the second iteration
}
}
if (clusterCount == 0)
{
// set the serverSelectionStrategy
......@@ -1419,6 +1460,23 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
}
}
private EndPointCollection GetEndpointsFromClusterNodes(ServerEndPoint server, TextWriter log)
{
var message = Message.Create(-1, CommandFlags.None, RedisCommand.CLUSTER, RedisLiterals.NODES);
ClusterConfiguration clusterConfig = null;
try
{
clusterConfig = this.ExecuteSyncImpl(message, ResultProcessor.ClusterNodes, server);
return new EndPointCollection(clusterConfig.Nodes.Select(node => node.EndPoint).ToList());
}
catch (Exception ex)
{
LogLocked(log, "Encountered error while updating cluster config: " + ex.Message);
return null;
}
}
private void ResetAllNonConnected()
{
var snapshot = serverSnapshot;
......
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Net;
......@@ -9,6 +10,14 @@ namespace StackExchange.Redis
/// </summary>
public sealed class EndPointCollection : Collection<EndPoint>
{
public EndPointCollection() : base()
{
}
public EndPointCollection(IList<EndPoint> endpoints) : base(endpoints)
{
}
/// <summary>
/// Format an endpoint
/// </summary>
......
......@@ -133,7 +133,7 @@ internal static bool TryGetHostPort(EndPoint endpoint, out string host, out int
internal static bool TryParseDouble(string s, out double value)
{
if(s == null || s.Length == 0)
if(string.IsNullOrEmpty(s))
{
value = 0;
return false;
......
......@@ -484,7 +484,7 @@ sealed class KeyMigrateCommandMessage : Message.CommandKeyBase // MIGRATE is aty
public KeyMigrateCommandMessage(int db, RedisKey key, EndPoint toServer, int toDatabase, int timeoutMilliseconds, MigrateOptions migrateOptions, CommandFlags flags)
: base(db, flags, RedisCommand.MIGRATE, key)
{
if (toServer == null) throw new ArgumentNullException("server");
if (toServer == null) throw new ArgumentNullException("toServer");
string toHost;
int toPort;
if (!Format.TryGetHostPort(toServer, out toHost, out toPort)) throw new ArgumentException("toServer");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment