Commit 9c0adcbd authored by Marc Gravell's avatar Marc Gravell

Automatic retry during connect (configurable)

parent ef618429
...@@ -6,6 +6,7 @@ namespace StackExchange.Redis.Tests ...@@ -6,6 +6,7 @@ namespace StackExchange.Redis.Tests
[TestFixture] [TestFixture]
public class ConnectFailTimeout : TestBase public class ConnectFailTimeout : TestBase
{ {
#if DEBUG
[TestCase] [TestCase]
public void NoticesConnectFail() public void NoticesConnectFail()
{ {
...@@ -34,6 +35,6 @@ public void NoticesConnectFail() ...@@ -34,6 +35,6 @@ public void NoticesConnectFail()
System.Console.WriteLine(time); System.Console.WriteLine(time);
} }
} }
#endif
} }
} }
...@@ -99,6 +99,7 @@ ...@@ -99,6 +99,7 @@
<Compile Include="TaskTests.cs" /> <Compile Include="TaskTests.cs" />
<Compile Include="TestBase.cs" /> <Compile Include="TestBase.cs" />
<Compile Include="Transactions.cs" /> <Compile Include="Transactions.cs" />
<Compile Include="VPNTest.cs" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="packages.config" /> <None Include="packages.config" />
......
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace StackExchange.Redis.Tests
{
    /// <summary>
    /// Repeated connect-and-ping check against a fixed remote endpoint.
    /// NOTE(review): the host name looks like an internal development server, so this
    /// presumably only passes from inside the corporate network/VPN - confirm before
    /// enabling it in an automated run.
    /// </summary>
    [TestFixture]
    public class VPNTest : TestBase
    {
        /// <summary>
        /// Connects to <paramref name="config"/> 50 times in a row (with
        /// <c>SyncTimeout = 3000</c> and <c>ConnectRetry = 5</c>), pings the server on each
        /// connection and writes the ping result to the console.
        /// If any iteration throws, the captured connection log is written to the console
        /// and the test fails with the iteration number and the exception details.
        /// </summary>
        /// <param name="config">Configuration string handed to <c>ConfigurationOptions.Parse</c>.</param>
        [Test]
        [MaxTime(100000)]
        [TestCase("or-devredis01.ds.stackexchange.com:6379")]
        public void Execute(string config)
        {
            for (int i = 0; i < 50; i++)
            {
                // A fresh log per iteration so that a failure only dumps the relevant attempt.
                using (var log = new StringWriter())
                {
                    try
                    {
                        var options = ConfigurationOptions.Parse(config);
                        options.SyncTimeout = 3000;
                        options.ConnectRetry = 5;
                        using (var conn = ConnectionMultiplexer.Connect(options, log))
                        {
                            var ttl = conn.GetDatabase().Ping();
                            Console.WriteLine(ttl);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Dump the connection log first, then fail with the actual cause so the
                        // test report says what went wrong rather than just "failed".
                        Console.WriteLine(log);
                        Assert.Fail("Connect/ping failed on iteration " + i + ": " + ex);
                    }
                }
                Console.WriteLine();
                Console.WriteLine("===");
                Console.WriteLine();
            }
        }
    }
}
...@@ -72,7 +72,7 @@ internal static void Unknown(string key) ...@@ -72,7 +72,7 @@ internal static void Unknown(string key)
Version = "version", ConnectTimeout = "connectTimeout", Password = "password", Version = "version", ConnectTimeout = "connectTimeout", Password = "password",
TieBreaker = "tiebreaker", WriteBuffer = "writeBuffer", Ssl = "ssl", SslHost = "sslHost", TieBreaker = "tiebreaker", WriteBuffer = "writeBuffer", Ssl = "ssl", SslHost = "sslHost",
ConfigChannel = "configChannel", AbortOnConnectFail = "abortConnect", ResolveDns = "resolveDns", ConfigChannel = "configChannel", AbortOnConnectFail = "abortConnect", ResolveDns = "resolveDns",
ChannelPrefix = "channelPrefix", Proxy = "proxy"; ChannelPrefix = "channelPrefix", Proxy = "proxy", ConnectRetry = "connectRetry";
private static readonly Dictionary<string, string> normalizedOptions = new[] private static readonly Dictionary<string, string> normalizedOptions = new[]
{ {
AllowAdmin, SyncTimeout, AllowAdmin, SyncTimeout,
...@@ -80,7 +80,7 @@ internal static void Unknown(string key) ...@@ -80,7 +80,7 @@ internal static void Unknown(string key)
Version, ConnectTimeout, Password, Version, ConnectTimeout, Password,
TieBreaker, WriteBuffer, Ssl, SslHost, TieBreaker, WriteBuffer, Ssl, SslHost,
ConfigChannel, AbortOnConnectFail, ResolveDns, ConfigChannel, AbortOnConnectFail, ResolveDns,
ChannelPrefix, Proxy ChannelPrefix, Proxy, ConnectRetry
}.ToDictionary(x => x, StringComparer.InvariantCultureIgnoreCase); }.ToDictionary(x => x, StringComparer.InvariantCultureIgnoreCase);
public static string TryNormalize(string value) public static string TryNormalize(string value)
...@@ -105,7 +105,7 @@ public static string TryNormalize(string value) ...@@ -105,7 +105,7 @@ public static string TryNormalize(string value)
private Version defaultVersion; private Version defaultVersion;
private int? keepAlive, syncTimeout, connectTimeout, writeBuffer; private int? keepAlive, syncTimeout, connectTimeout, writeBuffer, connectRetry;
private Proxy? proxy; private Proxy? proxy;
...@@ -153,6 +153,11 @@ public static string TryNormalize(string value) ...@@ -153,6 +153,11 @@ public static string TryNormalize(string value)
/// </summary> /// </summary>
public string ClientName { get { return clientName; } set { clientName = value; } } public string ClientName { get { return clientName; } set { clientName = value; } }
/// <summary>
/// The number of times to repeat the initial connect cycle if no servers respond promptly.
/// Reads back as 3 while no value has been assigned.
/// </summary>
public int ConnectRetry
{
    get { return connectRetry.HasValue ? connectRetry.Value : 3; }
    set { connectRetry = value; }
}
/// <summary> /// <summary>
/// The command-map associated with this configuration /// The command-map associated with this configuration
/// </summary> /// </summary>
...@@ -302,6 +307,7 @@ public ConfigurationOptions Clone() ...@@ -302,6 +307,7 @@ public ConfigurationOptions Clone()
CertificateSelectionCallback = CertificateSelectionCallback, CertificateSelectionCallback = CertificateSelectionCallback,
ChannelPrefix = ChannelPrefix.Clone(), ChannelPrefix = ChannelPrefix.Clone(),
SocketManager = SocketManager, SocketManager = SocketManager,
connectRetry = connectRetry
}; };
foreach (var item in endpoints) foreach (var item in endpoints)
options.endpoints.Add(item); options.endpoints.Add(item);
...@@ -343,6 +349,7 @@ public override string ToString() ...@@ -343,6 +349,7 @@ public override string ToString()
Append(sb, OptionKeys.AbortOnConnectFail, abortOnConnectFail); Append(sb, OptionKeys.AbortOnConnectFail, abortOnConnectFail);
Append(sb, OptionKeys.ResolveDns, resolveDns); Append(sb, OptionKeys.ResolveDns, resolveDns);
Append(sb, OptionKeys.ChannelPrefix, (string)ChannelPrefix); Append(sb, OptionKeys.ChannelPrefix, (string)ChannelPrefix);
Append(sb, OptionKeys.ConnectRetry, connectRetry);
Append(sb, OptionKeys.Proxy, proxy); Append(sb, OptionKeys.Proxy, proxy);
if(commandMap != null) commandMap.AppendDeltas(sb); if(commandMap != null) commandMap.AppendDeltas(sb);
return sb.ToString(); return sb.ToString();
...@@ -433,7 +440,7 @@ static bool IsOption(string option, string prefix) ...@@ -433,7 +440,7 @@ static bool IsOption(string option, string prefix)
void Clear() void Clear()
{ {
clientName = serviceName = password = tieBreaker = sslHost = configChannel = null; clientName = serviceName = password = tieBreaker = sslHost = configChannel = null;
keepAlive = syncTimeout = connectTimeout = writeBuffer = null; keepAlive = syncTimeout = connectTimeout = writeBuffer = connectRetry = null;
allowAdmin = abortOnConnectFail = resolveDns = ssl = null; allowAdmin = abortOnConnectFail = resolveDns = ssl = null;
defaultVersion = null; defaultVersion = null;
endpoints.Clear(); endpoints.Clear();
...@@ -500,6 +507,9 @@ private void DoParse(string configuration, bool ignoreUnknown) ...@@ -500,6 +507,9 @@ private void DoParse(string configuration, bool ignoreUnknown)
case OptionKeys.ConnectTimeout: case OptionKeys.ConnectTimeout:
ConnectTimeout = OptionKeys.ParseInt32(key, value); ConnectTimeout = OptionKeys.ParseInt32(key, value);
break; break;
case OptionKeys.ConnectRetry:
ConnectRetry = OptionKeys.ParseInt32(key, value);
break;
case OptionKeys.Version: case OptionKeys.Version:
DefaultVersion = OptionKeys.ParseVersion(key, value); DefaultVersion = OptionKeys.ParseVersion(key, value);
break; break;
......
...@@ -51,7 +51,6 @@ public static ConfiguredTaskAwaitable<T> ForAwait<T>(this Task<T> task) ...@@ -51,7 +51,6 @@ public static ConfiguredTaskAwaitable<T> ForAwait<T>(this Task<T> task)
/// </summary> /// </summary>
public sealed partial class ConnectionMultiplexer : IDisposable public sealed partial class ConnectionMultiplexer : IDisposable
{ {
/// <summary> /// <summary>
/// Get summary statistics associated with this server /// Get summary statistics associated with this server
/// </summary> /// </summary>
...@@ -726,7 +725,7 @@ public static ConnectionMultiplexer Connect(string configuration, TextWriter log ...@@ -726,7 +725,7 @@ public static ConnectionMultiplexer Connect(string configuration, TextWriter log
killMe = muxer; killMe = muxer;
// note that task has timeouts internally, so it might take *just over* the regular timeout // note that task has timeouts internally, so it might take *just over* the regular timeout
var task = muxer.ReconfigureAsync(true, false, log, null, "connect"); var task = muxer.ReconfigureAsync(true, false, log, null, "connect");
if (!task.Wait(muxer.SyncConnectTimeout)) if (!task.Wait(muxer.SyncConnectTimeout(true)))
{ {
task.ObserveErrors(); task.ObserveErrors();
if (muxer.RawConfig.AbortOnConnectFail) if (muxer.RawConfig.AbortOnConnectFail)
...@@ -755,7 +754,7 @@ public static ConnectionMultiplexer Connect(ConfigurationOptions configuration, ...@@ -755,7 +754,7 @@ public static ConnectionMultiplexer Connect(ConfigurationOptions configuration,
killMe = muxer; killMe = muxer;
// note that task has timeouts internally, so it might take *just over* the regular timeout // note that task has timeouts internally, so it might take *just over* the regular timeout
var task = muxer.ReconfigureAsync(true, false, log, null, "connect"); var task = muxer.ReconfigureAsync(true, false, log, null, "connect");
if (!task.Wait(muxer.SyncConnectTimeout)) if (!task.Wait(muxer.SyncConnectTimeout(true)))
{ {
task.ObserveErrors(); task.ObserveErrors();
if (muxer.RawConfig.AbortOnConnectFail) if (muxer.RawConfig.AbortOnConnectFail)
...@@ -1005,7 +1004,7 @@ public bool Configure(TextWriter log = null) ...@@ -1005,7 +1004,7 @@ public bool Configure(TextWriter log = null)
// note we expect ReconfigureAsync to internally allow [n] duration, // note we expect ReconfigureAsync to internally allow [n] duration,
// so to avoid near misses, here we wait 2*[n] // so to avoid near misses, here we wait 2*[n]
var task = ReconfigureAsync(false, true, log, null, "configure"); var task = ReconfigureAsync(false, true, log, null, "configure");
if (!task.Wait(SyncConnectTimeout)) if (!task.Wait(SyncConnectTimeout(false)))
{ {
task.ObserveErrors(); task.ObserveErrors();
if (configuration.AbortOnConnectFail) if (configuration.AbortOnConnectFail)
...@@ -1017,15 +1016,18 @@ public bool Configure(TextWriter log = null) ...@@ -1017,15 +1016,18 @@ public bool Configure(TextWriter log = null)
return task.Result; return task.Result;
} }
internal int SyncConnectTimeout internal int SyncConnectTimeout(bool forConnect)
{
get
{ {
int retryCount = forConnect ? RawConfig.ConnectRetry : 1;
if (retryCount <= 0) retryCount = 1;
int timeout = configuration.ConnectTimeout; int timeout = configuration.ConnectTimeout;
if (timeout >= int.MaxValue / retryCount) return int.MaxValue;
timeout *= retryCount;
if (timeout >= int.MaxValue - 500) return int.MaxValue; if (timeout >= int.MaxValue - 500) return int.MaxValue;
return timeout + Math.Min(500, timeout); return timeout + Math.Min(500, timeout);
} }
}
/// <summary> /// <summary>
/// Provides a text overview of the status of all connections /// Provides a text overview of the status of all connections
/// </summary> /// </summary>
...@@ -1117,6 +1119,16 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1117,6 +1119,16 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
} }
} }
int attemptsLeft = first ? configuration.ConnectRetry : 1;
bool healthy = false;
do
{
if (first)
{
attemptsLeft--;
}
int standaloneCount = 0, clusterCount = 0;
var endpoints = configuration.EndPoints; var endpoints = configuration.EndPoints;
LogLocked(log, "{0} unique nodes specified", endpoints.Count); LogLocked(log, "{0} unique nodes specified", endpoints.Count);
...@@ -1160,7 +1172,6 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1160,7 +1172,6 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
await WaitAllIgnoreErrorsAsync(available, configuration.ConnectTimeout).ForAwait(); await WaitAllIgnoreErrorsAsync(available, configuration.ConnectTimeout).ForAwait();
List<ServerEndPoint> masters = new List<ServerEndPoint>(available.Length); List<ServerEndPoint> masters = new List<ServerEndPoint>(available.Length);
int standaloneCount = 0, clusterCount = 0;
for (int i = 0; i < available.Length; i++) for (int i = 0; i < available.Length; i++)
{ {
var task = available[i]; var task = available[i];
...@@ -1218,7 +1229,8 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1218,7 +1229,8 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
servers[i].SetUnselectable(UnselectableFlags.ServerType); servers[i].SetUnselectable(UnselectableFlags.ServerType);
break; break;
} }
} else }
else
{ {
servers[i].SetUnselectable(UnselectableFlags.DidNotRespond); servers[i].SetUnselectable(UnselectableFlags.DidNotRespond);
LogLocked(log, "{0} returned, but incorrectly", Format.ToString(endpoints[i])); LogLocked(log, "{0} returned, but incorrectly", Format.ToString(endpoints[i]));
...@@ -1240,7 +1252,8 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1240,7 +1252,8 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
if (master == preferred) if (master == preferred)
{ {
master.ClearUnselectable(UnselectableFlags.RedundantMaster); master.ClearUnselectable(UnselectableFlags.RedundantMaster);
} else }
else
{ {
master.SetUnselectable(UnselectableFlags.RedundantMaster); master.SetUnselectable(UnselectableFlags.RedundantMaster);
} }
...@@ -1260,7 +1273,8 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1260,7 +1273,8 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
if (subscriptionChanges == 0) if (subscriptionChanges == 0)
{ {
LogLocked(log, "No subscription changes necessary"); LogLocked(log, "No subscription changes necessary");
} else }
else
{ {
LogLocked(log, "Subscriptions reconfigured: {0}", subscriptionChanges); LogLocked(log, "Subscriptions reconfigured: {0}", subscriptionChanges);
} }
...@@ -1269,23 +1283,32 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1269,23 +1283,32 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
{ {
GetStatus(log); GetStatus(log);
} }
if (first)
{
LogLocked(log, "Starting heartbeat...");
pulse = new Timer(heartbeat, this, MillisecondsPerHeartbeat, MillisecondsPerHeartbeat);
}
string stormLog = GetStormLog(); string stormLog = GetStormLog();
if (!string.IsNullOrWhiteSpace(stormLog)) if (!string.IsNullOrWhiteSpace(stormLog))
{ {
LogLocked(log, ""); LogLocked(log, "");
LogLocked(log, stormLog); LogLocked(log, stormLog);
} }
if(first && configuration.AbortOnConnectFail && (standaloneCount == 0 && clusterCount == 0)) healthy = standaloneCount != 0 || clusterCount != 0;
if (first && !healthy && attemptsLeft > 0)
{
LogLocked(log, "resetting failing connections to retry...");
ResetAllNonConnected();
LogLocked(log, "retrying; attempts left: " + attemptsLeft + "...");
}
//WTF("?: " + attempts);
} while (first && !healthy && attemptsLeft > 0);
if(first && configuration.AbortOnConnectFail && !healthy)
{ {
return false; return false;
} }
if (first)
{
LogLocked(log, "Starting heartbeat...");
pulse = new Timer(heartbeat, this, MillisecondsPerHeartbeat, MillisecondsPerHeartbeat);
}
return true; return true;
} catch (Exception ex) } catch (Exception ex)
...@@ -1303,6 +1326,15 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text ...@@ -1303,6 +1326,15 @@ internal async Task<bool> ReconfigureAsync(bool first, bool reconfigureAll, Text
} }
} }
/// <summary>
/// Calls <c>ResetNonConnected()</c> on every server in the current server snapshot.
/// </summary>
private void ResetAllNonConnected()
{
    // Copy the field into a local first so the whole loop walks one and the same snapshot.
    var currentServers = serverSnapshot;
    foreach (var endpoint in currentServers)
    {
        endpoint.ResetNonConnected();
    }
}
partial void OnTraceLog(TextWriter log, [System.Runtime.CompilerServices.CallerMemberName] string caller = null); partial void OnTraceLog(TextWriter log, [System.Runtime.CompilerServices.CallerMemberName] string caller = null);
private async Task<ServerEndPoint> NominatePreferredMaster(TextWriter log, ServerEndPoint[] servers, bool useTieBreakers, Task<string>[] tieBreakers, List<ServerEndPoint> masters) private async Task<ServerEndPoint> NominatePreferredMaster(TextWriter log, ServerEndPoint[] servers, bool useTieBreakers, Task<string>[] tieBreakers, List<ServerEndPoint> masters)
{ {
......
...@@ -301,6 +301,17 @@ internal void OnConnected(PhysicalConnection connection) ...@@ -301,6 +301,17 @@ internal void OnConnected(PhysicalConnection connection)
} }
} }
/// <summary>
/// If a physical connection exists but the state is not <c>ConnectedEstablished</c>,
/// records it as failed with <c>ConnectionFailureType.UnableToConnect</c>; then always
/// calls <c>GetConnection()</c>.
/// NOTE(review): <c>GetConnection()</c> is presumably what starts a fresh connection
/// attempt - confirm against its definition.
/// </summary>
internal void ResetNonConnected()
{
    // Capture the field once so the null check and the call act on the same instance.
    var connection = physical;
    if (connection != null)
    {
        if (state != (int)State.ConnectedEstablished)
        {
            connection.RecordConnectionFailed(ConnectionFailureType.UnableToConnect);
        }
    }

    GetConnection();
}
internal void OnConnectionFailed(PhysicalConnection connection, ConnectionFailureType failureType, Exception innerException) internal void OnConnectionFailed(PhysicalConnection connection, ConnectionFailureType failureType, Exception innerException)
{ {
if (reportNextFailure) if (reportNextFailure)
......
...@@ -46,6 +46,14 @@ internal sealed partial class ServerEndPoint : IDisposable ...@@ -46,6 +46,14 @@ internal sealed partial class ServerEndPoint : IDisposable
private Version version; private Version version;
/// <summary>
/// Forwards <c>ResetNonConnected()</c> to the interactive bridge and then to the
/// subscription bridge, skipping whichever of the two is currently null.
/// </summary>
internal void ResetNonConnected()
{
    // Each field is read into a local before the null check so the check and the call
    // operate on the same reference.
    var interactiveBridge = interactive;
    if (interactiveBridge != null)
    {
        interactiveBridge.ResetNonConnected();
    }

    var subscriptionBridge = subscription;
    if (subscriptionBridge != null)
    {
        subscriptionBridge.ResetNonConnected();
    }
}
public ServerEndPoint(ConnectionMultiplexer multiplexer, EndPoint endpoint) public ServerEndPoint(ConnectionMultiplexer multiplexer, EndPoint endpoint)
{ {
this.multiplexer = multiplexer; this.multiplexer = multiplexer;
......
...@@ -138,7 +138,8 @@ internal SocketToken BeginConnect(EndPoint endpoint, ISocketCallback callback) ...@@ -138,7 +138,8 @@ internal SocketToken BeginConnect(EndPoint endpoint, ISocketCallback callback)
} }
throw; throw;
} }
return new SocketToken(socket); var token = new SocketToken(socket);
return token;
} }
internal void SetFastLoopbackOption(Socket socket) internal void SetFastLoopbackOption(Socket socket)
{ {
...@@ -228,6 +229,19 @@ private void EndConnectImpl(IAsyncResult ar) ...@@ -228,6 +229,19 @@ private void EndConnectImpl(IAsyncResult ar)
break; break;
} }
} }
catch(ObjectDisposedException)
{
ConnectionMultiplexer.TraceWithoutContext("(socket shutdown)");
if (tuple != null)
{
try
{ tuple.Item2.Error(); }
catch (Exception inner)
{
ConnectionMultiplexer.TraceWithoutContext(inner.Message);
}
}
}
catch(Exception outer) catch(Exception outer)
{ {
ConnectionMultiplexer.TraceWithoutContext(outer.Message); ConnectionMultiplexer.TraceWithoutContext(outer.Message);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment