Unverified Commit a01cd041 authored by Nick Craver's avatar Nick Craver Committed by GitHub

Better Connection Counts & Error Messages (#1397)

* ConnectionMultiplexer: track global counts for deebugging

* Build dammit

* Move to per-multiplexer/add tests

Note: I know mutiplexer isn't spelled right - will fix that in a follow-up to avoid noise.

* Fix test key names

Broken since the 2.1 bump, oops

* Simplify the NoConnectionAvailable static

Simpifies usage for all callers. Also shares code and adds diagnostics to the "no connection" case.

* Add tests for NoConnectionException

* Failover: fix tests and debug some

SubscriptionsSurviveMasterSwitchAsync is a thorn in our side - moving to DEBUG.

* Remove bad check

Inner is irrelevant here - can be not-null depending on the connection race.

* Update message and add more tests!

* Bump pipelines to 2.1.6
parent 2ab9b375
......@@ -105,6 +105,11 @@ bool IInternalConnectionMultiplexer.IgnoreConnect
/// </summary>
internal volatile bool IgnoreConnect;
/// <summary>
/// Tracks overall connection multiplexer counts
/// </summary>
internal int _connectAttemptCount = 0, _connectCompletedCount = 0, _connectionCloseCount = 0;
/// <summary>
/// Provides a way of overriding the default Task Factory. If not set, it will use the default Task.Factory.
/// Useful when top level code sets it's own factory which may interfere with Redis queries.
......@@ -384,7 +389,7 @@ internal void MakeMaster(ServerEndPoint server, ReplicationChangeOptions options
if (server == null) throw new ArgumentNullException(nameof(server));
var srv = new RedisServer(this, server, null);
if (!srv.IsConnected) throw ExceptionFactory.NoConnectionAvailable(IncludeDetailInExceptions, IncludePerformanceCountersInExceptions, RedisCommand.SLAVEOF, null, server, GetServerSnapshot());
if (!srv.IsConnected) throw ExceptionFactory.NoConnectionAvailable(this, null, server, GetServerSnapshot(), command: RedisCommand.SLAVEOF);
CommandMap.AssertAvailable(RedisCommand.SLAVEOF);
#pragma warning disable CS0618
......@@ -848,12 +853,14 @@ private static async Task<ConnectionMultiplexer> ConnectImplAsync(object configu
{
muxer = CreateMultiplexer(configuration, logProxy, out connectHandler);
killMe = muxer;
Interlocked.Increment(ref muxer._connectAttemptCount);
bool configured = await muxer.ReconfigureAsync(true, false, logProxy, null, "connect").ObserveErrors().ForAwait();
if (!configured)
{
throw ExceptionFactory.UnableToConnect(muxer, muxer.failureMessage);
}
killMe = null;
Interlocked.Increment(ref muxer._connectCompletedCount);
return muxer;
}
finally
......@@ -1006,6 +1013,7 @@ private static ConnectionMultiplexer ConnectImpl(object configuration, TextWrite
{
muxer = CreateMultiplexer(configuration, logProxy, out connectHandler);
killMe = muxer;
Interlocked.Increment(ref muxer._connectAttemptCount);
// note that task has timeouts internally, so it might take *just over* the regular timeout
var task = muxer.ReconfigureAsync(true, false, logProxy, null, "connect");
......@@ -1024,6 +1032,7 @@ private static ConnectionMultiplexer ConnectImpl(object configuration, TextWrite
if (!task.Result) throw ExceptionFactory.UnableToConnect(muxer, muxer.failureMessage);
killMe = null;
Interlocked.Increment(ref muxer._connectCompletedCount);
if (muxer.ServerSelectionStrategy.ServerType == ServerType.Sentinel)
{
......@@ -2466,6 +2475,7 @@ public void Close(bool allowCommandsToComplete = true)
DisposeAndClearServers();
OnCloseReaderWriter();
OnClosing(true);
Interlocked.Increment(ref _connectionCloseCount);
}
partial void OnCloseReaderWriter();
......@@ -2582,7 +2592,7 @@ internal Exception GetException(WriteResult result, Message message, ServerEndPo
{
case WriteResult.Success: return null;
case WriteResult.NoConnectionAvailable:
return ExceptionFactory.NoConnectionAvailable(IncludeDetailInExceptions, IncludePerformanceCountersInExceptions, message.Command, message, server, GetServerSnapshot());
return ExceptionFactory.NoConnectionAvailable(this, message, server);
case WriteResult.TimeoutBeforeWrite:
return ExceptionFactory.Timeout(this, "The timeout was reached before the message could be written to the output buffer, and it was not sent", message, server, result);
case WriteResult.WriteFailure:
......
This diff is collapsed.
......@@ -28,13 +28,13 @@ public void Execute()
if (server == null)
{
FailNoServer(snapshot);
throw ExceptionFactory.NoConnectionAvailable(multiplexer.IncludeDetailInExceptions, multiplexer.IncludePerformanceCountersInExceptions, message.Command, message, server,multiplexer.GetServerSnapshot());
throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server);
}
var bridge = server.GetBridge(message.Command);
if (bridge == null)
{
FailNoServer(snapshot);
throw ExceptionFactory.NoConnectionAvailable(multiplexer.IncludeDetailInExceptions, multiplexer.IncludePerformanceCountersInExceptions, message.Command, message, server, multiplexer.GetServerSnapshot());
throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server);
}
// identity a list
......
......@@ -577,7 +577,7 @@ internal override Task<T> ExecuteAsync<T>(Message message, ResultProcessor<T> pr
// no need to deny exec-sync here; will be complete before they see if
var tcs = TaskSource.Create<T>(asyncState);
ConnectionMultiplexer.ThrowFailed(tcs, ExceptionFactory.NoConnectionAvailable(multiplexer.IncludeDetailInExceptions, multiplexer.IncludePerformanceCountersInExceptions, message.Command, message, server, multiplexer.GetServerSnapshot()));
ConnectionMultiplexer.ThrowFailed(tcs, ExceptionFactory.NoConnectionAvailable(multiplexer, message, server));
return tcs.Task;
}
return base.ExecuteAsync<T>(message, processor, server);
......@@ -590,7 +590,7 @@ internal override T ExecuteSync<T>(Message message, ResultProcessor<T> processor
if (!server.IsConnected)
{
if (message == null || message.IsFireAndForget) return default(T);
throw ExceptionFactory.NoConnectionAvailable(multiplexer.IncludeDetailInExceptions, multiplexer.IncludePerformanceCountersInExceptions, message.Command, message, server, multiplexer.GetServerSnapshot());
throw ExceptionFactory.NoConnectionAvailable(multiplexer, message, server);
}
return base.ExecuteSync<T>(message, processor, server);
}
......
......@@ -13,7 +13,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Pipelines.Sockets.Unofficial" Version="2.1.1" />
<PackageReference Include="Pipelines.Sockets.Unofficial" Version="2.1.6" />
<PackageReference Include="System.Diagnostics.PerformanceCounter" Version="4.7.0" />
<PackageReference Include="System.IO.Pipelines" Version="4.7.0" />
<PackageReference Include="System.Threading.Channels" Version="4.7.0" />
......
......@@ -39,7 +39,7 @@ public void AsyncTasksReportFailureIfServerUnavailable()
Assert.True(c.IsFaulted, "faulted");
var ex = c.Exception.InnerExceptions.Single();
Assert.IsType<RedisConnectionException>(ex);
Assert.StartsWith("No connection is available to service this operation: SADD " + key.ToString(), ex.Message);
Assert.StartsWith("No connection is active/available to service this operation: SADD " + key.ToString(), ex.Message);
}
}
......
using System;
using System.Diagnostics;
using Xunit;
using Xunit.Abstractions;
......@@ -16,7 +15,7 @@ public void NullLastException()
{
var conn = muxer.GetDatabase();
Assert.Null(muxer.GetServerSnapshot()[0].LastException);
var ex = ExceptionFactory.NoConnectionAvailable(true, true, new RedisCommand(), null, null, muxer.GetServerSnapshot());
var ex = ExceptionFactory.NoConnectionAvailable(muxer as ConnectionMultiplexer, null, null);
Assert.Null(ex.InnerException);
}
}
......@@ -28,13 +27,7 @@ public void CanGetVersion()
Assert.Matches(@"2\.[0-9]+\.[0-9]+(\.[0-9]+)?", libVer);
}
[Fact]
public void NullSnapshot()
{
var ex = ExceptionFactory.NoConnectionAvailable(true, true, new RedisCommand(), null, null, null);
Assert.Null(ex.InnerException);
}
#if DEBUG
[Fact]
public void MultipleEndpointsThrowConnectionException()
{
......@@ -50,7 +43,7 @@ public void MultipleEndpointsThrowConnectionException()
muxer.GetServer(endpoint).SimulateConnectionFailure();
}
var ex = ExceptionFactory.NoConnectionAvailable(true, true, new RedisCommand(), null, null, muxer.GetServerSnapshot());
var ex = ExceptionFactory.NoConnectionAvailable(muxer as ConnectionMultiplexer, null, null);
var outer = Assert.IsType<RedisConnectionException>(ex);
Assert.Equal(ConnectionFailureType.UnableToResolvePhysicalConnection, outer.FailureType);
var inner = Assert.IsType<RedisConnectionException>(outer.InnerException);
......@@ -63,6 +56,7 @@ public void MultipleEndpointsThrowConnectionException()
ClearAmbientFailures();
}
}
#endif
[Fact]
public void ServerTakesPrecendenceOverSnapshot()
......@@ -76,7 +70,7 @@ public void ServerTakesPrecendenceOverSnapshot()
muxer.GetServer(muxer.GetEndPoints()[0]).SimulateConnectionFailure();
var ex = ExceptionFactory.NoConnectionAvailable(true, true, new RedisCommand(), null, muxer.GetServerSnapshot()[0], muxer.GetServerSnapshot());
var ex = ExceptionFactory.NoConnectionAvailable(muxer as ConnectionMultiplexer, null, muxer.GetServerSnapshot()[0]);
Assert.IsType<RedisConnectionException>(ex);
Assert.IsType<RedisConnectionException>(ex.InnerException);
Assert.Equal(ex.InnerException, muxer.GetServerSnapshot()[0].LastException);
......@@ -97,7 +91,7 @@ public void NullInnerExceptionForMultipleEndpointsWithNoLastException()
{
var conn = muxer.GetDatabase();
muxer.AllowConnect = false;
var ex = ExceptionFactory.NoConnectionAvailable(true, true, new RedisCommand(), null, null, muxer.GetServerSnapshot());
var ex = ExceptionFactory.NoConnectionAvailable(muxer as ConnectionMultiplexer, null, null);
Assert.IsType<RedisConnectionException>(ex);
Assert.Null(ex.InnerException);
}
......@@ -115,7 +109,6 @@ public void TimeoutException()
{
using (var muxer = Create(keepAlive: 1, connectTimeout: 10000, allowAdmin: true, shared: false) as ConnectionMultiplexer)
{
var conn = muxer.GetDatabase();
var server = GetServer(muxer);
muxer.AllowConnect = false;
var msg = Message.Create(-1, CommandFlags.None, RedisCommand.PING);
......@@ -128,6 +121,7 @@ public void TimeoutException()
Assert.Contains("clientName: " + nameof(TimeoutException), ex.Message);
// Ensure our pipe numbers are in place
Assert.Contains("inst: 0, qu: 0, qs: 0, aw: False, in: 0, in-pipe: 0, out-pipe: 0", ex.Message);
Assert.Contains("mc: 1/1/0", ex.Message);
Assert.Contains("serverEndpoint: " + server.EndPoint.ToString(), ex.Message);
Assert.DoesNotContain("Unspecified/", ex.Message);
Assert.EndsWith(" (Please take a look at this article for some common client-side issues that can cause timeouts: https://stackexchange.github.io/StackExchange.Redis/Timeouts)", ex.Message);
......@@ -139,5 +133,78 @@ public void TimeoutException()
ClearAmbientFailures();
}
}
[Theory]
[InlineData(false, 0, 0, true, "Connection to Redis never succeeded (attempts: 0 - connection likely in-progress), unable to service operation: PING")]
[InlineData(false, 1, 0, true, "Connection to Redis never succeeded (attempts: 1 - connection likely in-progress), unable to service operation: PING")]
[InlineData(false, 12, 0, true, "Connection to Redis never succeeded (attempts: 12 - check your config), unable to service operation: PING")]
[InlineData(false, 0, 0, false, "Connection to Redis never succeeded (attempts: 0 - connection likely in-progress), unable to service operation: PING")]
[InlineData(false, 1, 0, false, "Connection to Redis never succeeded (attempts: 1 - connection likely in-progress), unable to service operation: PING")]
[InlineData(false, 12, 0, false, "Connection to Redis never succeeded (attempts: 12 - check your config), unable to service operation: PING")]
[InlineData(true, 0, 0, true, "No connection is active/available to service this operation: PING")]
[InlineData(true, 1, 0, true, "No connection is active/available to service this operation: PING")]
[InlineData(true, 12, 0, true, "No connection is active/available to service this operation: PING")]
public void NoConnectionException(bool abortOnConnect, int connCount, int completeCount, bool hasDetail, string messageStart)
{
try
{
var options = new ConfigurationOptions()
{
AbortOnConnectFail = abortOnConnect,
ConnectTimeout = 500,
SyncTimeout = 500,
KeepAlive = 5000
};
ConnectionMultiplexer muxer;
if (abortOnConnect)
{
options.EndPoints.Add(TestConfig.Current.MasterServerAndPort);
muxer = ConnectionMultiplexer.Connect(options);
}
else
{
options.EndPoints.Add($"doesnot.exist.{Guid.NewGuid():N}:6379");
muxer = ConnectionMultiplexer.Connect(options);
}
using (muxer)
{
var server = muxer.GetServer(muxer.GetEndPoints()[0]);
muxer.AllowConnect = false;
muxer._connectAttemptCount = connCount;
muxer._connectCompletedCount = completeCount;
muxer.IncludeDetailInExceptions = hasDetail;
muxer.IncludePerformanceCountersInExceptions = hasDetail;
var msg = Message.Create(-1, CommandFlags.None, RedisCommand.PING);
var rawEx = ExceptionFactory.NoConnectionAvailable(muxer, msg, new ServerEndPoint(muxer, server.EndPoint));
var ex = Assert.IsType<RedisConnectionException>(rawEx);
Writer.WriteLine("Exception: " + ex.Message);
// Example format: "Exception: No connection is active/available to service this operation: PING, inst: 0, qu: 0, qs: 0, aw: False, in: 0, in-pipe: 0, out-pipe: 0, serverEndpoint: 127.0.0.1:6379, mc: 1/1/0, mgr: 10 of 10 available, clientName: NoConnectionException, IOCP: (Busy=0,Free=1000,Min=8,Max=1000), WORKER: (Busy=2,Free=2045,Min=8,Max=2047), Local-CPU: 100%, v: 2.1.0.5";
Assert.StartsWith(messageStart, ex.Message);
// Ensure our pipe numbers are in place if they should be
if (hasDetail)
{
Assert.Contains("inst: 0, qu: 0, qs: 0, aw: False, in: 0, in-pipe: 0, out-pipe: 0", ex.Message);
Assert.Contains($"mc: {connCount}/{completeCount}/0", ex.Message);
Assert.Contains("serverEndpoint: " + server.EndPoint.ToString().Replace("Unspecified/", ""), ex.Message);
}
else
{
Assert.DoesNotContain("inst: 0, qu: 0, qs: 0, aw: False, in: 0, in-pipe: 0, out-pipe: 0", ex.Message);
Assert.DoesNotContain($"mc: {connCount}/{completeCount}/0", ex.Message);
Assert.DoesNotContain("serverEndpoint: " + server.EndPoint.ToString().Replace("Unspecified/", ""), ex.Message);
}
Assert.DoesNotContain("Unspecified/", ex.Message);
}
}
finally
{
ClearAmbientFailures();
}
}
}
}
......@@ -143,7 +143,7 @@ public async Task DeslaveGoesToPrimary()
Assert.Equal(primary.EndPoint, db.IdentifyEndpoint(key, CommandFlags.PreferSlave));
var ex = Assert.Throws<RedisConnectionException>(() => db.IdentifyEndpoint(key, CommandFlags.DemandSlave));
Assert.StartsWith("No connection is available to service this operation: EXISTS " + Me(), ex.Message);
Assert.StartsWith("No connection is active/available to service this operation: EXISTS " + Me(), ex.Message);
Writer.WriteLine("Invoking MakeMaster()...");
primary.MakeMaster(ReplicationChangeOptions.Broadcast | ReplicationChangeOptions.EnslaveSubordinates | ReplicationChangeOptions.SetTiebreaker, Writer);
Writer.WriteLine("Finished MakeMaster() call.");
......@@ -195,6 +195,7 @@ public async Task DeslaveGoesToPrimary()
}
}
#if DEBUG
[Fact]
public async Task SubscriptionsSurviveMasterSwitchAsync()
{
......@@ -353,5 +354,6 @@ public async Task SubscriptionsSurviveMasterSwitchAsync()
}
}
}
#endif
}
}
......@@ -356,12 +356,13 @@ protected IServer GetAnyMaster(IConnectionMultiplexer muxer)
public static string Me([CallerFilePath] string filePath = null, [CallerMemberName] string caller = null) =>
#if NET462
"net462-" + Path.GetFileNameWithoutExtension(filePath) + "-" + caller;
#elif NETCOREAPP2_0
"netcoreapp2.0-" + Path.GetFileNameWithoutExtension(filePath) + "-" + caller;
"net462-"
#elif NETCOREAPP2_1
"netcoreapp2.1-"
#else
"unknown-" + Path.GetFileNameWithoutExtension(filePath) + "-" + caller;
"unknown-"
#endif
+ Path.GetFileNameWithoutExtension(filePath) + "-" + caller;
protected static TimeSpan RunConcurrent(Action work, int threads, int timeout = 10000, [CallerMemberName] string caller = null)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment