Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix potential grain timer deadlock during disposal #8950

Merged
merged 1 commit into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions src/Orleans.Core/Timers/SafeTimerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ internal SafeTimerBase(ILogger logger, TimerCallback syncCallback, object state,
Start(dueTime, period);
}

public bool IsStarted => timerStarted;

public void Start(TimeSpan due, TimeSpan period)
{
if (timerStarted) throw new InvalidOperationException(string.Format("Calling start on timer {0} is not allowed, since it was already created in a started mode with specified due.", GetFullName()));
Expand All @@ -66,10 +68,18 @@ public void Start(TimeSpan due, TimeSpan period)
timer.Change(due, Constants.INFINITE_TIMESPAN);
}

private void Init(ILogger logger, Func<object, Task> asynCallback, TimerCallback synCallback, object state, TimeSpan due, TimeSpan period)
public void Stop()
{
timerFrequency = Constants.INFINITE_TIMESPAN;
dueTime = Constants.INFINITE_TIMESPAN;
timerStarted = false;
timer.Change(Constants.INFINITE_TIMESPAN, Constants.INFINITE_TIMESPAN);
}

private void Init(ILogger logger, Func<object, Task> asyncCallback, TimerCallback synCallback, object state, TimeSpan due, TimeSpan period)
{
if (synCallback == null && asynCallback == null) throw new ArgumentNullException(nameof(synCallback), "Cannot use null for both sync and asyncTask timer callbacks.");
int numNonNulls = (asynCallback != null ? 1 : 0) + (synCallback != null ? 1 : 0);
if (synCallback == null && asyncCallback == null) throw new ArgumentNullException(nameof(synCallback), "Cannot use null for both sync and asyncTask timer callbacks.");
int numNonNulls = (asyncCallback != null ? 1 : 0) + (synCallback != null ? 1 : 0);
if (numNonNulls > 1) throw new ArgumentNullException(nameof(synCallback), "Cannot define more than one timer callbacks. Pick one.");
if (period == TimeSpan.Zero) throw new ArgumentOutOfRangeException(nameof(period), period, "Cannot use TimeSpan.Zero for timer period");

Expand All @@ -81,7 +91,7 @@ private void Init(ILogger logger, Func<object, Task> asynCallback, TimerCallback
if (periodTm < -1) throw new ArgumentOutOfRangeException(nameof(period), "The period must not be less than -1.");
if (periodTm > MaxSupportedTimeout) throw new ArgumentOutOfRangeException(nameof(period), "The period interval must be less than 2^32-2.");

this.asyncTaskCallback = asynCallback;
this.asyncTaskCallback = asyncCallback;
syncCallbackFunc = synCallback;
timerFrequency = period;
this.dueTime = due;
Expand Down
2 changes: 1 addition & 1 deletion src/Orleans.Runtime/Core/SystemTarget.cs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ internal IGrainTimer RegisterGrainTimer(Func<object, Task> asyncCallback, object
var ctxt = RuntimeContext.Current;
name = name ?? ctxt.GrainId + "Timer";

var timer = GrainTimer.FromTaskCallback(this.timerLogger, asyncCallback, state, dueTime, period, name);
var timer = GrainTimer.FromTaskCallback(this.timerLogger, asyncCallback, state, dueTime, period, name, grainContext: this);
timer.Start();
return timer;
}
Expand Down
178 changes: 83 additions & 95 deletions src/Orleans.Runtime/Timers/GrainTimer.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#nullable enable
using System;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Orleans.Runtime.Scheduler;
Expand All @@ -7,39 +9,36 @@ namespace Orleans.Runtime
{
internal sealed class GrainTimer : IGrainTimer
{
private Func<object, Task> asyncCallback;
private AsyncTaskSafeTimer timer;
private readonly Func<object?, Task> asyncCallback;
private readonly TimeSpan dueTime;
private readonly TimeSpan timerFrequency;
private DateTime previousTickTime;
private int totalNumTicks;
private readonly ILogger logger;
private volatile Task currentlyExecutingTickTask;
private readonly object currentlyExecutingTickTaskLock = new();
private readonly IGrainContext grainContext;
private readonly string name;
private DateTime previousTickTime;
private int totalNumTicks;
private volatile AsyncTaskSafeTimer? timer;
private readonly object? state;
private volatile Task? currentlyExecutingTickTask;

public string Name { get; }

private bool TimerAlreadyStopped { get { return timer == null || asyncCallback == null; } }

private GrainTimer(IGrainContext activationData, ILogger logger, Func<object, Task> asyncCallback, object state, TimeSpan dueTime, TimeSpan period, string name)
private GrainTimer(IGrainContext grainContext, ILogger logger, Func<object?, Task> asyncCallback, object? state, TimeSpan dueTime, TimeSpan period, string? name)
{
var ctxt = RuntimeContext.Current;
if (ctxt is null)
if (RuntimeContext.Current is null)
{
throw new InvalidSchedulingContextException(
"Current grain context is null. "
+ "Please make sure you are not trying to create a Timer from outside Orleans Task Scheduler, "
+ "which will be the case if you create it inside Task.Run.");
}

this.grainContext = activationData;
this.grainContext = grainContext;
this.logger = logger;
this.Name = name;
this.name = name ?? string.Empty;
this.asyncCallback = asyncCallback;
timer = new AsyncTaskSafeTimer(logger,
stateObj => TimerTick(stateObj, ctxt),
state);
static stateObj => ((GrainTimer)stateObj).TimerTick(),
this);
this.state = state;
this.dueTime = dueTime;
timerFrequency = period;
previousTickTime = DateTime.UtcNow;
Expand All @@ -48,141 +47,130 @@ private GrainTimer(IGrainContext activationData, ILogger logger, Func<object, Ta

internal static IGrainTimer FromTaskCallback(
ILogger logger,
Func<object, Task> asyncCallback,
Func<object?, Task> asyncCallback,
object state,
TimeSpan dueTime,
TimeSpan period,
string name = null,
IGrainContext grainContext = null)
string name,
IGrainContext grainContext)
{
return new GrainTimer(grainContext, logger, asyncCallback, state, dueTime, period, name);
}

public void Start()
{
if (TimerAlreadyStopped)
throw new ObjectDisposedException(string.Format("The timer {0} was already disposed.", GetFullName()));
if (timer is not { } asyncTimer)
{
throw new ObjectDisposedException(GetDiagnosticName(), "The timer was already disposed.");
}

timer.Start(dueTime, timerFrequency);
asyncTimer.Start(dueTime, timerFrequency);
}

public void Stop()
{
asyncCallback = null;
// Stop the timer from ticking, but don't dispose it yet since it might be mid-tick.
timer?.Stop();
}

private async Task TimerTick(object state, IGrainContext context)
private async Task TimerTick()
{
if (TimerAlreadyStopped)
return;
try
{
// Schedule call back to grain context
var workItem = new AsyncClosureWorkItem(() => ForwardToAsyncCallback(state), this.Name, context);
context.Scheduler.QueueWorkItem(workItem);
await workItem.Task;
}
catch (InvalidSchedulingContextException exc)
{
logger.LogError(
(int)ErrorCode.Timer_InvalidContext,
exc,
"Caught an InvalidSchedulingContextException on timer {TimerName}, context is {GrainContext}. Going to dispose this timer!",
GetFullName(),
context);
DisposeTimer();
}
// Schedule call back to grain context
// AsyncSafeTimer ensures that calls to this method are serialized.
var workItem = new AsyncClosureWorkItem(ForwardToAsyncCallback, this.name, grainContext);
grainContext.Scheduler.QueueWorkItem(workItem);
await workItem.Task;
}

private async Task ForwardToAsyncCallback(object state)
private async Task ForwardToAsyncCallback()
{
// AsyncSafeTimer ensures that calls to this method are serialized.
if (TimerAlreadyStopped) return;

try
{
RequestContext.Clear(); // Clear any previous RC, so it does not leak into this call by mistake.
lock (this.currentlyExecutingTickTaskLock)
{
if (TimerAlreadyStopped) return;

totalNumTicks++;

if (logger.IsEnabled(LogLevel.Trace))
logger.LogTrace((int)ErrorCode.TimerBeforeCallback, "About to make timer callback for timer {TimerName}", GetFullName());

currentlyExecutingTickTask = asyncCallback(state);
}
await currentlyExecutingTickTask;

if (logger.IsEnabled(LogLevel.Trace)) logger.LogTrace((int)ErrorCode.TimerAfterCallback, "Completed timer callback for timer {TimerName}", GetFullName());
var tickTask = currentlyExecutingTickTask = InvokeTimerCallback();
await tickTask;
}
catch (Exception exc)
{
logger.LogError(
(int)ErrorCode.Timer_GrainTimerCallbackError,
exc,
"Caught and ignored exception thrown from timer callback for timer {TimerName}",
GetFullName());
GetDiagnosticName());
}
finally
{
previousTickTime = DateTime.UtcNow;
currentlyExecutingTickTask = null;
// if this is not a repeating timer, then we can
// dispose of the timer.

// If this is not a repeating timer, then we can dispose of the timer.
if (timerFrequency == Constants.INFINITE_TIMESPAN)
{
DisposeTimer();
}
}
}

public Task GetCurrentlyExecutingTickTask()
private async Task InvokeTimerCallback()
{
return currentlyExecutingTickTask ?? Task.CompletedTask;
}
// This is called under a lock, so ensure that the method yields before invoking a callback
// which could take a different lock and potentially cause a deadlock.
await Task.Yield();

// If the timer was stopped or disposed since this was scheduled, terminate without invoking the callback.
if (timer is not { IsStarted: true })
{
return;
}

private string GetFullName() => $"GrainTimer.{Name} TimerCallbackHandler:{asyncCallback?.Target}->{asyncCallback?.Method}";
// Clear any previous RequestContext, so it does not leak into this call by mistake.
RequestContext.Clear();
totalNumTicks++;

// The reason we need to check CheckTimerFreeze on both the SafeTimer and this GrainTimer
// is that SafeTimer may tick OK (no starvation by .NET thread pool), but then scheduler.QueueWorkItem
// may not execute and starve this GrainTimer callback.
public bool CheckTimerFreeze(DateTime lastCheckTime)
{
if (TimerAlreadyStopped) return true;
// check underlying SafeTimer (checking that .NET thread pool does not starve this timer)
if (!timer.CheckTimerFreeze(lastCheckTime, () => Name)) return false;
// if SafeTimer failed the check, no need to check GrainTimer too, since it will fail as well.

// check myself (checking that scheduler.QueueWorkItem does not starve this timer)
return SafeTimerBase.CheckTimerDelay(previousTickTime, totalNumTicks,
dueTime, timerFrequency, logger, GetFullName, ErrorCode.Timer_TimerInsideGrainIsNotTicking, true);
if (logger.IsEnabled(LogLevel.Trace))
{
logger.LogTrace((int)ErrorCode.TimerBeforeCallback, "About to make timer callback for timer {TimerName}", GetDiagnosticName());
}

await asyncCallback(state);

if (logger.IsEnabled(LogLevel.Trace))
{
logger.LogTrace((int)ErrorCode.TimerAfterCallback, "Completed timer callback for timer {TimerName}", GetDiagnosticName());
}
}

public bool CheckTimerDelay()
public Task GetCurrentlyExecutingTickTask() => currentlyExecutingTickTask ?? Task.CompletedTask;

private string GetDiagnosticName() => name switch
{
return SafeTimerBase.CheckTimerDelay(previousTickTime, totalNumTicks,
dueTime, timerFrequency, logger, GetFullName, ErrorCode.Timer_TimerInsideGrainIsNotTicking, false);
}
{ Length: > 0 } => $"GrainTimer.{name} TimerCallbackHandler:{asyncCallback?.Target}->{asyncCallback?.Method}",
_ => $"GrainTimer TimerCallbackHandler:{asyncCallback?.Target}->{asyncCallback?.Method}"
};

public void Dispose()
{
DisposeTimer();
asyncCallback = null;
}

private void DisposeTimer()
{
var tmp = timer;
if (tmp == null) return;

Utils.SafeExecute(tmp.Dispose);
timer = null;
lock (this.currentlyExecutingTickTaskLock)
var asyncTimer = Interlocked.CompareExchange(ref timer, null, timer);
if (asyncTimer == null)
{
asyncCallback = null;
return;
}

grainContext?.GetComponent<IGrainTimerRegistry>().OnTimerDisposed(this);
try
{
asyncTimer.Dispose();
}
catch (Exception ex)
{
logger.LogError(ex, "Error disposing timer {TimerName}", GetDiagnosticName());
}

grainContext.GetComponent<IGrainTimerRegistry>()?.OnTimerDisposed(this);
}
}
}
2 changes: 1 addition & 1 deletion src/Orleans.Runtime/Timers/TimerRegistry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public TimerRegistry(ILoggerFactory loggerFactory)

public IDisposable RegisterTimer(IGrainContext grainContext, Func<object, Task> asyncCallback, object state, TimeSpan dueTime, TimeSpan period)
{
var timer = GrainTimer.FromTaskCallback(this.timerLogger, asyncCallback, state, dueTime, period, grainContext: grainContext);
var timer = GrainTimer.FromTaskCallback(this.timerLogger, asyncCallback, state, dueTime, period, name: string.Empty, grainContext: grainContext);
grainContext?.GetComponent<IGrainTimerRegistry>().OnTimerCreated(timer);
timer.Start();
return timer;
Expand Down
Loading