Skip to content

More pulse sanity checking #61

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 50 additions & 27 deletions internal/mon/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,20 @@ import (
const sampleWindowSize = 60

type Monitor struct {
samples *sampleWindow
lastSampleTime time.Time
leapSecond ptime.LeapSecond
servo Servo // never nil
lg *slog.Logger
gm *Grandmaster // maybe nil
rc *ProxyRefClock // maybe nil
inSync bool
lastRefTime ptime.Time
ppsStopped bool
sseCh chan<- sse.Event
stats stats
lf logfile.LogFile
samples *sampleWindow
lastSampleTime time.Time
leapSecond ptime.LeapSecond
servo Servo // never nil
lg *slog.Logger
gm *Grandmaster // maybe nil
rc *ProxyRefClock // maybe nil
inSync bool
lastRefTime ptime.Time
lastSyncRefTime ptime.Time
ppsStopped bool
sseCh chan<- sse.Event
stats stats
lf logfile.LogFile
}

type stats struct {
Expand Down Expand Up @@ -91,23 +92,27 @@ func (mon *Monitor) ReopenLog() {

func (mon *Monitor) Sample(ref ptime.Time, local ptime.ClockTime, delayed bool) {
mon.addMissingOffsets(ref)
off := local.T.Sub(ref)
kind := sampleOK
if !delayed && mon.isOutlier(off, local.Era) {
kind = sampleOutlier
if !delayed && mon.isInvalid(ref, local) {
kind = sampleInvalid
} else {
mon.servo.Sample(ref, local, delayed)
}
freq := mon.servo.FreqOffset()
off := local.T.Sub(ref)
mon.recordSample(kind, off, local.Era, freq)
mon.writeLogEntry(kind, ref, off, local.Era, freq)
mon.sendEvent(kind, off, local.Era, freq)
if delayed {
return
}
inSync := mon.isInSync()
now := time.Now()
mon.lastSampleTime = now
if kind == sampleOK {
mon.lastSampleTime = time.Now()
if inSync {
mon.lastSyncRefTime = ref
}
}
if mon.ppsStopped {
mon.lg.Warn("1PPS signal restored")
mon.ppsStopped = false
Expand Down Expand Up @@ -151,7 +156,7 @@ func (mon *Monitor) recordSample(kind sampleKind, off time.Duration, era ptime.E
mon.lg.Info("missed 1PPS sample")
return
}
if kind == sampleOutlier {
if kind == sampleInvalid {
mon.lg.Info("outlier sample", "off", off, "freq", freq)
return
}
Expand All @@ -172,7 +177,7 @@ func (mon *Monitor) writeLogEntry(kind sampleKind, ref ptime.Time, off time.Dura
return
}
outlierFlag := 0
if kind == sampleOutlier {
if kind == sampleInvalid {
outlierFlag = 1
}
// Stable32 treats 0 as meaning a gap, so we output 1e-99 for 0.
Expand Down Expand Up @@ -209,7 +214,7 @@ func (mon *Monitor) sendEvent(kind sampleKind, off time.Duration, era ptime.Era,
StepCount: uint32(stepCount),
StepCountChanging: changing,
Freq: freq,
Outlier: kind == sampleOutlier,
Outlier: kind == sampleInvalid,
})
if err != nil {
mon.lg.Error("error creating sample event", "err", err)
Expand Down Expand Up @@ -257,20 +262,38 @@ func (mon *Monitor) isInSync() bool {
return mon.samples.isInSync(mon.inSync, &defaultSampleConfig)
}

func (mon *Monitor) isOutlier(off time.Duration, era ptime.Era) bool {
offSecs := off.Seconds()
// maxValidDriftPPM is the maximum drift in PPM before considering sample invalid
// This should kick in if there is some sort of hardware problem giving us crazy offsets
const maxValidDriftPPM = 50

func (mon *Monitor) isInvalid(ref ptime.Time, local ptime.ClockTime) bool {
off := local.T.Sub(ref).Seconds()

// if this offset isn't bad enough to take use out of sync,
// then there's no need to consider it as an outlier
// this should be a quick check that succeeds most of the time
if math.Abs(offSecs) <= defaultSampleConfig.maxOffset {
if math.Abs(off) <= defaultSampleConfig.maxOffset {
return false
}
// don't do outlier detection unless we are using the PI controller
if !mon.servo.Locked(era) {
if !mon.servo.Locked(local.Era) {
return false
}
return mon.samples.madIsOutlier(offSecs, &defaultSampleConfig)
return mon.isInsane(off, ref) || mon.samples.madIsOutlier(off, &defaultSampleConfig)
}

func (mon *Monitor) isInsane(off float64, ref ptime.Time) bool {
if mon.lastSyncRefTime.IsZero() {
return false
}
if math.Abs(mon.samples.last(0).off - off) < defaultSampleConfig.maxOffset {
return false
}
syncDiff := ref.Sub(mon.lastSyncRefTime).Seconds()
if math.Abs(off) > syncDiff*(maxValidDriftPPM/1e6) {
return true
}
return false
}

func (mon *Monitor) updateInSync(inSync bool) {
Expand Down Expand Up @@ -342,7 +365,7 @@ func (a *accumPhase) add(kind sampleKind, v float64) {
a.maxAbs = math.Max(a.maxAbs, av)
case sampleMissing:
a.nMissing++
case sampleOutlier:
case sampleInvalid:
a.nOutliers++
}
}
Expand Down
2 changes: 1 addition & 1 deletion internal/mon/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ type sampleKind int
const (
sampleMissing sampleKind = iota
sampleOK
sampleOutlier
sampleInvalid
)

type sampleData struct {
Expand Down
16 changes: 8 additions & 8 deletions internal/mon/sample_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ var inSyncTests = [][]sampleData{
sampleData{-19e-9, sampleOK},
sampleData{12e-9, sampleOK},
sampleData{-4e-9, sampleOK},
sampleData{-1e6, sampleOutlier},
sampleData{-1e6, sampleInvalid},
sampleData{0, sampleMissing},
sampleData{1e3, sampleOutlier},
sampleData{1e3, sampleInvalid},
sampleData{10e-9, sampleOK},
sampleData{-4e-9, sampleOK},
},
Expand All @@ -36,9 +36,9 @@ var inSyncTests = [][]sampleData{
sampleData{49e-9, sampleOK},
sampleData{3e-9, sampleOK},
sampleData{-4e-9, sampleOK},
sampleData{-1e6, sampleOutlier},
sampleData{-1e6, sampleInvalid},
sampleData{0, sampleMissing},
sampleData{1e3, sampleOutlier},
sampleData{1e3, sampleInvalid},
sampleData{10e-9, sampleOK},
sampleData{-4e-9, sampleOK},
sampleData{0, sampleMissing},
Expand All @@ -48,8 +48,8 @@ var inSyncTests = [][]sampleData{
sampleData{20e-9, sampleOK},
sampleData{3e-9, sampleOK},
sampleData{-4e-9, sampleOK},
sampleData{-2e6, sampleOutlier},
sampleData{-2e6, sampleOutlier},
sampleData{-2e6, sampleInvalid},
sampleData{-2e6, sampleInvalid},
sampleData{19e-9, sampleOK},
sampleData{-2e-9, sampleOK},
},
Expand All @@ -60,9 +60,9 @@ func TestInSync(t *testing.T) {
inSync := false
w := newSampleWindow(sampleWindowSize)
for j, s := range test {
if s.kind != sampleMissing && w.madIsOutlier(s.off, &defaultSampleConfig) != (s.kind == sampleOutlier) {
if s.kind != sampleMissing && w.madIsOutlier(s.off, &defaultSampleConfig) != (s.kind == sampleInvalid) {
n, min, max := w.mad(defaultSampleConfig.madMultiple)
t.Errorf("Test %d, sample %d, expected madIsOutlier == %v (n = %d, min = %v, max = %v)", i, j, s.kind == sampleOutlier, n, min, max)
t.Errorf("Test %d, sample %d, expected madIsOutlier == %v (n = %d, min = %v, max = %v)", i, j, s.kind == sampleInvalid, n, min, max)
}
w.append(s.kind, s.off, 1)
inSync = w.isInSync(inSync, &defaultSampleConfig)
Expand Down