From 7da62010183ff343d7fadd72a2fb12ab5dea9733 Mon Sep 17 00:00:00 2001 From: Benjamin Wenger <ben@elixxir.ioo> Date: Thu, 6 May 2021 17:36:01 -0700 Subject: [PATCH] added a hack to fix stop network follower crashes --- interfaces/IsRunning.go | 8 ++++++++ network/follow.go | 17 ++++++++++++++--- network/manager.go | 2 +- stoppable/single.go | 2 +- 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 interfaces/IsRunning.go diff --git a/interfaces/IsRunning.go b/interfaces/IsRunning.go new file mode 100644 index 000000000..950b84286 --- /dev/null +++ b/interfaces/IsRunning.go @@ -0,0 +1,8 @@ +package interfaces + +// this interface is used to allow the follower to to be stopped later if it +// fails + +type Running interface{ + IsRunning()bool +} diff --git a/network/follow.go b/network/follow.go index 83e82ec0b..71eee53ed 100644 --- a/network/follow.go +++ b/network/follow.go @@ -46,7 +46,7 @@ type followNetworkComms interface { // followNetwork polls the network to get updated on the state of nodes, the // round status, and informs the client when messages can be retrieved. -func (m *manager) followNetwork(report interfaces.ClientErrorReport, quitCh <-chan struct{}) { +func (m *manager) followNetwork(report interfaces.ClientErrorReport, quitCh <-chan struct{}, isRunning interfaces.Running) { ticker := time.NewTicker(m.param.TrackNetworkPeriod) TrackTicker := time.NewTicker(debugTrackPeriod) rng := m.Rng.GetStream() @@ -58,17 +58,23 @@ func (m *manager) followNetwork(report interfaces.ClientErrorReport, quitCh <-ch rng.Close() done = true case <-ticker.C: - m.follow(report, rng, m.Comms) + m.follow(report, rng, m.Comms, isRunning) case <-TrackTicker.C: numPolls := atomic.SwapUint64(m.tracker, 0) jww.INFO.Printf("Polled the network %d times in the "+ "last %s", numPolls, debugTrackPeriod) } + if !isRunning.IsRunning(){ + jww.ERROR.Printf("Killing network follower " + + "due to failed exit") + return + } } } // executes each iteration of the follower -func (m *manager) follow(report interfaces.ClientErrorReport, rng csprng.Source, comms followNetworkComms) { +func (m *manager) follow(report interfaces.ClientErrorReport, rng csprng.Source, + comms followNetworkComms, isRunning interfaces.Running) { //get the identity we will poll for identity, err := m.Session.Reception().GetIdentity(rng) @@ -100,6 +106,11 @@ func (m *manager) follow(report interfaces.ClientErrorReport, rng csprng.Source, identity.EndRequest, identity.EndRequest.Sub(identity.StartRequest), host.GetId()) return comms.SendPoll(host, &pollReq) }) + if !isRunning.IsRunning(){ + jww.ERROR.Printf("Killing network follower " + + "due to failed exit") + return + } if err != nil { if report != nil { report( diff --git a/network/manager.go b/network/manager.go index 31adb4447..1cf065d69 100644 --- a/network/manager.go +++ b/network/manager.go @@ -130,7 +130,7 @@ func (m *manager) Follow(report interfaces.ClientErrorReport) (stoppable.Stoppab // Start the Network Tracker trackNetworkStopper := stoppable.NewSingle("TrackNetwork") - go m.followNetwork(report, trackNetworkStopper.Quit()) + go m.followNetwork(report, trackNetworkStopper.Quit(), trackNetworkStopper) multi.Add(trackNetworkStopper) // Message reception diff --git a/stoppable/single.go b/stoppable/single.go index 8821db88e..2e8fa78a2 100644 --- a/stoppable/single.go +++ b/stoppable/single.go @@ -52,7 +52,6 @@ func (s *Single) Name() string { func (s *Single) Close(timeout time.Duration) error { var err error s.once.Do(func() { - atomic.StoreUint32(&s.running, 0) timer := time.NewTimer(timeout) select { case <-timer.C: @@ -61,6 +60,7 @@ func (s *Single) Close(timeout time.Duration) error { err = errors.Errorf("%s failed to close", s.name) case s.quit <- struct{}{}: } + atomic.StoreUint32(&s.running, 0) }) return err } -- GitLab