From f44700c352e7420b80e374b045c30428a6576bdc Mon Sep 17 00:00:00 2001 From: James Elliott Date: Mon, 20 Feb 2023 16:37:22 +1100 Subject: [PATCH] fix(commands): internal services not cleaned up properly (#4966) This fixes a race condition which in some circumstances (seemed to only affect a deliberately under provisioned VM in testing, however it could still theoretically occur on any system) can cause the process to hang during a shutdown. While unrelated this also adds additional trace logging to the shutdown process to better capture each stage to better facilitate debugging in the future specifically when one particular service is taking time to stop. Fixes #4963 --- internal/commands/const.go | 9 +++ internal/commands/services.go | 106 +++++++++++++++++++++++++-------- internal/suites/environment.go | 2 +- 3 files changed, 92 insertions(+), 25 deletions(-) diff --git a/internal/commands/const.go b/internal/commands/const.go index 1e0f8e320..d3a03e8f5 100644 --- a/internal/commands/const.go +++ b/internal/commands/const.go @@ -777,3 +777,12 @@ Layouts: const ( fmtLogServerListening = "Server is listening for %s connections on '%s' path '%s'" ) + +const ( + logFieldService = "service" + logFieldFile = "file" + logFieldOP = "op" + + serviceTypeServer = "server" + serviceTypeWatcher = "watcher" +) diff --git a/internal/commands/services.go b/internal/commands/services.go index 3d5223ed8..10eef7e7d 100644 --- a/internal/commands/services.go +++ b/internal/commands/services.go @@ -10,6 +10,7 @@ import ( "strings" "sync" "syscall" + "time" "github.com/fsnotify/fsnotify" "github.com/sirupsen/logrus" @@ -23,11 +24,12 @@ import ( // NewServerService creates a new ServerService with the appropriate logger etc. func NewServerService(name string, server *fasthttp.Server, listener net.Listener, paths []string, isTLS bool, log *logrus.Logger) (service *ServerService) { return &ServerService{ + name: name, server: server, listener: listener, paths: paths, isTLS: isTLS, - log: log.WithFields(map[string]any{"service": "server", "server": name}), + log: log.WithFields(map[string]any{logFieldService: serviceTypeServer, serviceTypeServer: name}), } } @@ -53,10 +55,11 @@ func NewFileWatcherService(name, path string, reload ProviderReload, log *logrus return nil, err } - entry := log.WithFields(map[string]any{"service": "watcher", "watcher": name}) + entry := log.WithFields(map[string]any{logFieldService: serviceTypeWatcher, serviceTypeWatcher: name}) if info.IsDir() { service = &FileWatcherService{ + name: name, watcher: watcher, reload: reload, log: entry, @@ -64,6 +67,7 @@ func NewFileWatcherService(name, path string, reload ProviderReload, log *logrus } } else { service = &FileWatcherService{ + name: name, watcher: watcher, reload: reload, log: entry, @@ -86,12 +90,25 @@ type ProviderReload interface { // Service represents the required methods to support handling a service. type Service interface { + // ServiceType returns the type name for the Service. + ServiceType() string + + // ServiceName returns the individual name for the Service. + ServiceName() string + + // Run performs the running operations for the Service. Run() (err error) + + // Shutdown perform the shutdown cleanup and termination operations for the Service. Shutdown() + + // Log returns the logger configured for the service. + Log() *logrus.Entry } // ServerService is a Service which runs a webserver. type ServerService struct { + name string server *fasthttp.Server paths []string isTLS bool @@ -99,6 +116,16 @@ type ServerService struct { log *logrus.Entry } +// ServiceType returns the service type for this service, which is always 'server'. +func (service *ServerService) ServiceType() string { + return serviceTypeServer +} + +// ServiceName returns the individual name for this service. +func (service *ServerService) ServiceName() string { + return service.name +} + // Run the ServerService. func (service *ServerService) Run() (err error) { defer func() { @@ -120,13 +147,24 @@ func (service *ServerService) Run() (err error) { // Shutdown the ServerService. func (service *ServerService) Shutdown() { - if err := service.server.Shutdown(); err != nil { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + + defer cancel() + + if err := service.server.ShutdownWithContext(ctx); err != nil { service.log.WithError(err).Error("Error occurred during shutdown") } } +// Log returns the *logrus.Entry of the ServerService. +func (service *ServerService) Log() *logrus.Entry { + return service.log +} + // FileWatcherService is a Service that watches files for changes. type FileWatcherService struct { + name string + watcher *fsnotify.Watcher reload ProviderReload @@ -135,6 +173,16 @@ type FileWatcherService struct { directory string } +// ServiceType returns the service type for this service, which is always 'watcher'. +func (service *FileWatcherService) ServiceType() string { + return serviceTypeWatcher +} + +// ServiceName returns the individual name for this service. +func (service *FileWatcherService) ServiceName() string { + return service.name +} + // Run the FileWatcherService. func (service *FileWatcherService) Run() (err error) { defer func() { @@ -143,7 +191,7 @@ func (service *FileWatcherService) Run() (err error) { } }() - service.log.WithField("file", filepath.Join(service.directory, service.file)).Info("Watching for file changes to the file") + service.log.WithField(logFieldFile, filepath.Join(service.directory, service.file)).Info("Watching file for changes") for { select { @@ -152,34 +200,36 @@ func (service *FileWatcherService) Run() (err error) { return nil } + log := service.log.WithFields(map[string]any{logFieldFile: event.Name, logFieldOP: event.Op}) + if service.file != "" && service.file != filepath.Base(event.Name) { - service.log.WithFields(map[string]any{"file": event.Name, "op": event.Op}).Tracef("File modification detected to irrelevant file") + log.Trace("File modification detected to irrelevant file") break } switch { case event.Op&fsnotify.Write == fsnotify.Write, event.Op&fsnotify.Create == fsnotify.Create: - service.log.WithFields(map[string]any{"file": event.Name, "op": event.Op}).Debug("File modification was detected") + log.Debug("File modification was detected") var reloaded bool switch reloaded, err = service.reload.Reload(); { case err != nil: - service.log.WithFields(map[string]any{"file": event.Name, "op": event.Op}).WithError(err).Error("Error occurred during reload") + log.WithError(err).Error("Error occurred during reload") case reloaded: - service.log.WithField("file", event.Name).Info("Reloaded successfully") + log.Info("Reloaded successfully") default: - service.log.WithField("file", event.Name).Debug("Reload of was triggered but it was skipped") + log.Debug("Reload was triggered but it was skipped") } case event.Op&fsnotify.Remove == fsnotify.Remove: - service.log.WithFields(map[string]any{"file": event.Name, "op": event.Op}).Debug("File remove was detected") + log.Debug("File remove was detected") } case err, ok := <-service.watcher.Errors: if !ok { return nil } - service.log.WithError(err).Errorf("Error while watching files") + service.log.WithError(err).Error("Error while watching file for changes") } } } @@ -191,6 +241,11 @@ func (service *FileWatcherService) Shutdown() { } } +// Log returns the *logrus.Entry of the FileWatcherService. +func (service *FileWatcherService) Log() *logrus.Entry { + return service.log +} + func svcSvrMainFunc(ctx *CmdCtx) (service Service) { switch svr, listener, paths, isTLS, err := server.CreateDefaultServer(ctx.config, ctx.providers); { case err != nil: @@ -267,34 +322,35 @@ func servicesRun(ctx *CmdCtx) { } } - ctx.log.Info("Startup Complete") + ctx.log.Info("Startup complete") select { case s := <-quit: - switch s { - case syscall.SIGINT: - ctx.log.WithField("signal", "SIGINT").Debugf("Shutdown started due to signal") - case syscall.SIGTERM: - ctx.log.WithField("signal", "SIGTERM").Debugf("Shutdown started due to signal") - } + ctx.log.WithField("signal", s.String()).Debug("Shutdown initiated due to process signal") case <-cctx.Done(): - ctx.log.Debugf("Shutdown started due to context completion") + ctx.log.Debug("Shutdown initiated due to context completion") } cancel() - ctx.log.Infof("Shutting down") + ctx.log.Info("Shutdown initiated") wgShutdown := &sync.WaitGroup{} + ctx.log.Tracef("Shutdown of %d services is required", len(services)) + for _, service := range services { - go func() { + wgShutdown.Add(1) + + go func(service Service) { + service.Log().Trace("Shutdown of service initiated") + service.Shutdown() wgShutdown.Done() - }() - wgShutdown.Add(1) + service.Log().Trace("Shutdown of service complete") + }(service) } wgShutdown.Wait() @@ -306,6 +362,8 @@ func servicesRun(ctx *CmdCtx) { } if err = group.Wait(); err != nil { - ctx.log.WithError(err).Errorf("Error occurred waiting for shutdown") + ctx.log.WithError(err).Error("Error occurred waiting for shutdown") } + + ctx.log.Info("Shutdown complete") } diff --git a/internal/suites/environment.go b/internal/suites/environment.go index 0c79b898d..1149ac9f0 100644 --- a/internal/suites/environment.go +++ b/internal/suites/environment.go @@ -42,7 +42,7 @@ func waitUntilAutheliaBackendIsReady(dockerEnvironment *DockerEnvironment) error 90*time.Second, dockerEnvironment, "authelia-backend", - []string{"Startup Complete"}) + []string{"Startup complete"}) } func waitUntilAutheliaFrontendIsReady(dockerEnvironment *DockerEnvironment) error {