From 15f1c5445f606a45446af387d22c21b2b033d284 Mon Sep 17 00:00:00 2001 From: rmanach Date: Wed, 30 Apr 2025 13:46:13 +0200 Subject: [PATCH] add command to check deployment state --- deployers/commons.go | 14 +++++++ deployers/swarm.go | 93 ++++++++++++++++++++++++++++++++++++++++++-- docker/client.go | 42 ++++++++++++++++---- docker/models.go | 4 +- main.go | 9 ++++- 5 files changed, 147 insertions(+), 15 deletions(-) diff --git a/deployers/commons.go b/deployers/commons.go index 08d051b..5907b49 100644 --- a/deployers/commons.go +++ b/deployers/commons.go @@ -29,8 +29,22 @@ const ( Swarm DeployerType = "swarm" GracefulTimeout = 10 * time.Second + + DefaultStateTimeout = 30 * time.Second ) +type checkStateOption struct { + timeout *time.Duration +} + +type fnStateOption func(c *checkStateOption) + +func WithTimeout(duration time.Duration) fnStateOption { + return func(c *checkStateOption) { + c.timeout = &duration + } +} + // Base struct of the deployers. // It handles the main informations to build a deployer. // diff --git a/deployers/swarm.go b/deployers/swarm.go index 8d9cccd..a44ebf9 100644 --- a/deployers/swarm.go +++ b/deployers/swarm.go @@ -6,6 +6,8 @@ import ( "fmt" "os" "path/filepath" + "sync" + "time" "gitea.thegux.fr/hmdeploy/connection" "gitea.thegux.fr/hmdeploy/docker" @@ -14,13 +16,16 @@ import ( "github.com/rs/zerolog/log" ) +const stateTickDuration = 4 * time.Second + var ErrSwarmDeployerNoArchive = errors.New("no archive found to be deployed") // SwarmDeployer handles the deployment of a Docker service on the swarm instance. type SwarmDeployer struct { *deployer conn connection.IConnection - dcli docker.IClient + dloc docker.IClient + drem *docker.RemoteClient archivePath string } @@ -30,7 +35,8 @@ func NewSwarmDeployer( ctx context.Context, project *models.Project, netInfo *models.HMNetInfo, - dockerClient docker.IClient, + dloc docker.IClient, + drem *docker.RemoteClient, ) (SwarmDeployer, error) { var sd SwarmDeployer @@ -45,7 +51,8 @@ func NewSwarmDeployer( } sd.conn = &conn - sd.dcli = dockerClient + sd.dloc = dloc + sd.drem = drem sd.deployer = newDeployer(ctx, Swarm, project) return sd, nil @@ -100,7 +107,7 @@ func (sd *SwarmDeployer) Build() error { filesToArchive := []string{} for idx := range sd.project.ImageNames { - tarFile, err := sd.dcli.Save(sd.project.ImageNames[idx], sd.project.Dir) + tarFile, err := sd.dloc.Save(sd.project.ImageNames[idx], sd.project.Dir) if err != nil { sd.setDone(err) return err @@ -189,12 +196,85 @@ func (sd *SwarmDeployer) Deploy() error { return err } + if err := sd.checkState(docker.Running); err != nil { + sd.setDone(err) + return err + } + log.Info().Msg("swarm deployment done with success") sd.setDone(nil) return nil } +// checkState checks the state of the deployment. +// It loops over all the services deployed for the project (replicas included) and +// checks if the `target` state match the services states. +// +// There's a timeout (default: 30s) that you can set with the options: `WithTimeout`. +func (sd *SwarmDeployer) checkState(target docker.ServiceStatus, options ...fnStateOption) error { + var opts checkStateOption + for _, opt := range options { + opt(&opts) + } + + var checkErr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + + timeoutDuration := DefaultStateTimeout + if opts.timeout != nil { + timeoutDuration = *opts.timeout + } + + ticker := time.NewTicker(stateTickDuration) + ctx, fnCancel := context.WithDeadline(sd.ctx, time.Now().UTC().Add(timeoutDuration)) + defer fnCancel() + + for { + select { + case <-ticker.C: + log.Info(). + Str("project", sd.project.Name). + Str("state", string(target)). + Msg("checking project state...") + srvs, err := sd.drem.ExtractServicesDetails(docker.WithName(sd.project.Name)) + if err != nil { + checkErr = err + return + } + + ready := true + mainloop: + for idx := range srvs { + for idy := range srvs[idx].Replicas { + if srvs[idx].Replicas[idy].State != docker.ServiceStatus(target) { + log.Info().Dur("retry (ms)", stateTickDuration).Msg("project not in good state yet, retrying...") + ready = false + break mainloop + } + } + } + if ready { + return + } + case <-ctx.Done(): + msg := "swarm deployment skipped" + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + msg = "swarm check state timeout" + } + checkErr = fmt.Errorf("%w, %s", ErrContextDone, msg) + return + } + } + }() + + wg.Wait() + return checkErr +} + func (sd *SwarmDeployer) Destroy() error { sd.processing.Store(true) defer sd.processing.Store(false) @@ -205,6 +285,11 @@ func (sd *SwarmDeployer) Destroy() error { return err } + if err := sd.checkState(docker.Shutdown); err != nil { + sd.setDone(err) + return err + } + log.Info().Msg("swarm undeployment done with success") sd.setDone(nil) diff --git a/docker/client.go b/docker/client.go index 7b5a7d2..844535a 100644 --- a/docker/client.go +++ b/docker/client.go @@ -105,8 +105,25 @@ func NewRemoteClient(netInfo *models.HMNetInfo) (RemoteClient, error) { return rc, nil } -func (c *RemoteClient) getIDS() ([]string, error) { - output, err := c.conn.Execute("docker service ls -q") +type extractOption struct { + filter string +} + +type fnExtractOption func(*extractOption) + +func WithName(name string) fnExtractOption { + return func(o *extractOption) { + o.filter = name + } +} + +func (c *RemoteClient) getIDS(name string) ([]string, error) { + cmd := "docker service ls -q" + if name != "" { + cmd += " --filter name=" + name + } + + output, err := c.conn.Execute(cmd) if err != nil { return nil, err } @@ -134,12 +151,7 @@ func (c *RemoteClient) getServiceDetails(id string) (Service, error) { return sc, nil } -func (c *RemoteClient) ExtractServicesDetails() (Services, error) { - ids, err := c.getIDS() - if err != nil { - return nil, err - } - +func (c *RemoteClient) extractServicesDetails(ids ...string) (Services, error) { services := Services{} for _, id := range ids { srv, err := c.getServiceDetails(id) @@ -151,3 +163,17 @@ func (c *RemoteClient) ExtractServicesDetails() (Services, error) { return services, nil } + +func (c *RemoteClient) ExtractServicesDetails(options ...fnExtractOption) (Services, error) { + var opts extractOption + for _, opt := range options { + opt(&opts) + } + + ids, err := c.getIDS(opts.filter) + if err != nil { + return nil, err + } + + return c.extractServicesDetails(ids...) +} diff --git a/docker/models.go b/docker/models.go index 0feebdc..04e6e7a 100644 --- a/docker/models.go +++ b/docker/models.go @@ -6,6 +6,8 @@ import ( "strconv" "strings" "time" + + "github.com/rs/zerolog/log" ) const nbImageParts = 2 @@ -170,7 +172,7 @@ func (s *Service) UnmarshalJSON(data []byte) error { nbReplicas := ci.Details[0].Spec.Mode.Replicated.Replicas if len(ci.States) < nbReplicas { - return fmt.Errorf("must have %d replicas but have %d", nbReplicas, len(ci.States)) + log.Warn().Msg(fmt.Sprintf("must have %d replicas but have %d", nbReplicas, len(ci.States))) } networks := []string{} diff --git a/main.go b/main.go index 1eef239..5a0dc75 100644 --- a/main.go +++ b/main.go @@ -208,8 +208,13 @@ func initDeployers( return deps, fmt.Errorf("%w, swarm net info does not exist", ErrNetInfoNotFound) } - dcli := docker.NewLocalClient() - sd, err := deployers.NewSwarmDeployer(ctx, project, swarmNet, &dcli) + dloc := docker.NewLocalClient() + drem, err := docker.NewRemoteClient(swarmNet) + if err != nil { + return deps, err + } + + sd, err := deployers.NewSwarmDeployer(ctx, project, swarmNet, &dloc, &drem) if err != nil { return deps, fmt.Errorf("%w, unable to init swarm deployer, err=%v", ErrDeployerInit, err) }