add command to check deployment state

This commit is contained in:
rmanach 2025-04-30 13:46:13 +02:00
parent 57834a1b3c
commit 15f1c5445f
5 changed files with 147 additions and 15 deletions

View File

@ -29,8 +29,22 @@ const (
Swarm DeployerType = "swarm" Swarm DeployerType = "swarm"
GracefulTimeout = 10 * time.Second GracefulTimeout = 10 * time.Second
DefaultStateTimeout = 30 * time.Second
) )
// checkStateOption gathers the optional settings of a deployment state check.
type checkStateOption struct {
	// timeout stays nil as long as no option has set it.
	timeout *time.Duration
}

// fnStateOption is a functional option that mutates a checkStateOption.
type fnStateOption func(c *checkStateOption)

// WithTimeout returns an option that sets the state check timeout to `duration`.
func WithTimeout(duration time.Duration) fnStateOption {
	setTimeout := func(opts *checkStateOption) {
		opts.timeout = &duration
	}
	return setTimeout
}
// Base struct of the deployers. // Base struct of the deployers.
// It handles the main information to build a deployer. // It handles the main information to build a deployer.
// //

View File

@ -6,6 +6,8 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"sync"
"time"
"gitea.thegux.fr/hmdeploy/connection" "gitea.thegux.fr/hmdeploy/connection"
"gitea.thegux.fr/hmdeploy/docker" "gitea.thegux.fr/hmdeploy/docker"
@ -14,13 +16,16 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
const stateTickDuration = 4 * time.Second
var ErrSwarmDeployerNoArchive = errors.New("no archive found to be deployed") var ErrSwarmDeployerNoArchive = errors.New("no archive found to be deployed")
// SwarmDeployer handles the deployment of a Docker service on the swarm instance. // SwarmDeployer handles the deployment of a Docker service on the swarm instance.
type SwarmDeployer struct { type SwarmDeployer struct {
*deployer *deployer
conn connection.IConnection conn connection.IConnection
dcli docker.IClient dloc docker.IClient
drem *docker.RemoteClient
archivePath string archivePath string
} }
@ -30,7 +35,8 @@ func NewSwarmDeployer(
ctx context.Context, ctx context.Context,
project *models.Project, project *models.Project,
netInfo *models.HMNetInfo, netInfo *models.HMNetInfo,
dockerClient docker.IClient, dloc docker.IClient,
drem *docker.RemoteClient,
) (SwarmDeployer, error) { ) (SwarmDeployer, error) {
var sd SwarmDeployer var sd SwarmDeployer
@ -45,7 +51,8 @@ func NewSwarmDeployer(
} }
sd.conn = &conn sd.conn = &conn
sd.dcli = dockerClient sd.dloc = dloc
sd.drem = drem
sd.deployer = newDeployer(ctx, Swarm, project) sd.deployer = newDeployer(ctx, Swarm, project)
return sd, nil return sd, nil
@ -100,7 +107,7 @@ func (sd *SwarmDeployer) Build() error {
filesToArchive := []string{} filesToArchive := []string{}
for idx := range sd.project.ImageNames { for idx := range sd.project.ImageNames {
tarFile, err := sd.dcli.Save(sd.project.ImageNames[idx], sd.project.Dir) tarFile, err := sd.dloc.Save(sd.project.ImageNames[idx], sd.project.Dir)
if err != nil { if err != nil {
sd.setDone(err) sd.setDone(err)
return err return err
@ -189,12 +196,85 @@ func (sd *SwarmDeployer) Deploy() error {
return err return err
} }
if err := sd.checkState(docker.Running); err != nil {
sd.setDone(err)
return err
}
log.Info().Msg("swarm deployment done with success") log.Info().Msg("swarm deployment done with success")
sd.setDone(nil) sd.setDone(nil)
return nil return nil
} }
// checkState waits until every replica of every service deployed for the
// project reports the `target` state.
//
// The remote swarm is polled every `stateTickDuration`; the first poll happens
// one tick after the call. The wait ends when:
//   - all replicas match `target`: nil is returned,
//   - fetching the services details fails: that error is returned,
//   - the deployer context is done or the timeout expires: an error wrapping
//     `ErrContextDone` is returned.
//
// The timeout defaults to `DefaultStateTimeout` and can be overridden with the
// `WithTimeout` option.
//
// NOTE(review): when no service (or no replica) is returned for the project the
// check succeeds vacuously, whatever the `target` is. Confirm this is the wanted
// behavior for `docker.Running`.
func (sd *SwarmDeployer) checkState(target docker.ServiceStatus, options ...fnStateOption) error {
	var opts checkStateOption
	for _, opt := range options {
		opt(&opts)
	}

	timeoutDuration := DefaultStateTimeout
	if opts.timeout != nil {
		timeoutDuration = *opts.timeout
	}

	var checkErr error
	var wg sync.WaitGroup

	wg.Add(1)
	go func() {
		defer wg.Done()

		ticker := time.NewTicker(stateTickDuration)
		// Release the ticker resources on every exit path.
		defer ticker.Stop()

		// WithTimeout keeps the monotonic clock reading, so the deadline is not
		// affected by wall-clock adjustments.
		ctx, fnCancel := context.WithTimeout(sd.ctx, timeoutDuration)
		defer fnCancel()

		for {
			select {
			case <-ticker.C:
				log.Info().
					Str("project", sd.project.Name).
					Str("state", string(target)).
					Msg("checking project state...")

				srvs, err := sd.drem.ExtractServicesDetails(docker.WithName(sd.project.Name))
				if err != nil {
					checkErr = err
					return
				}

				ready := true
			mainloop:
				for idx := range srvs {
					for idy := range srvs[idx].Replicas {
						if srvs[idx].Replicas[idy].State != target {
							log.Info().
								Dur("retry (ms)", stateTickDuration).
								Msg("project not in good state yet, retrying...")
							ready = false
							// One replica in the wrong state is enough to wait for the next tick.
							break mainloop
						}
					}
				}

				if ready {
					return
				}
			case <-ctx.Done():
				// Distinguish a timeout of this check from a cancellation of the deployer context.
				msg := "swarm deployment skipped"
				if errors.Is(ctx.Err(), context.DeadlineExceeded) {
					msg = "swarm check state timeout"
				}
				checkErr = fmt.Errorf("%w, %s", ErrContextDone, msg)
				return
			}
		}
	}()

	wg.Wait()
	return checkErr
}
func (sd *SwarmDeployer) Destroy() error { func (sd *SwarmDeployer) Destroy() error {
sd.processing.Store(true) sd.processing.Store(true)
defer sd.processing.Store(false) defer sd.processing.Store(false)
@ -205,6 +285,11 @@ func (sd *SwarmDeployer) Destroy() error {
return err return err
} }
if err := sd.checkState(docker.Shutdown); err != nil {
sd.setDone(err)
return err
}
log.Info().Msg("swarm undeployment done with success") log.Info().Msg("swarm undeployment done with success")
sd.setDone(nil) sd.setDone(nil)

View File

@ -105,8 +105,25 @@ func NewRemoteClient(netInfo *models.HMNetInfo) (RemoteClient, error) {
return rc, nil return rc, nil
} }
func (c *RemoteClient) getIDS() ([]string, error) { type extractOption struct {
output, err := c.conn.Execute("docker service ls -q") filter string
}
// fnExtractOption is a functional option that mutates an extractOption.
type fnExtractOption func(*extractOption)
// WithName returns an option that stores `name` as the service filter
// used when listing the services to extract.
func WithName(name string) fnExtractOption {
	applyFilter := func(opts *extractOption) {
		opts.filter = name
	}
	return applyFilter
}
func (c *RemoteClient) getIDS(name string) ([]string, error) {
cmd := "docker service ls -q"
if name != "" {
cmd += " --filter name=" + name
}
output, err := c.conn.Execute(cmd)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -134,12 +151,7 @@ func (c *RemoteClient) getServiceDetails(id string) (Service, error) {
return sc, nil return sc, nil
} }
func (c *RemoteClient) ExtractServicesDetails() (Services, error) { func (c *RemoteClient) extractServicesDetails(ids ...string) (Services, error) {
ids, err := c.getIDS()
if err != nil {
return nil, err
}
services := Services{} services := Services{}
for _, id := range ids { for _, id := range ids {
srv, err := c.getServiceDetails(id) srv, err := c.getServiceDetails(id)
@ -151,3 +163,17 @@ func (c *RemoteClient) ExtractServicesDetails() (Services, error) {
return services, nil return services, nil
} }
// ExtractServicesDetails lists the service IDs known by the remote client and
// returns the details of each of them.
//
// Options are applied in order before listing; `WithName` narrows the listing
// with a name filter. Any error raised while listing the IDs is returned as is.
func (c *RemoteClient) ExtractServicesDetails(options ...fnExtractOption) (Services, error) {
	cfg := extractOption{}
	for i := range options {
		options[i](&cfg)
	}

	serviceIDs, err := c.getIDS(cfg.filter)
	if err != nil {
		return nil, err
	}

	return c.extractServicesDetails(serviceIDs...)
}

View File

@ -6,6 +6,8 @@ import (
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/rs/zerolog/log"
) )
const nbImageParts = 2 const nbImageParts = 2
@ -170,7 +172,7 @@ func (s *Service) UnmarshalJSON(data []byte) error {
nbReplicas := ci.Details[0].Spec.Mode.Replicated.Replicas nbReplicas := ci.Details[0].Spec.Mode.Replicated.Replicas
if len(ci.States) < nbReplicas { if len(ci.States) < nbReplicas {
return fmt.Errorf("must have %d replicas but have %d", nbReplicas, len(ci.States)) log.Warn().Msg(fmt.Sprintf("must have %d replicas but have %d", nbReplicas, len(ci.States)))
} }
networks := []string{} networks := []string{}

View File

@ -208,8 +208,13 @@ func initDeployers(
return deps, fmt.Errorf("%w, swarm net info does not exist", ErrNetInfoNotFound) return deps, fmt.Errorf("%w, swarm net info does not exist", ErrNetInfoNotFound)
} }
dcli := docker.NewLocalClient() dloc := docker.NewLocalClient()
sd, err := deployers.NewSwarmDeployer(ctx, project, swarmNet, &dcli) drem, err := docker.NewRemoteClient(swarmNet)
if err != nil {
return deps, err
}
sd, err := deployers.NewSwarmDeployer(ctx, project, swarmNet, &dloc, &drem)
if err != nil { if err != nil {
return deps, fmt.Errorf("%w, unable to init swarm deployer, err=%v", ErrDeployerInit, err) return deps, fmt.Errorf("%w, unable to init swarm deployer, err=%v", ErrDeployerInit, err)
} }