Merge pull request 'feat/engine' (#1) from feat/engine into main

Reviewed-on: https://gitea.thegux.fr/rmanach/cycle-scheduler/pulls/1
2024-09-24 14:47:21 +00:00 · 2024-09-24 14:47:21 +00:00 · 3f1afb63d4
commit 3f1afb63d4
parent 052958eb7e df9a0ffbbc
6 changed files with 274 additions and 355 deletions
--- a/README.md
+++ b/README.md
@ -1,48 +1,18 @@
 # cycle-scheduler

-cycle-scheduler is a simple scheduler handling jobs and executes them at regular interval.
-
-Here a simple representation:
-```ascii
-+------------------------------------------------------+
-| +---+ +---+ +---+ +---+ +---+                  +---+ |
-| |   | |   | |   | |   | |   |                  |   | |
-| |   | |   | |   | |   | |   |                  |   | |
-| |   | |   | |   | |   | |   |                  |   | |
-| |   | |   | |   | |   | |   |                  |   | |
-| |   | |   | |   | |   | |   |                  |   | |
-| |   | |   | |   | |   | |   |                  |   | |
-| |s1 | |s2 | |s3 | |s4 | |   |                  |s60| |
-| +---+ +---+ +---+ +---+ +---+                  +---+ |
-+---------------^--------------------------------------+
-```
-Jobs are handle in a array of job slices.
-
-At each interval (clock), the cursor `^` moves to the next slot (s*).
-If there are jobs, they are sent to workers to be executed
-and the slot is cleaned.
-At the end of the slot (s60), the cursor re-starts a new cycle from s1.
-
-If a job is not in a desire state, the job is re-scheduled in the current slot to be re-executed in the next cycle.
-
-**NOTE**: This scheduler does not accept long running tasks. Job execution have a fixed timeout of 10s.
-Pooling tasks are more suitable for this kind of scheduler.
+cycle-scheduler is a simple scheduler that handles tasks and executes them at a regular interval. If a task is not in the desired state, it is re-scheduled with a backoff.

 ## Run
 You can run sample tests from `main.go` to see the scheduler in action:
 ```bash
 make run
 ```
-If all goes well, you should see this kind of output in the stdout:
-```ascii
-# cycle-scheduler (slot: 7)
-_ P _ _ _ _ _ _ _ _ _ _ _ _
- - - - - - ^ - - - - - - - 
-```
-> **P** means *pending* state

-You can adjust the clock interval as needed in `main.go`:
+You can adjust the clock interval and the number of workers as needed in the constants section of `main.go`:
 ```go
-interval := 200 * time.Millisecond
+const (
+	MaxWorkers = 5
+	Interval   = 2000 * time.Millisecond
+)
 ```

--- a/internal/job/job.go
+++ b/internal/job/job.go
@ -59,7 +59,6 @@ type JobDetails struct {
 	Err       string     `json:"error"`
 }

-// TODO(rmanach): add priority level
 type Job struct {
 	l         sync.RWMutex
 	id        uuid.UUID
@ -71,7 +70,7 @@ type Job struct {
 	chAbort   chan struct{}
 }

-func NewJob(task FnJob, row, col int) Job {
+func NewJob(task FnJob) Job {
 	return Job{
 		id:        uuid.New(),
 		createdAt: time.Now().UTC(),
@ -130,7 +129,10 @@ func (j *Job) setFail(err error) {
 	now := time.Now().UTC()
 	j.updatedAt = &now

+	if j.state != Abort {
 		j.state = Failed
+	}
+
 	j.err = err
 }

--- a/internal/scheduler/scheduler.go
+++ b/internal/scheduler/scheduler.go
@ -3,8 +3,7 @@ package scheduler
 import (
 	"context"
 	"cycle-scheduler/internal/job"
-	"fmt"
-	"strings"
+	"math"
 	"sync"
 	"time"

@ -12,45 +11,23 @@ import (
 	"github.com/rs/zerolog/log"
 )

-const (
-	TableTitle  = "# cycle-scheduler"
-	Cursor      = "^"
-	CycleLength = 60
-	MaxWorkers  = 5
-)
+const ExponentialFactor = 1.8

-const MaxSlotsIdx = 59
-
-type JobSlot struct {
-	*job.Job
-	row int
-}
-
-// SchedulerCycle is a dumb scheduler.
-// It handle job and executes it at each cycle (60 * interval).
-//
-// Jobs are handle in a array of job slices.
-// At each interval (clock), the cursor moves to the next slot (s*).
-// If there are jobs, they are sent to workers to be executed
-// and the slot is cleaned.
-//
-// At the end of the slot (s60), the cursor re-starts a cycle at s1.
+// SchedulerCycle is a simple scheduler that handles jobs and executes them at a regular interval.
+// If a task is not in the desired state, it is re-scheduled with a backoff.
 type SchedulerCycle struct {
-	l  sync.RWMutex
 	wg sync.WaitGroup

 	ctx      context.Context
 	fnCancel context.CancelFunc

 	interval time.Duration
-	currentSlot int
-	slots       [60][]*job.Job
-	jobs        map[uuid.UUID]*job.Job
+	tasks    tasks

-	chJobs chan *JobSlot
+	chTasks chan *task
 }

-func NewSchedulerCycle(ctx context.Context, interval time.Duration) *SchedulerCycle {
+func NewSchedulerCycle(ctx context.Context, interval time.Duration, workers uint32) *SchedulerCycle {
 	ctxChild, fnCancel := context.WithCancel(ctx)

 	c := SchedulerCycle{
@ -58,17 +35,71 @@ func NewSchedulerCycle(ctx context.Context, interval time.Duration) *SchedulerCy
 		ctx:      ctxChild,
 		fnCancel: fnCancel,
 		interval: interval,
-		currentSlot: 0,
-		slots:       [60][]*job.Job{},
-		jobs:        make(map[uuid.UUID]*job.Job),
-		chJobs:      make(chan *JobSlot),
+		tasks:    newTasks(),
+		chTasks:  make(chan *task),
 	}

-	c.run()
+	c.run(workers)

 	return &c
 }

+func (c *SchedulerCycle) backoff(t *task) {
+	backoff := c.interval + time.Duration(math.Pow(ExponentialFactor, float64(t.attempts.Load())))
+
+	t.timer.set(
+		time.AfterFunc(backoff, func() {
+			select {
+			case c.chTasks <- t:
+			default:
+				log.Error().Str("task id", t.GetID().String()).Msg("unable to execute task to the worker, delayed it")
+				c.backoff(t)
+			}
+		}),
+	)
+}
+
+// exec sends the task to a worker now or, if all the workers are in use, delays it with a backoff.
+func (c *SchedulerCycle) exec(t *task) {
+	select {
+	case c.chTasks <- t:
+	default:
+		log.Error().Str("task id", t.GetID().String()).Msg("unable to execute the task to a worker now, delayed it")
+		c.backoff(t)
+	}
+}
+
+func (c *SchedulerCycle) getTask(id uuid.UUID) *task {
+	return c.tasks.get(id)
+}
+
+// run launches n workers to execute tasks.
+// If a task returns `ErrJobNotCompletedYet`, it is re-scheduled with a backoff.
+func (c *SchedulerCycle) run(n uint32) {
+	for i := 0; i < int(n); i++ {
+		c.wg.Add(1)
+		go func() {
+			defer c.wg.Done()
+			for {
+				select {
+				case t := <-c.chTasks:
+					c.execute(t, c.backoff)
+				case <-c.ctx.Done():
+					log.Error().Msg("context done, worker is stopping...")
+					return
+				}
+			}
+		}()
+	}
+}
+
+func (c *SchedulerCycle) execute(t *task, fnFallBack func(*task)) {
+	t.run(c.ctx)
+	if t.GetState() == job.Pending {
+		fnFallBack(t)
+	}
+}
+
 func (c *SchedulerCycle) Stop() {
 	c.fnCancel()
 }
@ -77,295 +108,56 @@ func (c *SchedulerCycle) Done() <-chan struct{} {
 	done := make(chan struct{})
 	go func() {
 		<-c.ctx.Done()
-		c.wg.Done()
+		c.wg.Wait()
 		done <- struct{}{}
 	}()
 	return done
 }

 func (c *SchedulerCycle) Len() int {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	return len(c.jobs)
+	return c.tasks.len()
 }

-func (c *SchedulerCycle) HasAllJobsDone() bool {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	for _, j := range c.jobs {
-		if j.GetState() == job.Pending || j.GetState() == job.Running {
-			return false
-		}
+// TasksDone checks whether all the tasks have been completed.
+func (c *SchedulerCycle) TasksDone() bool {
+	return c.tasks.completed()
 }

-	return true
+func (c *SchedulerCycle) GetTasksDetails() []TaskDetails {
+	return c.tasks.getAllDetails()
 }

-func (c *SchedulerCycle) GetJobsDetails() []job.JobDetails {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	details := []job.JobDetails{}
-	for _, j := range c.jobs {
-		details = append(details, j.IntoDetails())
+// GetTaskDetails returns the task details by id.
+func (c *SchedulerCycle) GetTaskDetails(id uuid.UUID) TaskDetails {
+	return c.tasks.getDetails(id)
 }

-	return details
-}
-
-// Delay builds a job and add it to the scheduler engine.
+// Delay builds a task and adds it to the scheduler engine.
 func (c *SchedulerCycle) Delay(fnJob job.FnJob) uuid.UUID {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	nextSlot := c.currentSlot + 1
-	if nextSlot > MaxSlotsIdx {
-		nextSlot = 0
+	select {
+	case <-c.Done():
+		log.Error().Msg("context done unable to add new job")
+	default:
 	}

-	j := job.NewJob(fnJob, nextSlot, len(c.slots[nextSlot]))
+	t := newTask(fnJob)

-	c.slots[nextSlot] = append(c.slots[nextSlot], &j)
-	c.jobs[j.GetID()] = &j
+	c.tasks.add(t)

-	log.Info().Str("job", j.GetID().String()).Msg("job added successfully")
-	return j.GetID()
+	c.exec(t)
+
+	log.Info().Str("task", t.GetID().String()).Msg("task added successfully")
+	return t.GetID()
 }

-// Abort aborts the job given by its id if it exists..
+// Abort aborts the task given by its id if it exists.
 func (c *SchedulerCycle) Abort(id uuid.UUID) bool {
-	if j := c.getJob(id); j != nil {
-		j.Abort()
+	if t := c.getTask(id); t != nil {
+		t.abort()

-		log.Info().Str("job", j.GetID().String()).Msg("abort job done")
+		log.Info().Str("task id", t.GetID().String()).Msg("abort task done")
 		return true
 	}

 	return false
 }
-
-// GetJobDetails returns the job details by .
-func (c *SchedulerCycle) GetJobDetails(id uuid.UUID) job.JobDetails {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	j, ok := c.jobs[id]
-	if !ok {
-		return job.JobDetails{
-			State: job.Unknown.String(),
-		}
-	}
-
-	return j.IntoDetails()
-}
-
-// Display outputs earch interval the scheduler state.
-func (c *SchedulerCycle) Display() {
-	ticker := time.NewTicker(c.interval)
-	go func() {
-		for range ticker.C {
-			c.display()
-		}
-	}()
-}
-
-// display writes to stdout the state of the scheduler as a table.
-func (c *SchedulerCycle) display() { //nolint:gocyclo // not complex
-	c.l.RLock()
-	defer c.l.RUnlock()
-
-	var maxCols int
-	for i := range c.slots {
-		if l := len(c.slots[i]); l > maxCols {
-			maxCols = l
-		}
-	}
-
-	table := [][]string{}
-	title := fmt.Sprintf("%s (slot: %d)", TableTitle, c.currentSlot+1)
-	table = append(table, []string{title})
-	for {
-		if maxCols == 0 {
-			break
-		}
-
-		row := make([]string, CycleLength)
-		for i := 0; i <= MaxSlotsIdx; i++ {
-			row[i] = "_"
-		}
-
-		for i := range c.slots {
-			if len(c.slots[i]) < maxCols {
-				continue
-			}
-
-			j := c.slots[i][maxCols-1]
-			switch j.GetState() {
-			case job.Pending:
-				row[i] = "P"
-			case job.Running:
-				row[i] = "R"
-			case job.Failed:
-				row[i] = "X"
-			case job.Abort:
-				row[i] = "A"
-			case job.Unknown:
-				row[i] = "?"
-			case job.Success:
-				row[i] = "O"
-			}
-		}
-
-		table = append(table, row)
-		maxCols--
-	}
-
-	row := make([]string, CycleLength)
-	for i := 0; i <= MaxSlotsIdx; i++ {
-		row[i] = "-"
-	}
-	table = append(table, row)
-
-	if l := len(table); l > 0 {
-		table[l-1][c.currentSlot] = Cursor
-	}
-
-	tableFormat := ""
-	for _, r := range table {
-		tableFormat += strings.Join(r, " ")
-		tableFormat += "\n"
-	}
-
-	fmt.Println(tableFormat)
-}
-
-func (c *SchedulerCycle) getJob(id uuid.UUID) *job.Job {
-	c.l.RLock()
-	defer c.l.RUnlock()
-
-	j, ok := c.jobs[id]
-	if !ok {
-		return nil
-	}
-
-	return j
-}
-
-// getCurrentSlotJobs collects all the current slot jobs
-// and clean the slot.
-func (c *SchedulerCycle) getCurrentSlotJobs() (int, []*job.Job) {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	jobs := c.slots[c.currentSlot]
-
-	c.slots[c.currentSlot] = []*job.Job{}
-
-	return c.currentSlot, jobs
-}
-
-// updateSlot add a job to the slot where it was before.
-func (c *SchedulerCycle) updateSlot(row int, j *job.Job) {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	c.slots[row] = append(c.slots[row], j)
-}
-
-// updateCurrentSlot add a job to the current slot.
-func (c *SchedulerCycle) updateCurrentSlot(j *job.Job) {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	c.slots[c.currentSlot] = append(c.slots[c.currentSlot], j)
-}
-
-// incr increments the slot cursor.
-// It the cursor reaches `MaxSlotsIdx`, it goes back to 0.
-func (c *SchedulerCycle) incr() {
-	c.l.Lock()
-	defer c.l.Unlock()
-
-	nextSlot := c.currentSlot + 1
-	if nextSlot > MaxSlotsIdx {
-		nextSlot = 0
-	}
-
-	c.currentSlot = nextSlot
-}
-
-// dispatch gets jobs from the current slot, resets the slot
-// and dispatch all jobs to the workers.
-//
-// It all the workers are busy, the jobs are re-schedule in the same slot
-// to be executed in the next cycle.
-func (c *SchedulerCycle) dispatch() {
-	row, jobs := c.getCurrentSlotJobs()
-	for _, j := range jobs {
-		if j.GetState() == job.Abort {
-			continue
-		}
-
-		select {
-		case c.chJobs <- &JobSlot{row: row, Job: j}:
-		default:
-			log.Warn().Msg("unable to put job in workers, trying next cycle")
-			c.updateSlot(row, j)
-		}
-	}
-}
-
-// run launches the workers and the ticker.
-func (c *SchedulerCycle) run() {
-	c.workers()
-	c.tick()
-}
-
-// workers launches `MaxWorkers` number of worker to execute job.
-// If job returns `ErrJobNotCompletedYet`, it re-schedules in the same slot.
-func (c *SchedulerCycle) workers() {
-	for i := 0; i < MaxWorkers; i++ {
-		c.wg.Add(1)
-		go func() {
-			defer c.wg.Done()
-			for {
-				select {
-				case j := <-c.chJobs:
-					c.executeJob(j.Job, c.updateCurrentSlot)
-				case <-c.ctx.Done():
-					log.Error().Msg("context done, worker is stopping...")
-					return
-				}
-			}
-		}()
-	}
-}
-
-func (c *SchedulerCycle) executeJob(j *job.Job, fnFallBack func(*job.Job)) {
-	j.Run(c.ctx)
-	if j.GetState() == job.Pending {
-		fnFallBack(j)
-	}
-}
-
-// tick is a simple ticker incrementing at each scheduler interval,
-// the slot cursor and dispatch jobs to the workers.
-func (c *SchedulerCycle) tick() {
-	c.wg.Add(1)
-	go func() {
-		defer c.wg.Done()
-		for {
-			select {
-			case <-c.ctx.Done():
-				log.Error().Msg("context done, ticker is stopping...")
-				return
-			default:
-				time.Sleep(c.interval)
-				c.incr()
-				c.dispatch()
-			}
-		}
-	}()
-}
--- a/internal/scheduler/scheduler_test.go
+++ b/internal/scheduler/scheduler_test.go
@ -14,7 +14,7 @@ func TestSlot(t *testing.T) {
 	ctx, fnCancel := context.WithCancel(context.Background())
 	defer fnCancel()

-	s := NewSchedulerCycle(ctx, 1*time.Millisecond)
+	s := NewSchedulerCycle(ctx, 1*time.Millisecond, 5)

 	s.Delay(func(ctx context.Context) error {
 		return nil
@ -29,5 +29,5 @@ func TestSlot(t *testing.T) {
 	time.Sleep(2 * time.Millisecond)

 	assert.Equal(t, 3, s.Len())
-	assert.Equal(t, job.Failed.String(), s.GetJobDetails(j3).State)
+	assert.Equal(t, job.Failed.String(), s.GetTaskDetails(j3).State)
 }
--- a/internal/scheduler/task.go
+++ b/internal/scheduler/task.go
@ -0,0 +1,152 @@
+package scheduler
+
+import (
+	"context"
+	"cycle-scheduler/internal/job"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/google/uuid"
+)
+
+// atomicTimer wraps a `time.Timer`.
+type atomicTimer struct {
+	atomic.Pointer[time.Timer]
+}
+
+func (at *atomicTimer) stop() {
+	timer := at.Load()
+	if timer != nil {
+		timer.Stop()
+	}
+}
+
+// set replaces the current timer with t.
+// It also ensures that the timer being replaced, if any, is stopped.
+func (at *atomicTimer) set(t *time.Timer) {
+	timer := at.Load()
+	if timer != nil {
+		timer.Stop()
+		at.Swap(t)
+		return
+	}
+
+	at.Swap(t)
+}
+
+type TaskDetails struct {
+	job.JobDetails
+	Attempts int `json:"attempts"`
+}
+
+type task struct {
+	*job.Job
+	attempts atomic.Uint32
+	timer    atomicTimer
+}
+
+func newTask(f job.FnJob) *task {
+	j := job.NewJob(f)
+	t := task{
+		Job:   &j,
+		timer: atomicTimer{},
+	}
+
+	return &t
+}
+
+func (t *task) abort() {
+	t.timer.stop()
+	t.Job.Abort()
+}
+
+func (t *task) run(ctx context.Context) {
+	t.attempts.Add(1)
+	t.Job.Run(ctx)
+}
+
+func (t *task) getDetails() TaskDetails {
+	return TaskDetails{
+		JobDetails: t.IntoDetails(),
+		Attempts:   int(t.attempts.Load()),
+	}
+}
+
+type tasks struct {
+	l sync.RWMutex
+	s map[uuid.UUID]*task
+}
+
+func newTasks() tasks {
+	return tasks{
+		s: make(map[uuid.UUID]*task),
+	}
+}
+
+func (ts *tasks) add(t *task) {
+	ts.l.Lock()
+	defer ts.l.Unlock()
+
+	ts.s[t.GetID()] = t
+}
+
+func (ts *tasks) get(id uuid.UUID) *task {
+	ts.l.RLock()
+	defer ts.l.RUnlock()
+
+	j, ok := ts.s[id]
+	if !ok {
+		return nil
+	}
+
+	return j
+}
+
+func (ts *tasks) len() int {
+	ts.l.RLock()
+	defer ts.l.RUnlock()
+
+	return len(ts.s)
+}
+
+func (ts *tasks) completed() bool {
+	ts.l.RLock()
+	defer ts.l.RUnlock()
+
+	for _, t := range ts.s {
+		if t.GetState() == job.Pending || t.GetState() == job.Running {
+			return false
+		}
+	}
+
+	return true
+}
+
+func (ts *tasks) getAllDetails() []TaskDetails {
+	ts.l.RLock()
+	defer ts.l.RUnlock()
+
+	details := []TaskDetails{}
+	for _, t := range ts.s {
+		details = append(details, t.getDetails())
+	}
+
+	return details
+}
+
+func (ts *tasks) getDetails(id uuid.UUID) TaskDetails {
+	ts.l.RLock()
+	defer ts.l.RUnlock()
+
+	t, ok := ts.s[id]
+	if !ok {
+		return TaskDetails{
+			JobDetails: job.JobDetails{
+				State: job.UnknownState,
+			},
+		}
+	}
+
+	return t.getDetails()
+}
--- a/main.go
+++ b/main.go
@ -16,6 +16,11 @@ import (
 	"github.com/rs/zerolog/log"
 )

+const (
+	MaxWorkers = 5
+	Interval   = 2000 * time.Millisecond
+)
+
 func initLogger() {
 	zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
 	log.Logger = log.With().Caller().Logger().Output(zerolog.ConsoleWriter{Out: os.Stderr})
@ -31,9 +36,7 @@ func main() {
 	)
 	defer stop()

-	interval := 200 * time.Millisecond
-	s := scheduler.NewSchedulerCycle(ctx, interval)
-	s.Display()
+	s := scheduler.NewSchedulerCycle(ctx, Interval, MaxWorkers)

 	// pending test
 	for i := 0; i < 20; i++ {
@ -101,7 +104,7 @@ func main() {
 	go func() {
 		for {
 			time.Sleep(2 * time.Second) //nolint:mnd // test purpose
-			if s.HasAllJobsDone() {
+			if s.TasksDone() {
 				s.Stop()
 				return
 			}
@ -110,11 +113,11 @@ func main() {

 	<-s.Done()

-	jds := s.GetJobsDetails()
-	for _, jd := range jds {
-		c, err := json.Marshal(&jd)
+	ts := s.GetTasksDetails()
+	for _, t := range ts {
+		c, err := json.Marshal(&t)
 		if err != nil {
-			log.Err(err).Str("job", jd.ID.String()).Msg("unable to parse job details into JSON")
+			log.Err(err).Str("task", t.ID.String()).Msg("unable to parse task details into JSON")
 			continue
 		}
 		fmt.Println(string(c))