rework the whole engine

rmanach 2024-09-24 16:10:43 +02:00
parent 3cbea438f6
commit 152c4f925a
5 changed files with 188 additions and 351 deletions


@ -1,48 +1,18 @@
# cycle-scheduler
cycle-scheduler is a simple scheduler that handles jobs and executes them at a regular interval. If a task is not in its desired state, the task is re-scheduled with a backoff.
Here is a simple representation:
```ascii
+------------------------------------------------------+
| +---+ +---+ +---+ +---+ +---+ +---+ |
| | | | | | | | | | | | | |
| | | | | | | | | | | | | |
| | | | | | | | | | | | | |
| | | | | | | | | | | | | |
| | | | | | | | | | | | | |
| | | | | | | | | | | | | |
| |s1 | |s2 | |s3 | |s4 | | | |s60| |
| +---+ +---+ +---+ +---+ +---+ +---+ |
+---------------^--------------------------------------+
```
Jobs are handled in an array of job slices.
At each interval (clock), the cursor `^` moves to the next slot (s*).
If there are jobs, they are sent to workers to be executed
and the slot is cleaned.
At the end of the cycle (s60), the cursor starts a new cycle from s1.
If a job is not in its desired state, the job is re-scheduled in the current slot to be re-executed in the next cycle.

**NOTE**: This scheduler does not accept long-running tasks. Job execution has a fixed timeout of 10s.
Polling tasks are more suitable for this kind of scheduler.
## Run
You can run sample tests from `main.go` to see the scheduler in action:
```bash
make run
```
If all goes well, you should see this kind of output on stdout:
```ascii
# cycle-scheduler (slot: 7)
_ P _ _ _ _ _ _ _ _ _ _ _ _
- - - - - - ^ - - - - - - -
```
> **P** means *pending* state
You can adjust the clock interval and the number of workers as needed in the constants section of `main.go`:
```go
const (
	MaxWorkers = 5
	Interval   = 2000 * time.Millisecond
)
```
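For a quick feel of the API, here is a minimal usage sketch. It assumes the `job` package exports `ErrJobNotCompletedYet` (the error referenced in the scheduler's comments); `resourceReady` is a hypothetical placeholder:
```go
s := scheduler.NewSchedulerCycle(ctx, Interval, MaxWorkers)

// A polling-style job: while it returns job.ErrJobNotCompletedYet it stays
// Pending and is re-scheduled with an exponential backoff.
id := s.Delay(func(ctx context.Context) error {
	if !resourceReady() { // hypothetical readiness check
		return job.ErrJobNotCompletedYet
	}
	return nil
})

// Inspect progress at any time, or give up early.
details := s.GetJobDetails(id)
fmt.Println(details.State, details.Attempts)
s.Abort(id)
```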


@ -3,77 +3,18 @@ package scheduler
import (
	"context"
	"cycle-scheduler/internal/job"
	"fmt"
	"math"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/uuid"
	"github.com/rs/zerolog/log"
)
const ExponentialFactor = 1.8

const (
	TableTitle  = "# cycle-scheduler"
	Cursor      = "^"
	CycleLength = 60
	MaxWorkers  = 5
)

const MaxSlotsIdx = 59
type Priority int

const (
	Low Priority = iota
	Medium
	High
)

type JobSlotDetails struct {
	job.JobDetails
	Attempts int `json:"attempts"`
}

type jobSlot struct {
	*job.Job
	slot     atomic.Uint32
	attempts atomic.Uint32
	// priority Priority
}

func newJobSlot(task job.FnJob, slot int) *jobSlot {
	j := job.NewJob(task)
	js := jobSlot{
		Job: &j,
	}
	js.slot.Add(uint32(slot))
	return &js
}

func (j *jobSlot) run(ctx context.Context) {
	j.attempts.Add(1)
	j.Job.Run(ctx)
}

func (j *jobSlot) getDetails() JobSlotDetails {
	return JobSlotDetails{
		JobDetails: j.IntoDetails(),
		Attempts:   int(j.attempts.Load()),
	}
}
// SchedulerCycle is a dumb scheduler.
// It handles jobs and executes them at each cycle (60 * interval).
//
// Jobs are handled in an array of job slices.
// At each interval (clock), the cursor moves to the next slot (s*).
// If there are jobs, they are sent to workers to be executed
// and the slot is cleaned.
//
// At the end of the cycle (s60), the cursor re-starts a cycle at s1.

// SchedulerCycle is a simple scheduler that handles jobs and executes them at a regular interval.
// If a task is not in its desired state, it is re-scheduled with a backoff.
type SchedulerCycle struct {
	l  sync.RWMutex
	wg sync.WaitGroup
@ -81,33 +22,93 @@ type SchedulerCycle struct {
	ctx         context.Context
	fnCancel    context.CancelFunc
	interval    time.Duration
	currentSlot int
	slots       [60][]*jobSlot
	jobs        map[uuid.UUID]*jobSlot
	tasks       map[uuid.UUID]*task

	chJobs  chan *jobSlot
	chTasks chan *task
}
func NewSchedulerCycle(ctx context.Context, interval time.Duration, workers uint32) *SchedulerCycle {
	ctxChild, fnCancel := context.WithCancel(ctx)
	c := SchedulerCycle{
		wg:       sync.WaitGroup{},
		ctx:      ctxChild,
		fnCancel: fnCancel,
		interval: interval,
		tasks:    make(map[uuid.UUID]*task),
		chTasks:  make(chan *task),
	}

	c.run(workers)

	return &c
}
func (c *SchedulerCycle) backoff(j *task) {
	// The delay grows exponentially with the number of attempts: the base
	// interval plus ExponentialFactor^attempts, scaled to seconds (an unscaled
	// math.Pow result would be read as nanoseconds).
	backoff := c.interval + time.Duration(math.Pow(ExponentialFactor, float64(j.attempts.Load()))*float64(time.Second))
	j.timer.set(
		time.AfterFunc(backoff, func() {
			select {
			case c.chTasks <- j:
			default:
				log.Error().Str("job id", j.GetID().String()).Msg("no worker available, re-scheduling job with a new backoff")
				c.backoff(j)
			}
		}),
	)
}
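// For illustration, with the 2s Interval from main.go and the seconds scaling
// above, the re-schedule delay grows per attempt roughly as:
//   attempt 1: 2s + 1.8^1 s ≈ 3.8s
//   attempt 2: 2s + 1.8^2 s ≈ 5.2s
//   attempt 3: 2s + 1.8^3 s ≈ 7.8s
//   attempt 4: 2s + 1.8^4 s ≈ 12.5s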
// exec hands the job to a worker right away or, if all the workers are busy,
// delays it with a backoff.
func (c *SchedulerCycle) exec(j *task) {
	select {
	case c.chTasks <- j:
	default:
		log.Warn().Str("job id", j.GetID().String()).Msg("no worker available, delaying job")
		c.backoff(j)
	}
}
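// Note: chTasks is unbuffered, so the send above only succeeds when a worker
// is already blocked receiving on the channel; the default branch therefore
// fires exactly when every worker is busy.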
func (c *SchedulerCycle) getTask(id uuid.UUID) *task {
	c.l.RLock()
	defer c.l.RUnlock()

	j, ok := c.tasks[id]
	if !ok {
		return nil
	}
	return j
}
// run launches n workers to execute jobs.
// If a job returns `ErrJobNotCompletedYet`, it is re-scheduled with a backoff.
func (c *SchedulerCycle) run(n uint32) {
	for i := 0; i < int(n); i++ {
		c.wg.Add(1)
		go func() {
			defer c.wg.Done()
			for {
				select {
				case j := <-c.chTasks:
					c.executeJob(j, c.backoff)
				case <-c.ctx.Done():
					log.Error().Msg("context done, worker is stopping...")
					return
				}
			}
		}()
	}
}
func (c *SchedulerCycle) executeJob(j *task, fnFallBack func(*task)) {
	j.run(c.ctx)
	if j.GetState() == job.Pending {
		fnFallBack(j)
	}
}
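// End-to-end lifecycle: Delay registers the task and calls exec; a free worker
// receives it from chTasks and runs it; if the job is still Pending after the
// run, executeJob hands it to the fallback (backoff), which re-enqueues it via
// a timer until it reaches a terminal state or is aborted.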
func (c *SchedulerCycle) Stop() {
	c.fnCancel()
}
@ -126,15 +127,16 @@ func (c *SchedulerCycle) Len() int {
	c.l.RLock()
	defer c.l.RUnlock()

	return len(c.tasks)
}
// HasAllJobsDone checks whether all the jobs have been executed.
func (c *SchedulerCycle) HasAllJobsDone() bool {
	c.l.RLock()
	defer c.l.RUnlock()

	for _, t := range c.tasks {
		if t.GetState() == job.Pending || t.GetState() == job.Running {
			return false
		}
	}
@ -142,19 +144,19 @@ func (c *SchedulerCycle) HasAllJobsDone() bool {
	return true
}
func (c *SchedulerCycle) GetJobsDetails() []TaskDetails {
	c.l.RLock()
	defer c.l.RUnlock()

	details := []TaskDetails{}
	for _, t := range c.tasks {
		details = append(details, t.getDetails())
	}
	return details
}
// Delay builds a task and adds it to the scheduler engine.
func (c *SchedulerCycle) Delay(fnJob job.FnJob) uuid.UUID {
	select {
	case <-c.Done():
@ -162,27 +164,23 @@ func (c *SchedulerCycle) Delay(fnJob job.FnJob) uuid.UUID {
	default:
	}

	j := newTask(fnJob)

	c.l.Lock()
	defer c.l.Unlock()

	c.tasks[j.GetID()] = j

	c.exec(j)

	log.Info().Str("job", j.GetID().String()).Msg("job added successfully")

	return j.GetID()
}
// Abort aborts the job given by its id if it exists.
func (c *SchedulerCycle) Abort(id uuid.UUID) bool {
	if j := c.getTask(id); j != nil {
		j.abort()
		log.Info().Str("job", j.GetID().String()).Msg("abort job done")
		return true
@ -191,14 +189,14 @@ func (c *SchedulerCycle) Abort(id uuid.UUID) bool {
	return false
}
// GetJobDetails returns the task details by id.
func (c *SchedulerCycle) GetJobDetails(id uuid.UUID) TaskDetails {
	c.l.RLock()
	defer c.l.RUnlock()

	j, ok := c.tasks[id]
	if !ok {
		return TaskDetails{
			JobDetails: job.JobDetails{
				State: job.UnknownState,
			},
@ -207,213 +205,3 @@ func (c *SchedulerCycle) GetJobDetails(id uuid.UUID) JobSlotDetails {
	return j.getDetails()
}
// Display outputs the scheduler state at each interval.
func (c *SchedulerCycle) Display() {
	ticker := time.NewTicker(c.interval)
	go func() {
		for range ticker.C {
			c.display()
		}
	}()
}

// display writes to stdout the state of the scheduler as a table.
func (c *SchedulerCycle) display() { //nolint:gocyclo // not complex
	c.l.RLock()
	defer c.l.RUnlock()

	var maxCols int
	for i := range c.slots {
		if l := len(c.slots[i]); l > maxCols {
			maxCols = l
		}
	}

	table := [][]string{}
	title := fmt.Sprintf("%s (slot: %d)", TableTitle, c.currentSlot+1)
	table = append(table, []string{title})

	for {
		if maxCols == 0 {
			break
		}
		row := make([]string, CycleLength)
		for i := 0; i <= MaxSlotsIdx; i++ {
			row[i] = "_"
		}
		for i := range c.slots {
			if len(c.slots[i]) < maxCols {
				continue
			}
			j := c.slots[i][maxCols-1]
			switch j.GetState() {
			case job.Pending:
				row[i] = "P"
			case job.Running:
				row[i] = "R"
			case job.Failed:
				row[i] = "X"
			case job.Abort:
				row[i] = "A"
			case job.Unknown:
				row[i] = "?"
			case job.Success:
				row[i] = "O"
			}
		}
		table = append(table, row)
		maxCols--
	}

	row := make([]string, CycleLength)
	for i := 0; i <= MaxSlotsIdx; i++ {
		row[i] = "-"
	}
	table = append(table, row)

	if l := len(table); l > 0 {
		table[l-1][c.currentSlot] = Cursor
	}

	tableFormat := ""
	for _, r := range table {
		tableFormat += strings.Join(r, " ")
		tableFormat += "\n"
	}
	fmt.Println(tableFormat)
}

func (c *SchedulerCycle) getJob(id uuid.UUID) *jobSlot {
	c.l.RLock()
	defer c.l.RUnlock()

	j, ok := c.jobs[id]
	if !ok {
		return nil
	}
	return j
}

// getCurrentSlotJobs collects all the current slot jobs
// and cleans the slot.
func (c *SchedulerCycle) getCurrentSlotJobs() (int, []*jobSlot) {
	c.l.Lock()
	defer c.l.Unlock()

	jobs := c.slots[c.currentSlot]
	c.slots[c.currentSlot] = []*jobSlot{}
	return c.currentSlot, jobs
}

// updateSlot adds a job back to the slot where it was before.
func (c *SchedulerCycle) updateSlot(slot int, j *jobSlot) {
	c.l.Lock()
	defer c.l.Unlock()

	c.slots[slot] = append(c.slots[slot], j)
}

// updateCurrentSlot adds a job to the current slot.
func (c *SchedulerCycle) updateCurrentSlot(j *jobSlot) {
	c.l.Lock()
	defer c.l.Unlock()

	j.slot.CompareAndSwap(j.slot.Load(), uint32(c.currentSlot))
	c.slots[c.currentSlot] = append(c.slots[c.currentSlot], j)
}

// incr increments the slot cursor.
// If the cursor reaches `MaxSlotsIdx`, it goes back to 0.
func (c *SchedulerCycle) incr() {
	c.l.Lock()
	defer c.l.Unlock()

	nextSlot := c.currentSlot + 1
	if nextSlot > MaxSlotsIdx {
		nextSlot = 0
	}
	c.currentSlot = nextSlot
}

// dispatch gets jobs from the current slot, resets the slot
// and dispatches all jobs to the workers.
//
// If all the workers are busy, the jobs are re-scheduled in the same slot
// to be executed in the next cycle.
func (c *SchedulerCycle) dispatch() {
	slot, jobs := c.getCurrentSlotJobs()
	for _, j := range jobs {
		if j.GetState() == job.Abort {
			continue
		}
		select {
		case c.chJobs <- j:
		default:
			log.Warn().Msg("unable to put job in workers, trying next cycle")
			c.updateSlot(slot, j)
		}
	}
}

// run launches the workers and the ticker.
func (c *SchedulerCycle) run() {
	c.workers()
	c.tick()
}

// workers launches `MaxWorkers` workers to execute jobs.
// If a job returns `ErrJobNotCompletedYet`, it is re-scheduled in the current slot.
func (c *SchedulerCycle) workers() {
	for i := 0; i < MaxWorkers; i++ {
		c.wg.Add(1)
		go func() {
			defer c.wg.Done()
			for {
				select {
				case j := <-c.chJobs:
					c.executeJob(j, c.updateCurrentSlot)
				case <-c.ctx.Done():
					log.Error().Msg("context done, worker is stopping...")
					return
				}
			}
		}()
	}
}

func (c *SchedulerCycle) executeJob(j *jobSlot, fnFallBack func(*jobSlot)) {
	j.run(c.ctx)
	if j.GetState() == job.Pending {
		fnFallBack(j)
	}
}

// tick is a simple ticker that, at each scheduler interval, increments
// the slot cursor and dispatches jobs to the workers.
func (c *SchedulerCycle) tick() {
	c.wg.Add(1)
	go func() {
		defer c.wg.Done()
		for {
			select {
			case <-c.ctx.Done():
				log.Error().Msg("context done, ticker is stopping...")
				return
			default:
				time.Sleep(c.interval)
				c.incr()
				c.dispatch()
			}
		}
	}()
}


@ -14,7 +14,7 @@ func TestSlot(t *testing.T) {
	ctx, fnCancel := context.WithCancel(context.Background())
	defer fnCancel()

	s := NewSchedulerCycle(ctx, 1*time.Millisecond, 5)
	s.Delay(func(ctx context.Context) error {
		return nil


@ -0,0 +1,76 @@
package scheduler

import (
	"context"
	"cycle-scheduler/internal/job"
	"sync"
	"sync/atomic"
	"time"
)
// safeTimer wraps a `time.Timer` with a lock to be thread safe.
type safeTimer struct {
	l     sync.Mutex
	timer *time.Timer
}
func (st *safeTimer) stop() {
	st.l.Lock()
	defer st.l.Unlock()

	if st.timer != nil {
		st.timer.Stop()
	}
}
// set replaces the current timer.
// It also ensures that the previous timer is stopped.
func (st *safeTimer) set(t *time.Timer) {
	st.l.Lock()
	defer st.l.Unlock()

	if st.timer != nil {
		st.timer.Stop()
	}
	st.timer = t
}
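// Stopping the previous timer before swapping it out matters here: without the
// Stop, a task whose backoff is replaced could leave an orphaned timer behind
// that later fires and re-enqueues the task a second time.
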
type TaskDetails struct {
	job.JobDetails
	Attempts int `json:"attempts"`
}

type task struct {
	*job.Job
	attempts atomic.Uint32
	timer    *safeTimer
}

func newTask(f job.FnJob) *task {
	j := job.NewJob(f)
	t := task{
		Job:   &j,
		timer: &safeTimer{},
	}
	return &t
}

func (t *task) abort() {
	if t.timer != nil {
		t.timer.stop()
	}
	t.Job.Abort()
}

func (t *task) run(ctx context.Context) {
	t.attempts.Add(1)
	t.Job.Run(ctx)
}

func (t *task) getDetails() TaskDetails {
	return TaskDetails{
		JobDetails: t.IntoDetails(),
		Attempts:   int(t.attempts.Load()),
	}
}


@ -16,6 +16,11 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
const (
	MaxWorkers = 5
	Interval   = 2000 * time.Millisecond
)
func initLogger() {
	zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
	log.Logger = log.With().Caller().Logger().Output(zerolog.ConsoleWriter{Out: os.Stderr})
@ -31,9 +36,7 @@ func main() {
	)
	defer stop()

	s := scheduler.NewSchedulerCycle(ctx, Interval, MaxWorkers)

	// pending test
	for i := 0; i < 20; i++ {