diff --git a/monitor.go b/monitor.go
index 368dd57..b1acce8 100644
--- a/monitor.go
+++ b/monitor.go
@@ -3,8 +3,13 @@ package cachet
 import (
 	"crypto/tls"
 	"encoding/json"
+	"log"
 	"net/http"
+	"os"
+	"os/signal"
 	"strconv"
+	"sync"
+	"syscall"
 	"time"
 )
 
@@ -12,18 +17,22 @@ const timeout = time.Duration(time.Second)
 
 // Monitor data model
 type Monitor struct {
-	Name               string  `json:"name"`
-	URL                string  `json:"url"`
-	MetricID           int     `json:"metric_id"`
-	Threshold          float32 `json:"threshold"`
-	ComponentID        *int    `json:"component_id"`
-	ExpectedStatusCode int     `json:"expected_status_code"`
-	StrictTLS          *bool   `json:"strict_tls"`
+	Name               string        `json:"name"`
+	URL                string        `json:"url"`
+	MetricID           int           `json:"metric_id"`
+	Threshold          float32       `json:"threshold"`
+	ComponentID        *int          `json:"component_id"`
+	ExpectedStatusCode int           `json:"expected_status_code"`
+	StrictTLS          *bool         `json:"strict_tls"`
+	Interval           time.Duration `json:"interval"`
 
 	History        []bool    `json:"-"`
 	LastFailReason *string   `json:"-"`
 	Incident       *Incident `json:"-"`
 	config         *CachetMonitor
+
+	// Closed when mon.Stop() is called
+	stopC chan bool
 }
 
 func (cfg *CachetMonitor) Run() {
@@ -34,17 +43,54 @@ func (cfg *CachetMonitor) Run() {
 		if mon.MetricID > 0 {
 			cfg.Logger.Printf(" - Logs lag to metric id: %d\n", mon.MetricID)
 		}
+		if mon.ComponentID != nil && *mon.ComponentID > 0 {
+			cfg.Logger.Printf(" - Updates component id: %d\n", *mon.ComponentID)
+		}
 	}
 
 	cfg.Logger.Println()
+	wg := &sync.WaitGroup{}
 
-	ticker := time.NewTicker(time.Duration(cfg.Interval) * time.Second)
-	for range ticker.C {
-		for _, mon := range cfg.Monitors {
-			mon.config = cfg
-			go mon.Run()
-		}
+	for _, mon := range cfg.Monitors {
+		wg.Add(1)
+		mon.config = cfg
+		mon.stopC = make(chan bool)
+
+		go func(mon *Monitor) {
+			// Always release the WaitGroup slot, whichever way the loop exits.
+			defer wg.Done()
+
+			if mon.Interval < 1 {
+				mon.Interval = time.Duration(cfg.Interval)
+			}
+
+			// Interval holds a bare number of seconds, so scale it here.
+			ticker := time.NewTicker(mon.Interval * time.Second)
+			defer ticker.Stop()
+
+			for {
+				select {
+				case <-ticker.C:
+					mon.Run()
+				case <-mon.StopC():
+					return
+				}
+			}
+		}(mon)
 	}
+
+	// Block until the process is asked to shut down. SIGKILL (os.Kill)
+	// cannot be trapped, so SIGTERM is handled alongside os.Interrupt.
+	signals := make(chan os.Signal, 1)
+	signal.Notify(signals, os.Interrupt, syscall.SIGTERM)
+	<-signals
+
+	log.Println("Waiting monitors to end current operation")
+	for _, mon := range cfg.Monitors {
+		mon.Stop()
+	}
+
+	wg.Wait()
 }
 
 // Run loop
@@ -64,6 +110,32 @@ func (monitor *Monitor) Run() {
 	}
 }
 
+// Stop asks the monitor's polling loop to exit by closing its stop
+// channel. It does nothing when the stop channel was never initialised
+// or has already been closed.
+func (monitor *Monitor) Stop() {
+	if monitor.stopC == nil || monitor.Stopped() {
+		return
+	}
+
+	close(monitor.stopC)
+}
+
+// StopC returns a receive-only view of the channel that Stop closes.
+func (monitor *Monitor) StopC() <-chan bool {
+	return monitor.stopC
+}
+
+// Stopped reports whether the stop channel has been closed.
+func (monitor *Monitor) Stopped() bool {
+	select {
+	case <-monitor.stopC:
+		return true
+	default:
+		return false
+	}
+}
+
 func (monitor *Monitor) doRequest() bool {
 	client := &http.Client{
 		Timeout: timeout,
diff --git a/readme.md b/readme.md
index e306379..0e5d6e8 100644
--- a/readme.md
+++ b/readme.md
@@ -9,7 +9,7 @@ Features
 --------
 
 - [x] Creates & Resolves Incidents
--- [x] Posts monitor lag every second * config.Interval
+- [x] Posts monitor lag (interval configurable)
 - [x] Updates Component to Partial Outage
 - [x] Updates Component to Major Outage if in Partial Outage
 - [x] Can be run on multiple servers and geo regions
@@ -30,7 +30,8 @@ Configuration
       "component_id": ,
       "threshold": 80,
       "expected_status_code": 200,
-      "strict_tls": true
+      "strict_tls": true,
+      "interval": 5
     }
   ],
   "insecure_api": false
@@ -45,6 +46,7 @@ Configuration
 - `component_id` is optional
 - `threshold` is a percentage
 - `expected_status_code` is a http response code
+- `interval` is the duration in seconds between two checks (optional; falls back to the global `interval`).
 - GET request will be performed on the `url`
 
 Installation