From 76b897eb05dc0487ca0e78d8048744f5c2c93515 Mon Sep 17 00:00:00 2001 From: Soulou Date: Sun, 23 Aug 2015 17:33:23 +0200 Subject: [PATCH 1/2] Implement 'interval' config parameter -> number of seconds between checks --- cachet/config.go | 10 +++++++++- cachet/monitor.go | 42 +++++++++++++++++++++++++++++++++--------- main.go | 37 +++++++++++++++++++++++++++++++------ 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/cachet/config.go b/cachet/config.go index 13b73e9..b809fc2 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -4,13 +4,14 @@ import ( "encoding/json" "flag" "fmt" - "github.com/castawaylabs/cachet-monitor/system" "io" "io/ioutil" "log" "net/http" "net/url" "os" + + "github.com/castawaylabs/cachet-monitor/system" ) // Static config @@ -69,6 +70,13 @@ func init() { os.Exit(1) } + for _, mon := range Config.Monitors { + if mon.Interval <= 0 { + mon.Interval = 1 + } + mon.stopC = make(chan struct{}) + } + if len(systemName) > 0 { Config.SystemName = systemName } diff --git a/cachet/monitor.go b/cachet/monitor.go index 6dc687b..1d9ddb9 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -12,17 +12,21 @@ const timeout = time.Duration(time.Second) // Monitor data model type Monitor struct { - Name string `json:"name"` - URL string `json:"url"` - MetricID int `json:"metric_id"` - Threshold float32 `json:"threshold"` - ComponentID *int `json:"component_id"` - ExpectedStatusCode int `json:"expected_status_code"` - StrictTLS *bool `json:"strict_tls"` + Name string `json:"name"` + URL string `json:"url"` + MetricID int `json:"metric_id"` + Threshold float32 `json:"threshold"` + ComponentID *int `json:"component_id"` + ExpectedStatusCode int `json:"expected_status_code"` + StrictTLS *bool `json:"strict_tls"` + Interval time.Duration `json:"interval"` History []bool `json:"-"` LastFailReason *string `json:"-"` Incident *Incident `json:"-"` + + // Closed when mon.Stop() is called + stopC chan struct{} `json:"-"` } // Run loop @@ -42,6 +46,26 
@@ func (monitor *Monitor) Run() { } } +func (monitor *Monitor) Stop() { + if monitor.Stopped() { + return + } + close(monitor.stopC) +} + +func (monitor *Monitor) StopC() <-chan struct{} { + return monitor.stopC +} + +func (monitor *Monitor) Stopped() bool { + select { + case <-monitor.stopC: + return true + default: + return false + } +} + func (monitor *Monitor) doRequest() bool { client := &http.Client{ Timeout: timeout, @@ -115,8 +139,8 @@ func (monitor *Monitor) AnalyseData() { component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) monitor.Incident = &Incident{ - Name: monitor.Incident.Name, - Message: monitor.Name + " check succeeded", + Name: monitor.Incident.Name, + Message: monitor.Name + " check succeeded", ComponentID: &component_id, } diff --git a/main.go b/main.go index e5fde68..a755057 100644 --- a/main.go +++ b/main.go @@ -1,8 +1,12 @@ package main import ( - "github.com/castawaylabs/cachet-monitor/cachet" + "os" + "os/signal" + "sync" "time" + + "github.com/castawaylabs/cachet-monitor/cachet" ) func main() { @@ -20,10 +24,31 @@ func main() { log.Println() - ticker := time.NewTicker(time.Second) - for range ticker.C { - for _, mon := range config.Monitors { - go mon.Run() - } + wg := &sync.WaitGroup{} + for _, mon := range config.Monitors { + wg.Add(1) + go func(mon *cachet.Monitor) { + ticker := time.NewTicker(mon.Interval * time.Second) + for { + select { + case <-ticker.C: + mon.Run() + case <-mon.StopC(): + wg.Done() + return + } + } + }(mon) } + + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt, os.Kill) + <-signals + + log.Println("Waiting monitors to end current operation") + for _, mon := range config.Monitors { + mon.Stop() + } + + wg.Wait() } From e4a586b92af6125e1979dd02a79899a69209cae8 Mon Sep 17 00:00:00 2001 From: Soulou Date: Sun, 23 Aug 2015 20:01:41 +0200 Subject: [PATCH 2/2] update readme --- readme.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md 
index 7512674..93acd97 100644 --- a/readme.md +++ b/readme.md @@ -9,7 +9,7 @@ Features -------- - [x] Creates & Resolves Incidents -- [x] Posts monitor lag every second +- [x] Posts monitor lag (interval configurable) - [x] Updates Component to Partial Outage - [x] Updates Component to Major Outage if in Partial Outage - [x] Can be run on multiple servers and geo regions @@ -43,7 +43,8 @@ Configuration "threshold": 80, "component_id": null, "expected_status_code": 200, - "strict_tls": true + "strict_tls": true, + "interval": 5 } ], "insecure_api": false @@ -58,6 +59,7 @@ Configuration - `component_id` is optional - `threshold` is a percentage - `expected_status_code` is a http response code +- `interval` is the duration in seconds between two checks. - GET request will be performed on the `url` How to run