From 0ceccccd459a64fd87f0acd60d38889cb4af71e0 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 18:40:01 +0100 Subject: [PATCH] More config options, markdown in incidents - Specify method to ping with - Body regex match - Markdown in incident messages - Update existing incidents --- cli/main.go | 20 +++- config.go | 25 ++--- http.go | 37 ++++--- incident.go | 163 +++++++++++------------------ metrics.go | 25 ----- monitor.go | 289 +++++++++++++++++++++++++++++----------------------- readme.md | 58 +++++------ 7 files changed, 305 insertions(+), 312 deletions(-) delete mode 100644 metrics.go diff --git a/cli/main.go b/cli/main.go index c394621..4028b4d 100644 --- a/cli/main.go +++ b/cli/main.go @@ -10,6 +10,8 @@ import ( "net/http" "net/url" "os" + "os/signal" + "sync" cachet "github.com/castawaylabs/cachet-monitor" ) @@ -47,7 +49,23 @@ func main() { panic(err) } - cfg.Run() + cfg.Logger.Printf("System: %s\nAPI: %s\nMonitors: %d\n\n", cfg.SystemName, cfg.APIUrl, len(cfg.Monitors)) + + wg := &sync.WaitGroup{} + for _, mon := range cfg.Monitors { + go mon.Start(cfg, wg) + } + + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt, os.Kill) + <-signals + + cfg.Logger.Println("Abort: Waiting monitors to finish") + for _, mon := range cfg.Monitors { + mon.Stop() + } + + wg.Wait() } func getLogger(logPath string) *log.Logger { diff --git a/config.go b/config.go index b0dc9fa..8a12419 100644 --- a/config.go +++ b/config.go @@ -12,7 +12,6 @@ type CachetMonitor struct { APIUrl string `json:"api_url"` APIToken string `json:"api_token"` - Interval int64 `json:"interval"` SystemName string `json:"system_name"` LogPath string `json:"log_path"` InsecureAPI bool `json:"insecure_api"` @@ -20,28 +19,30 @@ type CachetMonitor struct { Monitors []*Monitor `json:"monitors"` } -func (mon *CachetMonitor) ValidateConfiguration() error { - if mon.Logger == nil { - mon.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) +func (cfg 
*CachetMonitor) ValidateConfiguration() error { + if cfg.Logger == nil { + cfg.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) } - if len(mon.SystemName) == 0 { + if len(cfg.SystemName) == 0 { // get hostname - mon.SystemName = getHostname() + cfg.SystemName = getHostname() } - if mon.Interval <= 0 { - mon.Interval = 60 - } - - if len(mon.APIToken) == 0 || len(mon.APIUrl) == 0 { + if len(cfg.APIToken) == 0 || len(cfg.APIUrl) == 0 { return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n") } - if len(mon.Monitors) == 0 { + if len(cfg.Monitors) == 0 { return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n") } + for _, monitor := range cfg.Monitors { + if err := monitor.ValidateConfiguration(); err != nil { + return err + } + } + return nil } diff --git a/http.go b/http.go index f6b5b01..602d9d1 100644 --- a/http.go +++ b/http.go @@ -4,22 +4,13 @@ import ( "bytes" "crypto/tls" "encoding/json" + "fmt" "io/ioutil" "net/http" + "strconv" + "time" ) -// Component Cachet model -type Component struct { - ID json.Number `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - Status json.Number `json:"status_id"` - HumanStatus string `json:"-"` - IncidentCount int `json:"-"` - CreatedAt *string `json:"created_at"` - UpdatedAt *string `json:"updated_at"` -} - func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody)) @@ -43,3 +34,25 @@ func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBod return res, body, nil } + +// SendMetric sends lag metric point +func (monitor 
*CachetMonitor) SendMetric(metricID int, delay int64) error { + if metricID <= 0 { + return nil + } + + jsonBytes, _ := json.Marshal(&map[string]interface{}{ + "value": delay, + }) + + resp, _, err := monitor.makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) + if err != nil || resp.StatusCode != 200 { + return fmt.Errorf("Could not log data point!\n%v\n", err) + } + + return nil +} + +func getMs() int64 { + return time.Now().UnixNano() / int64(time.Millisecond) +} diff --git a/incident.go b/incident.go index 00077ce..3cbb3c4 100644 --- a/incident.go +++ b/incident.go @@ -2,156 +2,113 @@ package cachet import ( "encoding/json" - "errors" "fmt" "strconv" ) // Incident Cachet data model type Incident struct { - ID json.Number `json:"id"` - Name string `json:"name"` - Message string `json:"message"` - Status json.Number `json:"status"` // 4? - HumanStatus string `json:"human_status"` - Component *Component `json:"-"` - ComponentID *json.Number `json:"component_id"` - CreatedAt *string `json:"created_at"` - UpdatedAt *string `json:"updated_at"` -} + ID int `json:"id"` + Name string `json:"name"` + Message string `json:"message"` + Status int `json:"status"` + Visible int `json"visible"` + Notify bool `json:"notify"` -// GetIncidents - Get list of incidents -func (monitor *CachetMonitor) GetIncidents() ([]Incident, error) { - _, body, err := monitor.makeRequest("GET", "/incidents", nil) - if err != nil { - return []Incident{}, fmt.Errorf("Cannot get incidents: %v\n", err) - } - - var data struct { - Incidents []Incident `json:"data"` - } - err = json.Unmarshal(body, &data) - if err != nil { - return []Incident{}, fmt.Errorf("Cannot parse incidents: %v\n", err) - } - - return data.Incidents, nil + ComponentID int `json:"component_id"` + ComponentStatus int `json:"component_status"` } // Send - Create or Update incident -func (monitor *CachetMonitor) SendIncident(incident *Incident) error { - jsonBytes, _ := json.Marshal(map[string]interface{}{ - 
"name": incident.Name, - "message": incident.Message, - "status": incident.Status, - "component_id": incident.ComponentID, - "notify": true, - }) +func (incident *Incident) Send(cfg *CachetMonitor) error { + switch incident.Status { + case 1, 2, 3: + // partial outage + incident.ComponentStatus = 3 + + componentStatus, err := incident.GetComponentStatus(cfg) + if componentStatus == 3 { + // major outage + incident.ComponentStatus = 4 + } + + if err != nil { + cfg.Logger.Printf("cannot fetch component: %v", err) + } + case 4: + // fixed + incident.ComponentStatus = 1 + } requestType := "POST" requestURL := "/incidents" - if len(incident.ID) > 0 { + if incident.ID > 0 { requestType = "PUT" - requestURL += "/" + string(incident.ID) + requestURL += "/" + strconv.Itoa(incident.ID) } - resp, body, err := monitor.makeRequest(requestType, requestURL, jsonBytes) + jsonBytes, _ := json.Marshal(incident) + + resp, body, err := cfg.makeRequest(requestType, requestURL, jsonBytes) if err != nil { return err } var data struct { - Incident Incident `json:"data"` + Incident struct { + ID int `json:"id"` + } `json:"data"` } if err := json.Unmarshal(body, &data); err != nil { - return errors.New("Cannot parse incident body." 
+ string(body)) - } else { - incident.ID = data.Incident.ID - incident.Component = data.Incident.Component + return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body)) + } + + incident.ID = data.Incident.ID + if resp.StatusCode != 200 { + return fmt.Errorf("Could not create/update incident!") + } + + return nil +} + +func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) { + resp, body, err := cfg.makeRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil) + if err != nil { + return 0, err } if resp.StatusCode != 200 { - return errors.New("Could not create/update incident!") - } - - return nil -} - -func (monitor *CachetMonitor) fetchComponent(componentID string) (*Component, error) { - _, body, err := monitor.makeRequest("GET", "/components/"+componentID, nil) - if err != nil { - return nil, err + return 0, fmt.Errorf("Invalid status code. Received %d", resp.StatusCode) } var data struct { - Component Component `json:"data"` + Component struct { + Status int `json:"status"` + } `json:"data"` } if err := json.Unmarshal(body, &data); err != nil { - return nil, errors.New("Cannot parse component body. " + string(body)) + return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body), err) } - return &data.Component, nil -} - -func (monitor *CachetMonitor) UpdateComponent(incident *Incident) error { - if incident.ComponentID == nil || len(*incident.ComponentID) == 0 { - return nil - } - - if incident.Component == nil { - // fetch component - component, err := monitor.fetchComponent(string(*incident.ComponentID)) - if err != nil { - return fmt.Errorf("Cannot fetch component for incident. 
%v\n", err) - } - - incident.Component = component - } - - status, _ := strconv.Atoi(string(incident.Status)) - switch status { - case 1, 2, 3: - if incident.Component.Status == "3" { - incident.Component.Status = "4" - } else { - incident.Component.Status = "3" - } - case 4: - incident.Component.Status = "1" - } - - jsonBytes, _ := json.Marshal(map[string]interface{}{ - "status": incident.Component.Status, - }) - - resp, _, err := monitor.makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes) - if err != nil || resp.StatusCode != 200 { - return fmt.Errorf("Could not update component: (resp code %d) %v", resp.StatusCode, err) - } - - return nil + return data.Component.Status, nil } // SetInvestigating sets status to Investigating func (incident *Incident) SetInvestigating() { - incident.Status = "1" - incident.HumanStatus = "Investigating" + incident.Status = 1 } // SetIdentified sets status to Identified func (incident *Incident) SetIdentified() { - incident.Status = "2" - incident.HumanStatus = "Identified" + incident.Status = 2 } // SetWatching sets status to Watching func (incident *Incident) SetWatching() { - incident.Status = "3" - incident.HumanStatus = "Watching" + incident.Status = 3 } // SetFixed sets status to Fixed func (incident *Incident) SetFixed() { - incident.Status = "4" - incident.HumanStatus = "Fixed" + incident.Status = 4 } diff --git a/metrics.go b/metrics.go deleted file mode 100644 index 25e4973..0000000 --- a/metrics.go +++ /dev/null @@ -1,25 +0,0 @@ -package cachet - -import ( - "encoding/json" - "fmt" - "strconv" -) - -// SendMetric sends lag metric point -func (monitor *CachetMonitor) SendMetric(metricID int, delay int64) error { - if metricID <= 0 { - return nil - } - - jsonBytes, _ := json.Marshal(&map[string]interface{}{ - "value": delay, - }) - - resp, _, err := monitor.makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) - if err != nil || resp.StatusCode != 200 { - return 
fmt.Errorf("Could not log data point!\n%v\n", err) - } - - return nil -} diff --git a/monitor.go b/monitor.go index b1acce8..93b3de5 100644 --- a/monitor.go +++ b/monitor.go @@ -2,103 +2,81 @@ package cachet import ( "crypto/tls" - "encoding/json" - "log" + "errors" + "fmt" + "io/ioutil" "net/http" - "os" - "os/signal" + "regexp" "strconv" + "strings" "sync" "time" ) -const timeout = time.Duration(time.Second) +const HttpTimeout = time.Duration(time.Second) +const DefaultInterval = 60 +const DefaultTimeFormat = "15:04:05 Jan 2 MST" // Monitor data model type Monitor struct { - Name string `json:"name"` - URL string `json:"url"` - MetricID int `json:"metric_id"` - Threshold float32 `json:"threshold"` - ComponentID *int `json:"component_id"` - ExpectedStatusCode int `json:"expected_status_code"` - StrictTLS *bool `json:"strict_tls"` - Interval time.Duration `json:"interval"` + Name string `json:"name"` + URL string `json:"url"` + Method string `json:"method"` + StrictTLS bool `json:"strict_tls"` + CheckInterval time.Duration `json:"interval"` - History []bool `json:"-"` - LastFailReason *string `json:"-"` - Incident *Incident `json:"-"` + MetricID int `json:"metric_id"` + ComponentID int `json:"component_id"` + + // Threshold = percentage + Threshold float32 `json:"threshold"` + // Saturat + ExpectedStatusCode int `json:"expected_status_code"` + // compiled to Regexp + ExpectedBody string `json:"expected_body"` + bodyRegexp *regexp.Regexp + + history []bool + lastFailReason string + incident *Incident config *CachetMonitor // Closed when mon.Stop() is called stopC chan bool } -func (cfg *CachetMonitor) Run() { - cfg.Logger.Printf("System: %s\nInterval: %d second(s)\nAPI: %s\n\n", cfg.SystemName, cfg.Interval, cfg.APIUrl) - cfg.Logger.Printf("Starting %d monitors:\n", len(cfg.Monitors)) - for _, mon := range cfg.Monitors { - cfg.Logger.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) - if mon.MetricID > 0 { - cfg.Logger.Printf(" - 
Logs lag to metric id: %d\n", mon.MetricID) +func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) { + wg.Add(1) + mon.config = cfg + mon.stopC = make(chan bool) + + mon.config.Logger.Printf(" Starting %s: %d seconds check interval\n - %v %s", mon.Name, mon.CheckInterval, mon.Method, mon.URL) + + // print features + if mon.ExpectedStatusCode > 0 { + mon.config.Logger.Printf(" - Expect HTTP %d", mon.ExpectedStatusCode) + } + if len(mon.ExpectedBody) > 0 { + mon.config.Logger.Printf(" - Expect Body to match \"%v\"", mon.ExpectedBody) + } + if mon.MetricID > 0 { + mon.config.Logger.Printf(" - Log lag to metric id %d\n", mon.MetricID) + } + if mon.ComponentID > 0 { + mon.config.Logger.Printf(" - Update component id %d\n\n", mon.ComponentID) + } + + mon.Tick() + + ticker := time.NewTicker(mon.CheckInterval * time.Second) + for { + select { + case <-ticker.C: + mon.Tick() + case <-mon.stopC: + wg.Done() + return } - if mon.ComponentID != nil && *mon.ComponentID > 0 { - cfg.Logger.Printf(" - Updates component id: %d\n", *mon.ComponentID) - } - } - - cfg.Logger.Println() - wg := &sync.WaitGroup{} - - for _, mon := range cfg.Monitors { - wg.Add(1) - mon.config = cfg - mon.stopC = make(chan bool) - - go func(mon *Monitor) { - if mon.Interval < 1 { - mon.Interval = time.Duration(cfg.Interval) - } - - ticker := time.NewTicker(mon.Interval * time.Second) - for { - select { - case <-ticker.C: - mon.Run() - case <-mon.StopC(): - wg.Done() - return - } - } - }(mon) - } - - signals := make(chan os.Signal, 1) - signal.Notify(signals, os.Interrupt, os.Kill) - <-signals - - log.Println("Waiting monitors to end current operation") - for _, mon := range cfg.Monitors { - mon.Stop() - } - - wg.Wait() -} - -// Run loop -func (monitor *Monitor) Run() { - reqStart := getMs() - isUp := monitor.doRequest() - lag := getMs() - reqStart - - if len(monitor.History) >= 10 { - monitor.History = monitor.History[len(monitor.History)-9:] - } - monitor.History = append(monitor.History, isUp) 
- monitor.AnalyseData() - - if isUp == true && monitor.MetricID > 0 { - monitor.config.SendMetric(monitor.MetricID, lag) } } @@ -110,10 +88,6 @@ func (monitor *Monitor) Stop() { close(monitor.stopC) } -func (monitor *Monitor) StopC() <-chan bool { - return monitor.stopC -} - func (monitor *Monitor) Stopped() bool { select { case <-monitor.stopC: @@ -123,11 +97,30 @@ func (monitor *Monitor) Stopped() bool { } } +func (monitor *Monitor) Tick() { + reqStart := getMs() + isUp := monitor.doRequest() + lag := getMs() - reqStart + + if len(monitor.history) == 9 { + monitor.config.Logger.Printf("%v is now saturated\n", monitor.Name) + } + if len(monitor.history) >= 10 { + monitor.history = monitor.history[len(monitor.history)-9:] + } + monitor.history = append(monitor.history, isUp) + monitor.AnalyseData() + + if isUp == true && monitor.MetricID > 0 { + monitor.config.SendMetric(monitor.MetricID, lag) + } +} + func (monitor *Monitor) doRequest() bool { client := &http.Client{ - Timeout: timeout, + Timeout: HttpTimeout, } - if monitor.StrictTLS != nil && *monitor.StrictTLS == false { + if monitor.StrictTLS == false { client.Transport = &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } @@ -135,19 +128,36 @@ func (monitor *Monitor) doRequest() bool { resp, err := client.Get(monitor.URL) if err != nil { - errString := err.Error() - monitor.LastFailReason = &errString + monitor.lastFailReason = err.Error() + return false } defer resp.Body.Close() - if resp.StatusCode != monitor.ExpectedStatusCode { - failReason := "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode) - monitor.LastFailReason = &failReason + if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode { + monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". 
Expected " + strconv.Itoa(monitor.ExpectedStatusCode) + return false } + if monitor.bodyRegexp != nil { + // check body + responseBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + monitor.lastFailReason = err.Error() + + return false + } + + match := monitor.bodyRegexp.Match(responseBody) + if !match { + monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody + } + + return match + } + return true } @@ -155,60 +165,89 @@ func (monitor *Monitor) doRequest() bool { func (monitor *Monitor) AnalyseData() { // look at the past few incidents numDown := 0 - for _, wasUp := range monitor.History { + for _, wasUp := range monitor.history { if wasUp == false { numDown++ } } - t := (float32(numDown) / float32(len(monitor.History))) * 100 - monitor.config.Logger.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) + t := (float32(numDown) / float32(len(monitor.history))) * 100 + monitor.config.Logger.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second)) - if len(monitor.History) != 10 { - // not enough data + if len(monitor.history) != 10 { + // not saturated return } - if t > monitor.Threshold && monitor.Incident == nil { - // is down, create an incident - monitor.config.Logger.Println("Creating incident...") - - component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) - monitor.Incident = &Incident{ + if t > monitor.Threshold && monitor.incident == nil { + monitor.incident = &Incident{ Name: monitor.Name + " - " + monitor.config.SystemName, - Message: monitor.Name + " check failed", - ComponentID: &component_id, + ComponentID: monitor.ComponentID, + Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat), + Notify: true, } - if monitor.LastFailReason != nil { - monitor.Incident.Message += "\n\n - " + *monitor.LastFailReason + if 
len(monitor.lastFailReason) > 0 { + monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`" } + // is down, create an incident + monitor.config.Logger.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason) // set investigating status - monitor.Incident.SetInvestigating() - + monitor.incident.SetInvestigating() // create/update incident - monitor.config.SendIncident(monitor.Incident) - monitor.config.UpdateComponent(monitor.Incident) - } else if t < monitor.Threshold && monitor.Incident != nil { - // was down, created an incident, its now ok, make it resolved. - monitor.config.Logger.Println("Updating incident to resolved...") - - component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) - monitor.Incident = &Incident{ - Name: monitor.Incident.Name, - Message: monitor.Name + " check succeeded", - ComponentID: &component_id, + if err := monitor.incident.Send(monitor.config); err != nil { + monitor.config.Logger.Printf("Error sending incident: %v\n", err) } + } else if t < monitor.Threshold && monitor.incident != nil { + // was down, created an incident, its now ok, make it resolved. 
+ monitor.config.Logger.Printf("%v resolved downtime incident", monitor.Name) - monitor.Incident.SetFixed() - monitor.config.SendIncident(monitor.Incident) - monitor.config.UpdateComponent(monitor.Incident) + // resolve incident + monitor.incident.Message = "\n**Resolved** - " + time.Now().Format(DefaultTimeFormat) + "\n\n - - - \n\n" + monitor.incident.Message + monitor.incident.SetFixed() + monitor.incident.Send(monitor.config) - monitor.Incident = nil + monitor.lastFailReason = "" + monitor.incident = nil } } -func getMs() int64 { - return time.Now().UnixNano() / int64(time.Millisecond) +func (monitor *Monitor) ValidateConfiguration() error { + if len(monitor.ExpectedBody) > 0 { + exp, err := regexp.Compile(monitor.ExpectedBody) + if err != nil { + return err + } + + monitor.bodyRegexp = exp + } + + if len(monitor.ExpectedBody) == 0 && monitor.ExpectedStatusCode == 0 { + return errors.New("Nothing to check, both 'expected_body' and 'expected_status_code' fields empty") + } + + if monitor.CheckInterval < 1 { + monitor.CheckInterval = DefaultInterval + } + + monitor.Method = strings.ToUpper(monitor.Method) + switch monitor.Method { + case "GET", "POST", "DELETE", "OPTIONS", "HEAD": + break + case "": + monitor.Method = "GET" + default: + return fmt.Errorf("Unsupported check method: %v", monitor.Method) + } + + if monitor.ComponentID == 0 && monitor.MetricID == 0 { + return errors.New("component_id & metric_id are unset") + } + + if monitor.Threshold <= 0 { + monitor.Threshold = 100 + } + + return nil } diff --git a/readme.md b/readme.md index 30831ec..b2fd71d 100644 --- a/readme.md +++ b/readme.md @@ -4,9 +4,9 @@ Features -------- - [x] Creates & Resolves Incidents -- [x] Posts monitor lag (interval configurable) +- [x] Posts monitor lag to cachet graphs - [x] Updates Component to Partial Outage -- [x] Updates Component to Major Outage if in Partial Outage +- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed 
monitoring) - [x] Can be run on multiple servers and geo regions Configuration @@ -14,36 +14,26 @@ Configuration ``` { - "api_url": "https://demo.cachethq.io/api/v1", - "api_token": "", - "interval": 60, - "monitors": [ - { - "name": "Name of your monitor", - "url": "Ping URL", - "metric_id": , - "component_id": , - "threshold": 80, - "expected_status_code": 200, - "strict_tls": true, - "interval": 5 - } - ], - "insecure_api": false + "api_url": "https:///api/v1", + "api_token": "", + "insecure_api": false, // optional, defaults to false; set to true to ignore HTTPS certificate errors of the Cachet API (e.g. self-signed/untrusted certificate) + "monitors": [{ + "name": "Name of your monitor", // required, friendly name for your monitor + "url": "Ping URL", // required, url to probe + "method": "get", // optional, http method (defaults to GET) + "strict_tls": true, // optional, verify the monitored url's TLS certificate; if false or omitted, certificate errors are ignored (e.g. self-signed certificate) + "interval": 10, // optional, seconds between checks (defaults to 60) + "metric_id": , // post lag to cachet metric (graph) + "component_id": , // post incidents to this component + "threshold": 80, // If % of downtime is over this threshold, open an incident + "expected_status_code": 200, // optional, expected status code (either status code or body must be supplied) + "expected_body": "P.*NG" // optional, regular expression + }], + "system_name": "", // optional, system name to identify bot + "log_path": "" // optional, defaults to stdout } ``` -*Notes:* - -- `metric_id` is optional -- `insecure_api` if true it will ignore HTTPS certificate errors (eg if self-signed) -- `strict_tls` if false (true is default) it will ignore HTTPS certificate errors (eg if monitor uses self-signed certificate) -- `component_id` is optional -- `threshold` is a percentage -- `expected_status_code` is a http response code -- `interval` is the duration in seconds between two checks. 
-- GET request will be performed on the `url` - Installation ------------ @@ -63,11 +53,11 @@ Usage of cachet-monitor: Environment variables --------------------- -| Name | Example Value | Description | -| ------------ | --------------------------- | --------------------------- | -| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | -| CACHET_TOKEN | randomvalue | API Authentication token | -| CACHET_DEV | 1 | Strips logging | +| Name | Example Value | Description | +| ------------ | ------------------------------ | --------------------------- | +| CACHET_API | http://demo.cachethq.io/api/v1 | URL endpoint for cachet api | +| CACHET_TOKEN | APIToken123 | API Authentication token | +| CACHET_DEV | 1 | Strips logging | Vision and goals ----------------