From 664795a9021f3bf995b5ebfe45e7480ff620d3a4 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 15 Mar 2015 19:49:25 +0100 Subject: [PATCH 01/40] fc --- .gitignore | 1 + main.go | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 .gitignore create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6d7bad5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +gin-bin \ No newline at end of file diff --git a/main.go b/main.go new file mode 100644 index 0000000..2f7be75 --- /dev/null +++ b/main.go @@ -0,0 +1,69 @@ + +package main + +import ( + "fmt" + // "time" + "net/http" + "bytes" + "io/ioutil" + "encoding/json" + "time" +) + +const timeout = time.Duration(time.Second) + +func main() { + ticker := time.NewTicker(time.Second) + for _ = range ticker.C { + reqStart := time.Now().UnixNano() / int64(time.Millisecond) + doRequest() + reqEnd := time.Now().UnixNano() / int64(time.Millisecond) + go sendMetric(reqEnd - reqStart) + } +} + +func doRequest() error { + client := http.Client{ + Timeout: timeout, + } + resp, err := client.Get("https://nodegear.io/ping") // http://127.0.0.1:1337 + if err != nil { + return err + } + + defer resp.Body.Close() + + return nil +} + +func sendMetric(delay int64) { + js := &map[string]interface{}{ + "value": delay, + } + + jsonBytes, err := json.Marshal(&js) + if err != nil { + panic(err) + } + + req, err := http.NewRequest("POST", "https://demo.cachethq.io/api/metrics/1/points", bytes.NewBuffer(jsonBytes)) + if err != nil { + panic(err) + } + + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Cachet-Token", "5wQt9MnJXmhnQsDI8Hmv") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + panic(err) + } + + defer resp.Body.Close() + + body, _ := ioutil.ReadAll(resp.Body) + fmt.Println(string(body)) +} From 7b3d6eba89599641aedf0b20749022d1b16d3dfe Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 15 Mar 2015 19:52:33 +0100 Subject: [PATCH 02/40] (fix): main.go indentation --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index 2f7be75..5fbfc68 100644 --- a/main.go +++ b/main.go @@ -26,7 +26,7 @@ func main() { func doRequest() error { client := http.Client{ Timeout: timeout, - } + } resp, err := client.Get("https://nodegear.io/ping") // http://127.0.0.1:1337 if err != nil { return err From 1cadc9b3b3a5d18f4a838c0152a75d9e4aec8083 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 15 Mar 2015 20:58:14 +0100 Subject: [PATCH 03/40] modularize --- cachet/cachet.go | 8 ++++++ cachet/metrics.go | 42 +++++++++++++++++++++++++++++ cachet/monitor.go | 49 +++++++++++++++++++++++++++++++++ main.go | 69 ++++++++--------------------------------------- 4 files changed, 110 insertions(+), 58 deletions(-) create mode 100644 cachet/cachet.go create mode 100644 cachet/metrics.go create mode 100644 cachet/monitor.go diff --git a/cachet/cachet.go b/cachet/cachet.go new file mode 100644 index 0000000..7c2b25d --- /dev/null +++ b/cachet/cachet.go @@ -0,0 +1,8 @@ +package cachet + +import "os" + +// apiUrl -> https://demo.cachethq.io/api +// apiToken -> qwertyuiop +var apiUrl = os.Getenv("CACHET_API") +var apiToken = os.Getenv("CACHET_TOKEN") \ No newline at end of file diff --git a/cachet/metrics.go b/cachet/metrics.go new file mode 100644 index 0000000..791be75 --- /dev/null +++ b/cachet/metrics.go @@ -0,0 +1,42 @@ +package cachet + +import ( + "fmt" + "bytes" + "strconv" + "net/http" + "io/ioutil" + "encoding/json" +) + +func SendMetric(metricId int, delay int64) { + jsonBytes, err := json.Marshal(&map[string]interface{}{ + "value": delay, + }) + if err != nil { + panic(err) + } + + req, err := http.NewRequest("POST", apiUrl + "/metrics/" + strconv.Itoa(metricId) + "/points", bytes.NewBuffer(jsonBytes)) + if err != nil { + panic(err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Cachet-Token", apiToken) + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + panic(err) + } + + defer resp.Body.Close() + + body, _ := ioutil.ReadAll(resp.Body) + fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) + + if resp.StatusCode != 200 { + fmt.Println("Could not log data point!") + } +} diff --git a/cachet/monitor.go b/cachet/monitor.go new file mode 100644 index 0000000..81d24a3 --- /dev/null +++ b/cachet/monitor.go @@ -0,0 +1,49 @@ +package cachet + +import ( + "fmt" + "time" + "net/http" +) + +const timeout = time.Duration(time.Second) + +type Monitor struct { + Url string `json:"url"` + MetricId int `json:"metric_id"` +} + +func (monitor *Monitor) Run() { + reqStart := getMs() + err := monitor.doRequest() + lag := getMs() - reqStart + + failed := false + if err != nil { + failed = true + } + + if failed == true { + fmt.Println("Req failed") + } + + SendMetric(1, lag) +} + +func (monitor *Monitor) doRequest() error { + client := &http.Client{ + Timeout: timeout, + } + resp, err := client.Get(monitor.Url) // http://127.0.0.1:1337 + if err != nil { + return err + } + + defer resp.Body.Close() + + return nil +} + +func getMs() int64 { + return time.Now().UnixNano() / int64(time.Millisecond) +} diff --git a/main.go b/main.go index 5fbfc68..1534a72 100644 --- a/main.go +++ b/main.go @@ -1,69 +1,22 @@ - package main import ( - "fmt" - // "time" - "net/http" - "bytes" - "io/ioutil" - "encoding/json" "time" + "./cachet" ) -const timeout = time.Duration(time.Second) - func main() { + monitors := []cachet.Monitor{ + cachet.Monitor{ + Url: "https://nodegear.io/ping", + MetricId: 1, + }, + } + ticker := time.NewTicker(time.Second) for _ = range ticker.C { - reqStart := time.Now().UnixNano() / int64(time.Millisecond) - doRequest() - reqEnd := time.Now().UnixNano() / int64(time.Millisecond) - go sendMetric(reqEnd - reqStart) + for _, monitor := range monitors { + go monitor.Run() + } } } - -func doRequest() error { - client := http.Client{ - Timeout: timeout, - } - resp, err := client.Get("https://nodegear.io/ping") // http://127.0.0.1:1337 - if err != nil { - return err - } - - defer resp.Body.Close() - - return nil -} - -func sendMetric(delay int64) { - js := &map[string]interface{}{ - "value": delay, - } - - jsonBytes, err := json.Marshal(&js) - if err != nil { - panic(err) - } - - req, err := http.NewRequest("POST", "https://demo.cachethq.io/api/metrics/1/points", bytes.NewBuffer(jsonBytes)) - if err != nil { - panic(err) - } - - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", "5wQt9MnJXmhnQsDI8Hmv") - - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - panic(err) - } - - defer resp.Body.Close() - - body, _ := ioutil.ReadAll(resp.Body) - fmt.Println(string(body)) -} From f478ad9895901dda590a2a510c0b25d8e60823ab Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Mon, 16 Mar 2015 01:44:48 +0100 Subject: [PATCH 04/40] Beginning of incident reporting --- cachet/component.go | 18 ++++++++++++++ cachet/incident.go | 17 ++++++++++++++ cachet/metrics.go | 4 ++-- cachet/monitor.go | 57 +++++++++++++++++++++++++++++++++++---------- main.go | 11 +++++++-- 5 files changed, 91 insertions(+), 16 deletions(-) create mode 100644 cachet/component.go create mode 100644 cachet/incident.go diff --git a/cachet/component.go b/cachet/component.go new file mode 100644 index 0000000..a6cd677 --- /dev/null +++ b/cachet/component.go @@ -0,0 +1,18 @@ +package cachet + +import ( + "time" +) + +type Component struct { + Id int `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Status int `json:"status"` + Link *string `json:"link"` + Order *int `json:"order"` + Group_id *int `json:"group_id"` + Created_at *time.Time `json:"created_at"` + Updated_at *time.Time `json:"updated_at"` + Deleted_at *time.Time `json:"deleted_at"` +} \ No newline at end of file diff --git a/cachet/incident.go b/cachet/incident.go new file mode 100644 index 0000000..c4a177e --- /dev/null +++ b/cachet/incident.go @@ -0,0 +1,17 @@ +package cachet + +import ( + "time" +) + +type Incident struct { + Id int `json:"id"` + Name string `json:"name"` + Message string `json:"message"` + Status int `json:"status"`// 4? + Human_status string `json:"human_status"` + Component *Component `json:"component"` + Component_id *int `json:"component_id"` + Created_at *time.Time `json:"created_at"` + Updated_at *time.Time `json:"updated_at"` +} \ No newline at end of file diff --git a/cachet/metrics.go b/cachet/metrics.go index 791be75..ac34e56 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -33,8 +33,8 @@ func SendMetric(metricId int, delay int64) { defer resp.Body.Close() - body, _ := ioutil.ReadAll(resp.Body) - fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) + _, _ = ioutil.ReadAll(resp.Body) + // fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) if resp.StatusCode != 200 { fmt.Println("Could not log data point!") diff --git a/cachet/monitor.go b/cachet/monitor.go index 81d24a3..e023cbb 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -11,37 +11,70 @@ const timeout = time.Duration(time.Second) type Monitor struct { Url string `json:"url"` MetricId int `json:"metric_id"` + Threshold float32 `json:"threshold"` + ComponentId *int `json:"component_id"` + ExpectedStatusCode int `json:"expected_status_code"` + + History []bool `json:"-"` + Incident *Incident `json:"-"` } func (monitor *Monitor) Run() { reqStart := getMs() - err := monitor.doRequest() + isUp := monitor.doRequest() lag := getMs() - reqStart - failed := false - if err != nil { - failed = true + if len(monitor.History) >= 10 { + monitor.History = monitor.History[len(monitor.History)-9:] } + monitor.History = append(monitor.History, isUp) + monitor.AnalyseData() - if failed == true { - fmt.Println("Req failed") + if isUp == true { + SendMetric(monitor.MetricId, lag) + return } - - SendMetric(1, lag) } -func (monitor *Monitor) doRequest() error { +func (monitor *Monitor) doRequest() bool { client := &http.Client{ Timeout: timeout, } - resp, err := client.Get(monitor.Url) // http://127.0.0.1:1337 + resp, err := client.Get(monitor.Url) if err != nil { - return err + return false } defer resp.Body.Close() - return nil + return resp.StatusCode == monitor.ExpectedStatusCode +} + +func (monitor *Monitor) AnalyseData() { + // look at the past few incidents + if len(monitor.History) != 10 { + // not enough data + return + } + + numDown := 0 + for _, wasUp := range monitor.History { + if wasUp == false { + numDown++ + } + } + + t := (float32(numDown) / float32(len(monitor.History))) * 100 + fmt.Printf("%s %.2f%% Down. Threshold: %.2f%%\n", monitor.Url, t, monitor.Threshold) + if t > monitor.Threshold && monitor.Incident == nil { + // is down, create an incident + fmt.Println("Creating incident...") + monitor.Incident = &Incident{} + } else if t < monitor.Threshold && monitor.Incident != nil { + // was down, created an incident, its now ok, make it resolved. + fmt.Println("Updating incident to resolved...") + monitor.Incident = nil + } } func getMs() int64 { diff --git a/main.go b/main.go index 1534a72..c3ce774 100644 --- a/main.go +++ b/main.go @@ -6,10 +6,17 @@ import ( ) func main() { - monitors := []cachet.Monitor{ - cachet.Monitor{ + monitors := []*cachet.Monitor{ + /*&cachet.Monitor{ Url: "https://nodegear.io/ping", MetricId: 1, + Threshold: 80.0, + },*/ + &cachet.Monitor{ + Url: "http://localhost:1337", + MetricId: 1, + Threshold: 80.0, + ExpectedStatusCode: 200, }, } From 92d8791a4f0ec393aabcc1f24e2fa372a896676c Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Mon, 16 Mar 2015 21:02:29 +0100 Subject: [PATCH 05/40] Log & resolve incidents, add readme --- cachet/incident.go | 82 ++++++++++++++++++++++++++++++++++++++++++++-- cachet/monitor.go | 30 ++++++++++++----- main.go | 17 ++++++++-- readme.md | 18 ++++++++++ 4 files changed, 133 insertions(+), 14 deletions(-) create mode 100644 readme.md diff --git a/cachet/incident.go b/cachet/incident.go index c4a177e..a9bc815 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -1,7 +1,12 @@ package cachet import ( - "time" + "fmt" + "bytes" + "io/ioutil" + "strconv" + "net/http" + "encoding/json" ) type Incident struct { @@ -12,6 +17,77 @@ type Incident struct { Human_status string `json:"human_status"` Component *Component `json:"component"` Component_id *int `json:"component_id"` - Created_at *time.Time `json:"created_at"` - Updated_at *time.Time `json:"updated_at"` + Created_at int `json:"created_at"` + Updated_at int `json:"updated_at"` +} + +type IncidentData struct { + Incident Incident `json:"data"` +} + +func (incident *Incident) Send() { + jsonBytes, err := json.Marshal(incident) + if err != nil { + panic(err) + } + + var req *http.Request + if incident.Id == 0 { + req, err = http.NewRequest("POST", apiUrl + "/incidents", bytes.NewBuffer(jsonBytes)) + } else { + req, err = http.NewRequest("PUT", apiUrl + "/incidents/" + strconv.Itoa(incident.Id), bytes.NewBuffer(jsonBytes)) + } + + if err != nil { + panic(err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Cachet-Token", apiToken) + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + panic(err) + } + + defer resp.Body.Close() + + body, _ := ioutil.ReadAll(resp.Body) + fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) + + var data IncidentData + err = json.Unmarshal(body, &data) + if err != nil { + fmt.Println("Cannot parse incident body.") + panic(err) + } else { + incident.Id = data.Incident.Id + } + + fmt.Println("ID:"+strconv.Itoa(incident.Id)) + + if resp.StatusCode != 200 { + fmt.Println("Could not create/update incident!") + } +} + +func (incident *Incident) SetInvestigating() { + incident.Status = 1 + incident.Human_status = "Investigating" +} + +func (incident *Incident) SetIdentified() { + incident.Status = 2 + incident.Human_status = "Identified" +} + +func (incident *Incident) SetWatching() { + incident.Status = 3 + incident.Human_status = "Watching" +} + +func (incident *Incident) SetFixed() { + incident.Status = 4 + incident.Human_status = "Fixed" } \ No newline at end of file diff --git a/cachet/monitor.go b/cachet/monitor.go index e023cbb..e08cc0c 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -9,6 +9,7 @@ import ( const timeout = time.Duration(time.Second) type Monitor struct { + Name string `json:"name"` Url string `json:"url"` MetricId int `json:"metric_id"` Threshold float32 `json:"threshold"` @@ -30,9 +31,8 @@ func (monitor *Monitor) Run() { monitor.History = append(monitor.History, isUp) monitor.AnalyseData() - if isUp == true { + if isUp == true && monitor.MetricId > 0 { SendMetric(monitor.MetricId, lag) - return } } @@ -52,11 +52,6 @@ func (monitor *Monitor) doRequest() bool { func (monitor *Monitor) AnalyseData() { // look at the past few incidents - if len(monitor.History) != 10 { - // not enough data - return - } - numDown := 0 for _, wasUp := range monitor.History { if wasUp == false { @@ -65,14 +60,31 @@ func (monitor *Monitor) AnalyseData() { } t := (float32(numDown) / float32(len(monitor.History))) * 100 - fmt.Printf("%s %.2f%% Down. Threshold: %.2f%%\n", monitor.Url, t, monitor.Threshold) + fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.Url, t, time.Now().UnixNano() / int64(time.Second), monitor.Threshold) + + if len(monitor.History) != 10 { + // not enough data + return + } + if t > monitor.Threshold && monitor.Incident == nil { // is down, create an incident fmt.Println("Creating incident...") - monitor.Incident = &Incident{} + + monitor.Incident = &Incident{ + Name: monitor.Name, + Message: monitor.Name + " is unreachable.", + } + + monitor.Incident.SetInvestigating() + monitor.Incident.Send() } else if t < monitor.Threshold && monitor.Incident != nil { // was down, created an incident, its now ok, make it resolved. fmt.Println("Updating incident to resolved...") + + monitor.Incident.SetFixed() + monitor.Incident.Send() + monitor.Incident = nil } } diff --git a/main.go b/main.go index c3ce774..bb44ac3 100644 --- a/main.go +++ b/main.go @@ -1,25 +1,38 @@ package main import ( + "fmt" "time" - "./cachet" + "github.com/castawaylabs/cachet-monitor/cachet" ) func main() { monitors := []*cachet.Monitor{ /*&cachet.Monitor{ + Name: "nodegear frontend", Url: "https://nodegear.io/ping", MetricId: 1, Threshold: 80.0, + ExpectedStatusCode: 200, },*/ &cachet.Monitor{ + Name: "local test server", Url: "http://localhost:1337", - MetricId: 1, Threshold: 80.0, ExpectedStatusCode: 200, }, } + fmt.Printf("Starting %d monitors:\n", len(monitors)) + for _, monitor := range monitors { + fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.Url, monitor.ExpectedStatusCode) + if monitor.MetricId > 0 { + fmt.Printf(" - Logs lag to metric id: %d\n", monitor.MetricId) + } + } + + fmt.Println() + ticker := time.NewTicker(time.Second) for _ = range ticker.C { for _, monitor := range monitors { diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..ea29b12 --- /dev/null +++ b/readme.md @@ -0,0 +1,18 @@ +Cachet Monitor plugin +===================== + +This is a monitoring plugin for CachetHQ. + +How to run: +----------- + +1. Set up [Go](https://golang.org) +2. `go install github.com/castawaylabs/cachet-monitor` +3. `cachet-monitor` + +Environment variables: +---------------------- + +| Name | Example Value | Description | +| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | +| CACHET_TOKEN | randomvalue | API Authentication token | \ No newline at end of file From 44a9cf905f2d7647876dc8db06c728f43da65be1 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Mon, 16 Mar 2015 21:04:58 +0100 Subject: [PATCH 06/40] Fix readme table --- readme.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index ea29b12..a42c417 100644 --- a/readme.md +++ b/readme.md @@ -13,6 +13,7 @@ How to run: Environment variables: ---------------------- -| Name | Example Value | Description | -| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | -| CACHET_TOKEN | randomvalue | API Authentication token | \ No newline at end of file +| Name | Example Value | Description | +| ------------ | --------------------------- | --------------------------- | +| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | +| CACHET_TOKEN | randomvalue | API Authentication token | \ No newline at end of file From 42237e9c8601bfabed67d00b1ff6242941e36a79 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Wed, 18 Mar 2015 22:58:45 +0100 Subject: [PATCH 07/40] Load config from disk|url, create incidents - Resolve incidents after the monitor is up - Example configuration - Updated readme --- .gitignore | 3 +- cachet/cachet.go | 6 ++-- cachet/config.go | 74 +++++++++++++++++++++++++++++++++++++++++++++ cachet/incident.go | 6 ++-- cachet/metrics.go | 31 ++++++++----------- cachet/monitor.go | 11 +++++-- example.config.json | 14 +++++++++ main.go | 25 +++------------ readme.md | 9 +++++- 9 files changed, 130 insertions(+), 49 deletions(-) create mode 100644 cachet/config.go create mode 100644 example.config.json diff --git a/.gitignore b/.gitignore index 6d7bad5..2ec9320 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -gin-bin \ No newline at end of file +gin-bin +example.config.local.json \ No newline at end of file diff --git a/cachet/cachet.go b/cachet/cachet.go index 7c2b25d..852a74e 100644 --- a/cachet/cachet.go +++ b/cachet/cachet.go @@ -2,7 +2,5 @@ package cachet import "os" -// apiUrl -> https://demo.cachethq.io/api -// apiToken -> qwertyuiop -var apiUrl = os.Getenv("CACHET_API") -var apiToken = os.Getenv("CACHET_TOKEN") \ No newline at end of file +var ApiUrl = os.Getenv("CACHET_API") +var ApiToken = os.Getenv("CACHET_TOKEN") \ No newline at end of file diff --git a/cachet/config.go b/cachet/config.go new file mode 100644 index 0000000..9db7b6e --- /dev/null +++ b/cachet/config.go @@ -0,0 +1,74 @@ +package cachet + +import ( + "os" + "fmt" + "flag" + "net/url" + "net/http" + "io/ioutil" + "encoding/json" +) + +var Config CachetConfig + +type CachetConfig struct { + API_Url string `json:"api_url"` + API_Token string `json:"api_token"` + Monitors []*Monitor `json:"monitors"` +} + +func init() { + var configPath string + flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") + flag.Parse() + + var data []byte + + // test if its a url + _, err := url.ParseRequestURI(configPath) + if err == nil { + // download config + response, err := http.Get(configPath) + if err != nil { + fmt.Printf("Cannot download network config: %v\n", err) + os.Exit(1) + } + + defer response.Body.Close() + + data, _ = ioutil.ReadAll(response.Body) + + fmt.Println("Downloaded network configuration.") + } else { + data, err = ioutil.ReadFile(configPath) + if err != nil { + fmt.Println("Config file '" + configPath + "' missing!") + os.Exit(1) + } + } + + err = json.Unmarshal(data, &Config) + + if err != nil { + fmt.Println("Cannot parse config!") + os.Exit(1) + } + + if len(os.Getenv("CACHET_API")) > 0 { + Config.API_Url = os.Getenv("CACHET_API") + } + if len(os.Getenv("CACHET_TOKEN")) > 0 { + Config.API_Token = os.Getenv("CACHET_TOKEN") + } + + if len(Config.API_Token) == 0 || len(Config.API_Url) == 0 { + fmt.Printf("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/CastawayLabs/cachet-monitor\n") + os.Exit(1) + } + + if len(Config.Monitors) == 0 { + fmt.Printf("No monitors defined!\nSee sample configuration: https://github.com/CastawayLabs/cachet-monitor/blob/master/example.config.json\n") + os.Exit(1) + } +} \ No newline at end of file diff --git a/cachet/incident.go b/cachet/incident.go index a9bc815..2d3bc64 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -33,9 +33,9 @@ func (incident *Incident) Send() { var req *http.Request if incident.Id == 0 { - req, err = http.NewRequest("POST", apiUrl + "/incidents", bytes.NewBuffer(jsonBytes)) + req, err = http.NewRequest("POST", Config.API_Url + "/incidents", bytes.NewBuffer(jsonBytes)) } else { - req, err = http.NewRequest("PUT", apiUrl + "/incidents/" + strconv.Itoa(incident.Id), bytes.NewBuffer(jsonBytes)) + req, err = http.NewRequest("PUT", Config.API_Url + "/incidents/" + strconv.Itoa(incident.Id), bytes.NewBuffer(jsonBytes)) } if err != nil { @@ -43,7 +43,7 @@ func (incident *Incident) Send() { } req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", apiToken) + req.Header.Set("X-Cachet-Token", Config.API_Token) client := &http.Client{} resp, err := client.Do(req) diff --git a/cachet/metrics.go b/cachet/metrics.go index ac34e56..b4e6eef 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -5,38 +5,33 @@ import ( "bytes" "strconv" "net/http" - "io/ioutil" "encoding/json" ) func SendMetric(metricId int, delay int64) { - jsonBytes, err := json.Marshal(&map[string]interface{}{ + if metricId <= 0 { + return + } + + jsonBytes, _ := json.Marshal(&map[string]interface{}{ "value": delay, }) - if err != nil { - panic(err) - } - - req, err := http.NewRequest("POST", apiUrl + "/metrics/" + strconv.Itoa(metricId) + "/points", bytes.NewBuffer(jsonBytes)) - if err != nil { - panic(err) - } - - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", apiToken) client := &http.Client{} + req, _ := http.NewRequest("POST", Config.API_Url + "/metrics/" + strconv.Itoa(metricId) + "/points", bytes.NewBuffer(jsonBytes)) + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Cachet-Token", Config.API_Token) + resp, err := client.Do(req) if err != nil { - panic(err) + fmt.Printf("Could not log data point!\n%v\n", err) + return } defer resp.Body.Close() - _, _ = ioutil.ReadAll(resp.Body) - // fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) - if resp.StatusCode != 200 { fmt.Println("Could not log data point!") } -} +} \ No newline at end of file diff --git a/cachet/monitor.go b/cachet/monitor.go index e08cc0c..4263cc4 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -17,6 +17,7 @@ type Monitor struct { ExpectedStatusCode int `json:"expected_status_code"` History []bool `json:"-"` + LastFailReason *string `json:"-"` Incident *Incident `json:"-"` } @@ -42,6 +43,8 @@ func (monitor *Monitor) doRequest() bool { } resp, err := client.Get(monitor.Url) if err != nil { + errString := err.Error() + monitor.LastFailReason = &errString return false } @@ -73,7 +76,11 @@ func (monitor *Monitor) AnalyseData() { monitor.Incident = &Incident{ Name: monitor.Name, - Message: monitor.Name + " is unreachable.", + Message: monitor.Name + " failed", + } + + if monitor.LastFailReason != nil { + monitor.Incident.Message += "\n\n" + *monitor.LastFailReason } monitor.Incident.SetInvestigating() @@ -91,4 +98,4 @@ func (monitor *Monitor) AnalyseData() { func getMs() int64 { return time.Now().UnixNano() / int64(time.Millisecond) -} +} \ No newline at end of file diff --git a/example.config.json b/example.config.json new file mode 100644 index 0000000..737a2b1 --- /dev/null +++ b/example.config.json @@ -0,0 +1,14 @@ +{ + "api_url": "https://demo.cachethq.io/api", + "api_token": "9yMHsdioQosnyVK4iCVR", + "monitors": [ + { + "name": "nodegear frontend", + "url": "https://nodegear.io/ping", + "metric_id": 1, + "threshold": 80, + "component_id": null, + "expected_status_code": 200 + } + ] +} \ No newline at end of file diff --git a/main.go b/main.go index bb44ac3..2d5ebf2 100644 --- a/main.go +++ b/main.go @@ -7,24 +7,9 @@ import ( ) func main() { - monitors := []*cachet.Monitor{ - /*&cachet.Monitor{ - Name: "nodegear frontend", - Url: "https://nodegear.io/ping", - MetricId: 1, - Threshold: 80.0, - ExpectedStatusCode: 200, - },*/ - &cachet.Monitor{ - Name: "local test server", - Url: "http://localhost:1337", - Threshold: 80.0, - ExpectedStatusCode: 200, - }, - } - - fmt.Printf("Starting %d monitors:\n", len(monitors)) - for _, monitor := range monitors { + fmt.Printf("API: %s\n", cachet.Config.API_Url) + fmt.Printf("Starting %d monitors:\n", len(cachet.Config.Monitors)) + for _, monitor := range cachet.Config.Monitors { fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.Url, monitor.ExpectedStatusCode) if monitor.MetricId > 0 { fmt.Printf(" - Logs lag to metric id: %d\n", monitor.MetricId) @@ -35,8 +20,8 @@ func main() { ticker := time.NewTicker(time.Second) for _ = range ticker.C { - for _, monitor := range monitors { + for _, monitor := range cachet.Config.Monitors { go monitor.Run() } } -} +} \ No newline at end of file diff --git a/readme.md b/readme.md index a42c417..23b8086 100644 --- a/readme.md +++ b/readme.md @@ -6,9 +6,16 @@ This is a monitoring plugin for CachetHQ. How to run: ----------- +Example: + 1. Set up [Go](https://golang.org) 2. `go install github.com/castawaylabs/cachet-monitor` -3. `cachet-monitor` +3. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json` + +Production: + +1. Download the example config and save to `/etc/cachet-monitor.config.json` +2. Run in background: `nohup cachet-monitor 2>&1 > /var/log/cachet-monitor.log &` Environment variables: ---------------------- From f5826d7832bf8f53e44e7fe09eec9c34c09c00aa Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Wed, 18 Mar 2015 23:01:48 +0100 Subject: [PATCH 08/40] Remove cachet/cachet.go --- cachet/cachet.go | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 cachet/cachet.go diff --git a/cachet/cachet.go b/cachet/cachet.go deleted file mode 100644 index 852a74e..0000000 --- a/cachet/cachet.go +++ /dev/null @@ -1,6 +0,0 @@ -package cachet - -import "os" - -var ApiUrl = os.Getenv("CACHET_API") -var ApiToken = os.Getenv("CACHET_TOKEN") \ No newline at end of file From 8e17ebceeff415041f5ebc78ed48543fe7ab7cda Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 19:10:29 +0100 Subject: [PATCH 09/40] Get relevant incident, fix config bug - Make generic makeRequest fn --- cachet/config.go | 4 ++-- cachet/incident.go | 58 ++++++++++++++++++++++++++++++---------------- cachet/metrics.go | 18 ++------------ cachet/monitor.go | 6 +++++ cachet/request.go | 25 ++++++++++++++++++++ 5 files changed, 73 insertions(+), 38 deletions(-) create mode 100644 cachet/request.go diff --git a/cachet/config.go b/cachet/config.go index 9db7b6e..ef7b543 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -26,8 +26,8 @@ func init() { var data []byte // test if its a url - _, err := url.ParseRequestURI(configPath) - if err == nil { + url, err := url.ParseRequestURI(configPath) + if err == nil && len(url.Scheme) > 0 { // download config response, err := http.Get(configPath) if err != nil { diff --git a/cachet/incident.go b/cachet/incident.go index 2d3bc64..ad0a80d 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -2,10 +2,7 @@ package cachet import ( "fmt" - "bytes" - "io/ioutil" "strconv" - "net/http" "encoding/json" ) @@ -25,35 +22,44 @@ type IncidentData struct { Incident Incident `json:"data"` } +type IncidentList struct { + Incidents []Incident `json:"data"` +} + +func GetIncidents() []Incident { + _, body, err := makeRequest("GET", "/incidents", nil) + if err != nil { + panic(err) + } + + var data IncidentList + err = json.Unmarshal(body, &data) + if err != nil { + fmt.Println("Cannot parse incidents.") + panic(err) + } + + return data.Incidents +} + func (incident *Incident) Send() { jsonBytes, err := json.Marshal(incident) if err != nil { panic(err) } - var req *http.Request - if incident.Id == 0 { - req, err = http.NewRequest("POST", Config.API_Url + "/incidents", bytes.NewBuffer(jsonBytes)) - } else { - req, err = http.NewRequest("PUT", Config.API_Url + "/incidents/" + strconv.Itoa(incident.Id), bytes.NewBuffer(jsonBytes)) + requestType := "POST" + requestUrl := "/incidents" + if incident.Id > 0 { + requestType = "PUT" + requestUrl = "/incidents/" + strconv.Itoa(incident.Id) } + resp, body, err := makeRequest(requestType, requestUrl, jsonBytes) if err != nil { panic(err) } - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", Config.API_Token) - - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - panic(err) - } - - defer resp.Body.Close() - - body, _ := ioutil.ReadAll(resp.Body) fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) var data IncidentData @@ -72,6 +78,18 @@ func (incident *Incident) Send() { } } +func (incident *Incident) GetSimilarIncidentId() { + incidents := GetIncidents() + + for _, inc := range incidents { + if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.Human_status == inc.Human_status { + incident.Id = inc.Id + fmt.Printf("Updated incident id to %v\n", inc.Id) + break + } + } +} + func (incident *Incident) SetInvestigating() { incident.Status = 1 incident.Human_status = "Investigating" diff --git a/cachet/metrics.go b/cachet/metrics.go index b4e6eef..59eb5ba 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -2,9 +2,7 @@ package cachet import ( "fmt" - "bytes" "strconv" - "net/http" "encoding/json" ) @@ -17,21 +15,9 @@ func SendMetric(metricId int, delay int64) { "value": delay, }) - client := &http.Client{} - req, _ := http.NewRequest("POST", Config.API_Url + "/metrics/" + strconv.Itoa(metricId) + "/points", bytes.NewBuffer(jsonBytes)) - - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", Config.API_Token) - - resp, err := client.Do(req) - if err != nil { + resp, _, err := makeRequest("POST", "/metrics/" + strconv.Itoa(metricId) + "/points", jsonBytes) + if err != nil || resp.StatusCode != 200 { fmt.Printf("Could not log data point!\n%v\n", err) return } - - defer resp.Body.Close() - - if resp.StatusCode != 200 { - fmt.Println("Could not log data point!") - } } \ No newline at end of file diff --git a/cachet/monitor.go b/cachet/monitor.go index 4263cc4..1935dac 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -83,7 +83,13 @@ func (monitor *Monitor) AnalyseData() { monitor.Incident.Message += "\n\n" + *monitor.LastFailReason } + // set investigating status monitor.Incident.SetInvestigating() + + // lookup relevant incident + monitor.Incident.GetSimilarIncidentId() + + // create/update incident monitor.Incident.Send() } else if t < monitor.Threshold && monitor.Incident != nil { // was down, created an incident, its now ok, make it resolved. diff --git a/cachet/request.go b/cachet/request.go new file mode 100644 index 0000000..578cf17 --- /dev/null +++ b/cachet/request.go @@ -0,0 +1,25 @@ +package cachet + +import ( + "bytes" + "io/ioutil" + "net/http" +) + +func makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { + req, err := http.NewRequest(requestType, Config.API_Url + url, bytes.NewBuffer(reqBody)) + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Cachet-Token", Config.API_Token) + + client := &http.Client{} + res, err := client.Do(req) + if err != nil { + return nil, []byte{}, err + } + + defer res.Body.Close() + body, _ := ioutil.ReadAll(res.Body) + + return res, body, nil +} \ No newline at end of file From 3d0b0c88e107ce93f012e9b593a7784a26b2952e Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 20:40:16 +0100 Subject: [PATCH 10/40] Refactor based on goreportcard.com --- cachet/component.go | 11 ++++++----- cachet/config.go | 12 +++++++----- cachet/incident.go | 48 +++++++++++++++++++++++++++------------------ cachet/metrics.go | 7 ++++--- cachet/monitor.go | 9 ++++++--- cachet/request.go | 4 ++-- main.go | 2 +- 7 files changed, 55 insertions(+), 38 deletions(-) diff --git a/cachet/component.go b/cachet/component.go index a6cd677..c194f54 100644 --- a/cachet/component.go +++ b/cachet/component.go @@ -4,15 +4,16 @@ import ( "time" ) +// Cachet component model type Component struct { - Id int `json:"id"` + ID int `json:"id"` Name string `json:"name"` Description string `json:"description"` Status int `json:"status"` Link *string `json:"link"` Order *int `json:"order"` - Group_id *int `json:"group_id"` - Created_at *time.Time `json:"created_at"` - Updated_at *time.Time `json:"updated_at"` - Deleted_at *time.Time `json:"deleted_at"` + GroupId *int `json:"group_id"` + CreatedAt *time.Time `json:"created_at"` + UpdatedAt *time.Time `json:"updated_at"` + DeletedAt *time.Time `json:"deleted_at"` } \ No newline at end of file diff --git a/cachet/config.go b/cachet/config.go index ef7b543..285df53 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -10,11 +10,13 @@ import ( "encoding/json" ) +// Static config var Config CachetConfig +// Monitoring tool configuration type CachetConfig struct { - API_Url string `json:"api_url"` - API_Token string `json:"api_token"` + APIUrl string `json:"api_url"` + APIToken string `json:"api_token"` Monitors []*Monitor `json:"monitors"` } @@ -56,13 +58,13 @@ func init() { } if len(os.Getenv("CACHET_API")) > 0 { - Config.API_Url = os.Getenv("CACHET_API") + Config.APIUrl = os.Getenv("CACHET_API") } if len(os.Getenv("CACHET_TOKEN")) > 0 { - Config.API_Token = os.Getenv("CACHET_TOKEN") + Config.APIToken = os.Getenv("CACHET_TOKEN") } - if len(Config.API_Token) == 0 || len(Config.API_Url) == 0 { + if len(Config.APIToken) == 0 || len(Config.APIUrl) == 0 { fmt.Printf("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/CastawayLabs/cachet-monitor\n") os.Exit(1) } diff --git a/cachet/incident.go b/cachet/incident.go index ad0a80d..da74e1c 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -6,26 +6,30 @@ import ( "encoding/json" ) +// Cachet Incident data model type Incident struct { - Id int `json:"id"` + ID int `json:"id"` Name string `json:"name"` Message string `json:"message"` Status int `json:"status"`// 4? - Human_status string `json:"human_status"` + HumanStatus string `json:"human_status"` Component *Component `json:"component"` - Component_id *int `json:"component_id"` - Created_at int `json:"created_at"` - Updated_at int `json:"updated_at"` + ComponentId *int `json:"component_id"` + CreatedAt int `json:"created_at"` + UpdatedAt int `json:"updated_at"` } +// Response when creating/updating an incident type IncidentData struct { Incident Incident `json:"data"` } +// from API /incidents type IncidentList struct { Incidents []Incident `json:"data"` } +// Get list of incidents func GetIncidents() []Incident { _, body, err := makeRequest("GET", "/incidents", nil) if err != nil { @@ -36,12 +40,12 @@ func GetIncidents() []Incident { err = json.Unmarshal(body, &data) if err != nil { fmt.Println("Cannot parse incidents.") - panic(err) } return data.Incidents } +// Create or Update incident func (incident *Incident) Send() { jsonBytes, err := json.Marshal(incident) if err != nil { @@ -49,13 +53,13 @@ func (incident *Incident) Send() { } requestType := "POST" - requestUrl := "/incidents" - if incident.Id > 0 { + requestURL := "/incidents" + if incident.ID > 0 { requestType = "PUT" - requestUrl = "/incidents/" + strconv.Itoa(incident.Id) + requestURL = "/incidents/" + strconv.Itoa(incident.ID) } - resp, body, err := makeRequest(requestType, requestUrl, jsonBytes) + resp, body, err := makeRequest(requestType, requestURL, jsonBytes) if err != nil { panic(err) } @@ -68,44 +72,50 @@ func (incident *Incident) Send() { fmt.Println("Cannot parse incident body.") panic(err) } else { - incident.Id = data.Incident.Id + incident.ID = data.Incident.ID } - fmt.Println("ID:"+strconv.Itoa(incident.Id)) + fmt.Println("ID:"+strconv.Itoa(incident.ID)) if resp.StatusCode != 200 { fmt.Println("Could not create/update incident!") } } +// Get the same incident. +// Updates incident.ID func (incident *Incident) GetSimilarIncidentId() { incidents := GetIncidents() for _, inc := range incidents { - if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.Human_status == inc.Human_status { - incident.Id = inc.Id - fmt.Printf("Updated incident id to %v\n", inc.Id) + if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.HumanStatus == inc.HumanStatus { + incident.ID = inc.ID + fmt.Printf("Updated incident id to %v\n", inc.ID) break } } } +// Set status to Investigating func (incident *Incident) SetInvestigating() { incident.Status = 1 - incident.Human_status = "Investigating" + incident.HumanStatus = "Investigating" } +// Set status to Identified func (incident *Incident) SetIdentified() { incident.Status = 2 - incident.Human_status = "Identified" + incident.HumanStatus = "Identified" } +// Set status to Watching func (incident *Incident) SetWatching() { incident.Status = 3 - incident.Human_status = "Watching" + incident.HumanStatus = "Watching" } +// Set status to Fixed func (incident *Incident) SetFixed() { incident.Status = 4 - incident.Human_status = "Fixed" + incident.HumanStatus = "Fixed" } \ No newline at end of file diff --git a/cachet/metrics.go b/cachet/metrics.go index 59eb5ba..44f0142 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -6,8 +6,9 @@ import ( "encoding/json" ) -func SendMetric(metricId int, delay int64) { - if metricId <= 0 { +// Send lag metric point +func SendMetric(metricID int, delay int64) { + if metricID <= 0 { return } @@ -15,7 +16,7 @@ func SendMetric(metricId int, delay int64) { "value": delay, }) - resp, _, err := makeRequest("POST", "/metrics/" + strconv.Itoa(metricId) + "/points", jsonBytes) + resp, _, err := makeRequest("POST", "/metrics/" + strconv.Itoa(metricID) + "/points", jsonBytes) if err != nil || resp.StatusCode != 200 { fmt.Printf("Could not log data point!\n%v\n", err) return diff --git a/cachet/monitor.go b/cachet/monitor.go index 1935dac..99bc790 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -8,12 +8,13 @@ import ( const timeout = time.Duration(time.Second) +// Monitor data model type Monitor struct { Name string `json:"name"` - Url string `json:"url"` - MetricId int `json:"metric_id"` + URL string `json:"url"` + MetricID int `json:"metric_id"` Threshold float32 `json:"threshold"` - ComponentId *int `json:"component_id"` + ComponentID *int `json:"component_id"` ExpectedStatusCode int `json:"expected_status_code"` History []bool `json:"-"` @@ -21,6 +22,7 @@ type Monitor struct { Incident *Incident `json:"-"` } +// Run loop func (monitor *Monitor) Run() { reqStart := getMs() isUp := monitor.doRequest() @@ -53,6 +55,7 @@ func (monitor *Monitor) doRequest() bool { return resp.StatusCode == monitor.ExpectedStatusCode } +// Decides if the monitor is statistically up or down and creates / resolves an incident func (monitor *Monitor) AnalyseData() { // look at the past few incidents numDown := 0 diff --git a/cachet/request.go b/cachet/request.go index 578cf17..bc912c6 100644 --- a/cachet/request.go +++ b/cachet/request.go @@ -7,10 +7,10 @@ import ( ) func makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { - req, err := http.NewRequest(requestType, Config.API_Url + url, bytes.NewBuffer(reqBody)) + req, err := http.NewRequest(requestType, Config.APIUrl + url, bytes.NewBuffer(reqBody)) req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", Config.API_Token) + req.Header.Set("X-Cachet-Token", Config.APIToken) client := &http.Client{} res, err := client.Do(req) diff --git a/main.go b/main.go index 2d5ebf2..b419901 100644 --- a/main.go +++ b/main.go @@ -7,7 +7,7 @@ import ( ) func main() { - fmt.Printf("API: %s\n", cachet.Config.API_Url) + fmt.Printf("API: %s\n", cachet.Config.APIUrl) fmt.Printf("Starting %d monitors:\n", len(cachet.Config.Monitors)) for _, monitor := range cachet.Config.Monitors { fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.Url, monitor.ExpectedStatusCode) From cdf51d89f6cf7c1ae66d8f6d58cfe5636212eee8 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 20:52:57 +0100 Subject: [PATCH 11/40] More 'improvements' based on goreportcard.com --- cachet/component.go | 4 ++-- cachet/config.go | 2 +- cachet/incident.go | 22 +++++++++++----------- cachet/metrics.go | 2 +- cachet/monitor.go | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cachet/component.go b/cachet/component.go index c194f54..f9090f2 100644 --- a/cachet/component.go +++ b/cachet/component.go @@ -4,7 +4,7 @@ import ( "time" ) -// Cachet component model +// Component Cachet model type Component struct { ID int `json:"id"` Name string `json:"name"` @@ -12,7 +12,7 @@ type Component struct { Status int `json:"status"` Link *string `json:"link"` Order *int `json:"order"` - GroupId *int `json:"group_id"` + GroupID *int `json:"group_id"` CreatedAt *time.Time `json:"created_at"` UpdatedAt *time.Time `json:"updated_at"` DeletedAt *time.Time `json:"deleted_at"` diff --git a/cachet/config.go b/cachet/config.go index 285df53..8ccd1bd 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -13,7 +13,7 @@ import ( // Static config var Config CachetConfig -// Monitoring tool configuration +// CachetConfig is the monitoring tool configuration type CachetConfig struct { APIUrl string `json:"api_url"` APIToken string `json:"api_token"` diff --git a/cachet/incident.go b/cachet/incident.go index da74e1c..141c877 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -6,7 +6,7 @@ import ( "encoding/json" ) -// Cachet Incident data model +// Incident Cachet data model type Incident struct { ID int `json:"id"` Name string `json:"name"` @@ -14,22 +14,22 @@ type Incident struct { Status int `json:"status"`// 4? HumanStatus string `json:"human_status"` Component *Component `json:"component"` - ComponentId *int `json:"component_id"` + ComponentID *int `json:"component_id"` CreatedAt int `json:"created_at"` UpdatedAt int `json:"updated_at"` } -// Response when creating/updating an incident +// IncidentData is a response when creating/updating an incident type IncidentData struct { Incident Incident `json:"data"` } -// from API /incidents +// IncidentList - from API /incidents type IncidentList struct { Incidents []Incident `json:"data"` } -// Get list of incidents +// GetIncidents - Get list of incidents func GetIncidents() []Incident { _, body, err := makeRequest("GET", "/incidents", nil) if err != nil { @@ -45,7 +45,7 @@ func GetIncidents() []Incident { return data.Incidents } -// Create or Update incident +// Send - Create or Update incident func (incident *Incident) Send() { jsonBytes, err := json.Marshal(incident) if err != nil { @@ -83,7 +83,7 @@ func (incident *Incident) Send() { } // Get the same incident. -// Updates incident.ID +// GetSimilarIncidentId Updates incident.ID func (incident *Incident) GetSimilarIncidentId() { incidents := GetIncidents() @@ -96,25 +96,25 @@ func (incident *Incident) GetSimilarIncidentId() { } } -// Set status to Investigating +// SetInvestigating sets status to Investigating func (incident *Incident) SetInvestigating() { incident.Status = 1 incident.HumanStatus = "Investigating" } -// Set status to Identified +// SetIdentified sets status to Identified func (incident *Incident) SetIdentified() { incident.Status = 2 incident.HumanStatus = "Identified" } -// Set status to Watching +// SetWatching sets status to Watching func (incident *Incident) SetWatching() { incident.Status = 3 incident.HumanStatus = "Watching" } -// Set status to Fixed +// SetFixed sets status to Fixed func (incident *Incident) SetFixed() { incident.Status = 4 incident.HumanStatus = "Fixed" diff --git a/cachet/metrics.go b/cachet/metrics.go index 44f0142..7a3d9e2 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -6,7 +6,7 @@ import ( "encoding/json" ) -// Send lag metric point +// SendMetric sends lag metric point func SendMetric(metricID int, delay int64) { if metricID <= 0 { return diff --git a/cachet/monitor.go b/cachet/monitor.go index 99bc790..61a41ae 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -55,7 +55,7 @@ func (monitor *Monitor) doRequest() bool { return resp.StatusCode == monitor.ExpectedStatusCode } -// Decides if the monitor is statistically up or down and creates / resolves an incident +// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident func (monitor *Monitor) AnalyseData() { // look at the past few incidents numDown := 0 From d421b35e9be1900399026fe1f16e3e482d085615 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 20:58:56 +0100 Subject: [PATCH 12/40] gofmt -w --- cachet/component.go | 22 +++++++++++----------- cachet/config.go | 18 +++++++++--------- cachet/incident.go | 24 ++++++++++++------------ cachet/metrics.go | 6 +++--- cachet/monitor.go | 26 +++++++++++++------------- cachet/request.go | 4 ++-- main.go | 4 ++-- 7 files changed, 52 insertions(+), 52 deletions(-) diff --git a/cachet/component.go b/cachet/component.go index f9090f2..e8c12fd 100644 --- a/cachet/component.go +++ b/cachet/component.go @@ -6,14 +6,14 @@ import ( // Component Cachet model type Component struct { - ID int `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - Status int `json:"status"` - Link *string `json:"link"` - Order *int `json:"order"` - GroupID *int `json:"group_id"` - CreatedAt *time.Time `json:"created_at"` - UpdatedAt *time.Time `json:"updated_at"` - DeletedAt *time.Time `json:"deleted_at"` -} \ No newline at end of file + ID int `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Status int `json:"status"` + Link *string `json:"link"` + Order *int `json:"order"` + GroupID *int `json:"group_id"` + CreatedAt *time.Time `json:"created_at"` + UpdatedAt *time.Time `json:"updated_at"` + DeletedAt *time.Time `json:"deleted_at"` +} diff --git a/cachet/config.go b/cachet/config.go index 8ccd1bd..b8aa8b5 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -1,13 +1,13 @@ package cachet import ( - "os" - "fmt" - "flag" - "net/url" - "net/http" - "io/ioutil" "encoding/json" + "flag" + "fmt" + "io/ioutil" + "net/http" + "net/url" + "os" ) // Static config @@ -15,8 +15,8 @@ var Config CachetConfig // CachetConfig is the monitoring tool configuration type CachetConfig struct { - APIUrl string `json:"api_url"` - APIToken string `json:"api_token"` + APIUrl string `json:"api_url"` + APIToken string `json:"api_token"` Monitors []*Monitor `json:"monitors"` } @@ -73,4 +73,4 @@ func init() { fmt.Printf("No monitors defined!\nSee sample configuration: https://github.com/CastawayLabs/cachet-monitor/blob/master/example.config.json\n") os.Exit(1) } -} \ No newline at end of file +} diff --git a/cachet/incident.go b/cachet/incident.go index 141c877..cc9f791 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -1,22 +1,22 @@ package cachet import ( + "encoding/json" "fmt" "strconv" - "encoding/json" ) // Incident Cachet data model type Incident struct { - ID int `json:"id"` - Name string `json:"name"` - Message string `json:"message"` - Status int `json:"status"`// 4? - HumanStatus string `json:"human_status"` - Component *Component `json:"component"` - ComponentID *int `json:"component_id"` - CreatedAt int `json:"created_at"` - UpdatedAt int `json:"updated_at"` + ID int `json:"id"` + Name string `json:"name"` + Message string `json:"message"` + Status int `json:"status"` // 4? + HumanStatus string `json:"human_status"` + Component *Component `json:"component"` + ComponentID *int `json:"component_id"` + CreatedAt int `json:"created_at"` + UpdatedAt int `json:"updated_at"` } // IncidentData is a response when creating/updating an incident @@ -75,7 +75,7 @@ func (incident *Incident) Send() { incident.ID = data.Incident.ID } - fmt.Println("ID:"+strconv.Itoa(incident.ID)) + fmt.Println("ID:" + strconv.Itoa(incident.ID)) if resp.StatusCode != 200 { fmt.Println("Could not create/update incident!") @@ -118,4 +118,4 @@ func (incident *Incident) SetWatching() { func (incident *Incident) SetFixed() { incident.Status = 4 incident.HumanStatus = "Fixed" -} \ No newline at end of file +} diff --git a/cachet/metrics.go b/cachet/metrics.go index 7a3d9e2..79cf20c 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -1,9 +1,9 @@ package cachet import ( + "encoding/json" "fmt" "strconv" - "encoding/json" ) // SendMetric sends lag metric point @@ -16,9 +16,9 @@ func SendMetric(metricID int, delay int64) { "value": delay, }) - resp, _, err := makeRequest("POST", "/metrics/" + strconv.Itoa(metricID) + "/points", jsonBytes) + resp, _, err := makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) if err != nil || resp.StatusCode != 200 { fmt.Printf("Could not log data point!\n%v\n", err) return } -} \ No newline at end of file +} diff --git a/cachet/monitor.go b/cachet/monitor.go index 61a41ae..111a927 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -2,24 +2,24 @@ package cachet import ( "fmt" - "time" "net/http" + "time" ) const timeout = time.Duration(time.Second) // Monitor data model type Monitor struct { - Name string `json:"name"` - URL string `json:"url"` - MetricID int `json:"metric_id"` - Threshold float32 `json:"threshold"` - ComponentID *int `json:"component_id"` - ExpectedStatusCode int `json:"expected_status_code"` + Name string `json:"name"` + URL string `json:"url"` + MetricID int `json:"metric_id"` + Threshold float32 `json:"threshold"` + ComponentID *int `json:"component_id"` + ExpectedStatusCode int `json:"expected_status_code"` - History []bool `json:"-"` - LastFailReason *string `json:"-"` - Incident *Incident `json:"-"` + History []bool `json:"-"` + LastFailReason *string `json:"-"` + Incident *Incident `json:"-"` } // Run loop @@ -66,7 +66,7 @@ func (monitor *Monitor) AnalyseData() { } t := (float32(numDown) / float32(len(monitor.History))) * 100 - fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.Url, t, time.Now().UnixNano() / int64(time.Second), monitor.Threshold) + fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.Url, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) if len(monitor.History) != 10 { // not enough data @@ -78,7 +78,7 @@ func (monitor *Monitor) AnalyseData() { fmt.Println("Creating incident...") monitor.Incident = &Incident{ - Name: monitor.Name, + Name: monitor.Name, Message: monitor.Name + " failed", } @@ -107,4 +107,4 @@ func (monitor *Monitor) AnalyseData() { func getMs() int64 { return time.Now().UnixNano() / int64(time.Millisecond) -} \ No newline at end of file +} diff --git a/cachet/request.go b/cachet/request.go index bc912c6..644eb76 100644 --- a/cachet/request.go +++ b/cachet/request.go @@ -7,7 +7,7 @@ import ( ) func makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { - req, err := http.NewRequest(requestType, Config.APIUrl + url, bytes.NewBuffer(reqBody)) + req, err := http.NewRequest(requestType, Config.APIUrl+url, bytes.NewBuffer(reqBody)) req.Header.Set("Content-Type", "application/json") req.Header.Set("X-Cachet-Token", Config.APIToken) @@ -22,4 +22,4 @@ func makeRequest(requestType string, url string, reqBody []byte) (*http.Response body, _ := ioutil.ReadAll(res.Body) return res, body, nil -} \ No newline at end of file +} diff --git a/main.go b/main.go index b419901..fd596b6 100644 --- a/main.go +++ b/main.go @@ -2,8 +2,8 @@ package main import ( "fmt" - "time" "github.com/castawaylabs/cachet-monitor/cachet" + "time" ) func main() { @@ -19,7 +19,7 @@ func main() { fmt.Println() ticker := time.NewTicker(time.Second) - for _ = range ticker.C { + for range ticker.C { for _, monitor := range cachet.Config.Monitors { go monitor.Run() } From b5efaeece1183fd32ab0a86910b66bc6fc1e46d5 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 21:02:12 +0100 Subject: [PATCH 13/40] gofmt -w main.go --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index fd596b6..5fdfd48 100644 --- a/main.go +++ b/main.go @@ -24,4 +24,4 @@ func main() { go monitor.Run() } } -} \ No newline at end of file +} From d62aa53169bd4544c300653ba85d1b1bd848317a Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 21:04:48 +0100 Subject: [PATCH 14/40] Golint & bug fixes --- cachet/incident.go | 6 +++--- cachet/monitor.go | 10 +++++----- main.go | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cachet/incident.go b/cachet/incident.go index cc9f791..62bd649 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -82,9 +82,9 @@ func (incident *Incident) Send() { } } -// Get the same incident. -// GetSimilarIncidentId Updates incident.ID -func (incident *Incident) GetSimilarIncidentId() { +// GetSimilarIncidentId gets the same incident. +// Updates incident.ID +func (incident *Incident) GetSimilarIncidentID() { incidents := GetIncidents() for _, inc := range incidents { diff --git a/cachet/monitor.go b/cachet/monitor.go index 111a927..2a922d7 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -34,8 +34,8 @@ func (monitor *Monitor) Run() { monitor.History = append(monitor.History, isUp) monitor.AnalyseData() - if isUp == true && monitor.MetricId > 0 { - SendMetric(monitor.MetricId, lag) + if isUp == true && monitor.MetricID > 0 { + SendMetric(monitor.MetricID, lag) } } @@ -43,7 +43,7 @@ func (monitor *Monitor) doRequest() bool { client := &http.Client{ Timeout: timeout, } - resp, err := client.Get(monitor.Url) + resp, err := client.Get(monitor.URL) if err != nil { errString := err.Error() monitor.LastFailReason = &errString @@ -66,7 +66,7 @@ func (monitor *Monitor) AnalyseData() { } t := (float32(numDown) / float32(len(monitor.History))) * 100 - fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.Url, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) + fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) if len(monitor.History) != 10 { // not enough data @@ -90,7 +90,7 @@ func (monitor *Monitor) AnalyseData() { monitor.Incident.SetInvestigating() // lookup relevant incident - monitor.Incident.GetSimilarIncidentId() + monitor.Incident.GetSimilarIncidentID() // create/update incident monitor.Incident.Send() diff --git a/main.go b/main.go index 5fdfd48..cf79c43 100644 --- a/main.go +++ b/main.go @@ -10,9 +10,9 @@ func main() { fmt.Printf("API: %s\n", cachet.Config.APIUrl) fmt.Printf("Starting %d monitors:\n", len(cachet.Config.Monitors)) for _, monitor := range cachet.Config.Monitors { - fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.Url, monitor.ExpectedStatusCode) - if monitor.MetricId > 0 { - fmt.Printf(" - Logs lag to metric id: %d\n", monitor.MetricId) + fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.URL, monitor.ExpectedStatusCode) + if monitor.MetricID > 0 { + fmt.Printf(" - Logs lag to metric id: %d\n", monitor.MetricID) } } From bdb426c232389b2e2df9435e30866a8c6b3050d6 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Fri, 20 Mar 2015 21:05:47 +0100 Subject: [PATCH 15/40] more golint --- cachet/incident.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cachet/incident.go b/cachet/incident.go index 62bd649..45410a6 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -82,7 +82,7 @@ func (incident *Incident) Send() { } } -// GetSimilarIncidentId gets the same incident. +// GetSimilarIncidentID gets the same incident. // Updates incident.ID func (incident *Incident) GetSimilarIncidentID() { incidents := GetIncidents() From dce1978b510d36a8a2368c92f9a7a082fbc75e79 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sat, 21 Mar 2015 11:44:52 +0100 Subject: [PATCH 16/40] Detects hostname/interface ip - Hostname is monitor's id. Incidents will be created with the monitor id. --- cachet/config.go | 18 +++++++++++++++--- cachet/monitor.go | 2 +- main.go | 18 ++++++++++-------- system/config.go | 24 ++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 system/config.go diff --git a/cachet/config.go b/cachet/config.go index b8aa8b5..b3ca870 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -4,6 +4,7 @@ import ( "encoding/json" "flag" "fmt" + "github.com/castawaylabs/cachet-monitor/system" "io/ioutil" "net/http" "net/url" @@ -15,14 +16,17 @@ var Config CachetConfig // CachetConfig is the monitoring tool configuration type CachetConfig struct { - APIUrl string `json:"api_url"` - APIToken string `json:"api_token"` - Monitors []*Monitor `json:"monitors"` + APIUrl string `json:"api_url"` + APIToken string `json:"api_token"` + Monitors []*Monitor `json:"monitors"` + SystemName string `json:"system_name"` } func init() { var configPath string + var systemName string flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") + flag.StringVar(&systemName, "name", "", "System Name") flag.Parse() var data []byte @@ -57,6 +61,14 @@ func init() { os.Exit(1) } + if len(systemName) > 0 { + Config.SystemName = systemName + } + if len(Config.SystemName) == 0 { + // get hostname + Config.SystemName = system.GetHostname() + } + if len(os.Getenv("CACHET_API")) > 0 { Config.APIUrl = os.Getenv("CACHET_API") } diff --git a/cachet/monitor.go b/cachet/monitor.go index 2a922d7..6997de2 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -78,7 +78,7 @@ func (monitor *Monitor) AnalyseData() { fmt.Println("Creating incident...") monitor.Incident = &Incident{ - Name: monitor.Name, + Name: monitor.Name + " - " + Config.SystemName, Message: monitor.Name + " failed", } diff --git a/main.go b/main.go index cf79c43..2cce06f 100644 --- a/main.go +++ b/main.go @@ -7,12 +7,14 @@ import ( ) func main() { - fmt.Printf("API: %s\n", cachet.Config.APIUrl) - fmt.Printf("Starting %d monitors:\n", len(cachet.Config.Monitors)) - for _, monitor := range cachet.Config.Monitors { - fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.URL, monitor.ExpectedStatusCode) - if monitor.MetricID > 0 { - fmt.Printf(" - Logs lag to metric id: %d\n", monitor.MetricID) + config := cachet.Config + + fmt.Printf("System: %s, API: %s\n", config.SystemName, config.APIUrl) + fmt.Printf("Starting %d monitors:\n", len(config.Monitors)) + for _, mon := range config.Monitors { + fmt.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) + if mon.MetricID > 0 { + fmt.Printf(" - Logs lag to metric id: %d\n", mon.MetricID) } } @@ -20,8 +22,8 @@ func main() { ticker := time.NewTicker(time.Second) for range ticker.C { - for _, monitor := range cachet.Config.Monitors { - go monitor.Run() + for _, mon := range config.Monitors { + go mon.Run() } } } diff --git a/system/config.go b/system/config.go new file mode 100644 index 0000000..4c899d9 --- /dev/null +++ b/system/config.go @@ -0,0 +1,24 @@ +package system + +import ( + "net" + "os" +) + +// GetHostname returns id of the current system +func GetHostname() string { + hostname, err := os.Hostname() + if err != nil || len(hostname) == 0 { + addrs, err := net.InterfaceAddrs() + + if err != nil { + return "unknown" + } + + for _, addr := range addrs { + return addr.String() + } + } + + return hostname +} From 8e9c3888945ff876bc1c499e0e2d45e4d188d1ba Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sat, 21 Mar 2015 12:36:45 +0100 Subject: [PATCH 17/40] System logger --- cachet/config.go | 24 +++++++++++++++++++++++- cachet/incident.go | 24 +++++++++++++----------- cachet/metrics.go | 3 +-- cachet/monitor.go | 7 +++---- main.go | 12 ++++++------ 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/cachet/config.go b/cachet/config.go index b3ca870..c6e6968 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -5,7 +5,9 @@ import ( "flag" "fmt" "github.com/castawaylabs/cachet-monitor/system" + "io" "io/ioutil" + "log" "net/http" "net/url" "os" @@ -13,6 +15,8 @@ import ( // Static config var Config CachetConfig +// Central logger +var Logger *log.Logger // CachetConfig is the monitoring tool configuration type CachetConfig struct { @@ -20,13 +24,16 @@ type CachetConfig struct { APIToken string `json:"api_token"` Monitors []*Monitor `json:"monitors"` SystemName string `json:"system_name"` + LogPath string `json:"log_path"` } func init() { var configPath string var systemName string + var logPath string flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") flag.StringVar(&systemName, "name", "", "System Name") + flag.StringVar(&logPath, "log", "", "Log path") flag.Parse() var data []byte @@ -42,7 +49,6 @@ func init() { } defer response.Body.Close() - data, _ = ioutil.ReadAll(response.Body) fmt.Println("Downloaded network configuration.") @@ -85,4 +91,20 @@ func init() { fmt.Printf("No monitors defined!\nSee sample configuration: https://github.com/CastawayLabs/cachet-monitor/blob/master/example.config.json\n") os.Exit(1) } + + if len(logPath) > 0 { + Config.LogPath = logPath + } + + var logWriter io.Writer + logWriter = os.Stdout + if len(Config.LogPath) > 0 { + logWriter, err = os.Create(Config.LogPath) + if err != nil { + fmt.Printf("Unable to open file '%v' for logging\n", Config.LogPath) + os.Exit(1) + } + } + + Logger = log.New(logWriter, "", log.Llongfile | log.Ldate | log.Ltime) } diff --git a/cachet/incident.go b/cachet/incident.go index 45410a6..244a6b1 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -2,7 +2,6 @@ package cachet import ( "encoding/json" - "fmt" "strconv" ) @@ -13,7 +12,7 @@ type Incident struct { Message string `json:"message"` Status int `json:"status"` // 4? HumanStatus string `json:"human_status"` - Component *Component `json:"component"` + Component *Component `json:"-"` ComponentID *int `json:"component_id"` CreatedAt int `json:"created_at"` UpdatedAt int `json:"updated_at"` @@ -33,13 +32,14 @@ type IncidentList struct { func GetIncidents() []Incident { _, body, err := makeRequest("GET", "/incidents", nil) if err != nil { - panic(err) + Logger.Printf("Cannot get incidents: %v\n", err) + return []Incident{} } var data IncidentList err = json.Unmarshal(body, &data) if err != nil { - fmt.Println("Cannot parse incidents.") + Logger.Printf("Cannot parse incidents: %v\n", err) } return data.Incidents @@ -49,7 +49,8 @@ func GetIncidents() []Incident { func (incident *Incident) Send() { jsonBytes, err := json.Marshal(incident) if err != nil { - panic(err) + Logger.Printf("Cannot encode incident: %v\n", err) + return } requestType := "POST" @@ -61,24 +62,25 @@ func (incident *Incident) Send() { resp, body, err := makeRequest(requestType, requestURL, jsonBytes) if err != nil { - panic(err) + Logger.Printf("Cannot create/update incident: %v\n", err) + return } - fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) + Logger.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) var data IncidentData err = json.Unmarshal(body, &data) if err != nil { - fmt.Println("Cannot parse incident body.") + Logger.Println("Cannot parse incident body.") panic(err) } else { incident.ID = data.Incident.ID } - fmt.Println("ID:" + strconv.Itoa(incident.ID)) + Logger.Println("ID:" + strconv.Itoa(incident.ID)) if resp.StatusCode != 200 { - fmt.Println("Could not create/update incident!") + Logger.Println("Could not create/update incident!") } } @@ -90,7 +92,7 @@ func (incident *Incident) GetSimilarIncidentID() { for _, inc := range incidents { if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.HumanStatus == inc.HumanStatus { incident.ID = inc.ID - fmt.Printf("Updated incident id to %v\n", inc.ID) + Logger.Printf("Updated incident id to %v\n", inc.ID) break } } diff --git a/cachet/metrics.go b/cachet/metrics.go index 79cf20c..ff21d58 100644 --- a/cachet/metrics.go +++ b/cachet/metrics.go @@ -2,7 +2,6 @@ package cachet import ( "encoding/json" - "fmt" "strconv" ) @@ -18,7 +17,7 @@ func SendMetric(metricID int, delay int64) { resp, _, err := makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) if err != nil || resp.StatusCode != 200 { - fmt.Printf("Could not log data point!\n%v\n", err) + Logger.Printf("Could not log data point!\n%v\n", err) return } } diff --git a/cachet/monitor.go b/cachet/monitor.go index 6997de2..5b4b998 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -1,7 +1,6 @@ package cachet import ( - "fmt" "net/http" "time" ) @@ -66,7 +65,7 @@ func (monitor *Monitor) AnalyseData() { } t := (float32(numDown) / float32(len(monitor.History))) * 100 - fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) + Logger.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) if len(monitor.History) != 10 { // not enough data @@ -75,7 +74,7 @@ func (monitor *Monitor) AnalyseData() { if t > monitor.Threshold && monitor.Incident == nil { // is down, create an incident - fmt.Println("Creating incident...") + Logger.Println("Creating incident...") monitor.Incident = &Incident{ Name: monitor.Name + " - " + Config.SystemName, @@ -96,7 +95,7 @@ func (monitor *Monitor) AnalyseData() { monitor.Incident.Send() } else if t < monitor.Threshold && monitor.Incident != nil { // was down, created an incident, its now ok, make it resolved. - fmt.Println("Updating incident to resolved...") + Logger.Println("Updating incident to resolved...") monitor.Incident.SetFixed() monitor.Incident.Send() diff --git a/main.go b/main.go index 2cce06f..e5fde68 100644 --- a/main.go +++ b/main.go @@ -1,24 +1,24 @@ package main import ( - "fmt" "github.com/castawaylabs/cachet-monitor/cachet" "time" ) func main() { config := cachet.Config + log := cachet.Logger - fmt.Printf("System: %s, API: %s\n", config.SystemName, config.APIUrl) - fmt.Printf("Starting %d monitors:\n", len(config.Monitors)) + log.Printf("System: %s, API: %s\n", config.SystemName, config.APIUrl) + log.Printf("Starting %d monitors:\n", len(config.Monitors)) for _, mon := range config.Monitors { - fmt.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) + log.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) if mon.MetricID > 0 { - fmt.Printf(" - Logs lag to metric id: %d\n", mon.MetricID) + log.Printf(" - Logs lag to metric id: %d\n", mon.MetricID) } } - fmt.Println() + log.Println() ticker := time.NewTicker(time.Second) for range ticker.C { From 609064730d5a4e6232d6c0ce5399c07d0ee12895 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sat, 21 Mar 2015 12:37:58 +0100 Subject: [PATCH 18/40] gofmt -w --- cachet/config.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cachet/config.go b/cachet/config.go index c6e6968..3cd6fff 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -15,6 +15,7 @@ import ( // Static config var Config CachetConfig + // Central logger var Logger *log.Logger @@ -106,5 +107,5 @@ func init() { } } - Logger = log.New(logWriter, "", log.Llongfile | log.Ldate | log.Ltime) + Logger = log.New(logWriter, "", log.Llongfile|log.Ldate|log.Ltime) } From cceb370e302dd0bf8a0fcf0dd4d3fb058637af9d Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sat, 21 Mar 2015 12:48:35 +0100 Subject: [PATCH 19/40] No logger flags for development mode --- cachet/config.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cachet/config.go b/cachet/config.go index 3cd6fff..35af034 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -107,5 +107,10 @@ func init() { } } - Logger = log.New(logWriter, "", log.Llongfile|log.Ldate|log.Ltime) + flags := log.Llongfile|log.Ldate|log.Ltime + if len(os.Getenv("DEVELOPMENT")) > 0 { + flags = 0 + } + + Logger = log.New(logWriter, "", flags) } From 409d753931b607a7f4a40c7485af76b487e3a9b2 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sat, 21 Mar 2015 20:08:05 +0000 Subject: [PATCH 20/40] Link to incident, resolved message - Major outage when already in Partial outage - Resolved at x message - Link incident to component --- cachet/component.go | 27 ++++++++++----------- cachet/incident.go | 58 ++++++++++++++++++++++++++++++++++++++++++--- cachet/monitor.go | 10 ++++++-- 3 files changed, 76 insertions(+), 19 deletions(-) diff --git a/cachet/component.go b/cachet/component.go index e8c12fd..bebbad4 100644 --- a/cachet/component.go +++ b/cachet/component.go @@ -1,19 +1,18 @@ package cachet -import ( - "time" -) - // Component Cachet model type Component struct { - ID int `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - Status int `json:"status"` - Link *string `json:"link"` - Order *int `json:"order"` - GroupID *int `json:"group_id"` - CreatedAt *time.Time `json:"created_at"` - UpdatedAt *time.Time `json:"updated_at"` - DeletedAt *time.Time `json:"deleted_at"` + ID int `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Status int `json:"status_id"` + HumanStatus string `json:"-"` + IncidentCount int `json:"-"` + CreatedAt int `json:"created_at"` + UpdatedAt int `json:"updated_at"` +} + +// ComponentData json response model +type ComponentData struct { + Component Component `json:"data"` } diff --git a/cachet/incident.go b/cachet/incident.go index 244a6b1..c8213c0 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -57,7 +57,7 @@ func (incident *Incident) Send() { requestURL := "/incidents" if incident.ID > 0 { requestType = "PUT" - requestURL = "/incidents/" + strconv.Itoa(incident.ID) + requestURL += "/" + strconv.Itoa(incident.ID) } resp, body, err := makeRequest(requestType, requestURL, jsonBytes) @@ -75,10 +75,9 @@ func (incident *Incident) Send() { panic(err) } else { incident.ID = data.Incident.ID + incident.Component = data.Incident.Component } - Logger.Println("ID:" + strconv.Itoa(incident.ID)) - if resp.StatusCode != 200 { Logger.Println("Could not create/update incident!") } @@ -98,6 +97,59 @@ func (incident *Incident) GetSimilarIncidentID() { } } +func (incident *Incident) fetchComponent() error { + _, body, err := makeRequest("GET", "/components/" + strconv.Itoa(*incident.ComponentID), nil) + if err != nil { + return err + } + + var data ComponentData + err = json.Unmarshal(body, &data) + if err != nil { + Logger.Println("Cannot parse component body.") + panic(err) + } + + incident.Component = &data.Component + + return nil +} + +func (incident *Incident) UpdateComponent() { + if incident.ComponentID == nil || *incident.ComponentID == 0 { + return + } + + if incident.Component == nil { + // fetch component + if err := incident.fetchComponent(); err != nil { + Logger.Printf("Cannot fetch component for incident. %v\n", err) + return + } + } + + switch incident.Status { + case 1, 2, 3: + if incident.Component.Status == 3 { + incident.Component.Status = 4 + } else { + incident.Component.Status = 3 + } + case 4: + incident.Component.Status = 1 + } + + jsonBytes, _ := json.Marshal(map[string]interface{}{ + "status": incident.Component.Status, + }) + + resp, _, err := makeRequest("PUT", "/components/" + strconv.Itoa(incident.Component.ID), jsonBytes) + if err != nil || resp.StatusCode != 200 { + Logger.Printf("Could not update component: (resp code %d) %v", resp.StatusCode, err) + return + } +} + // SetInvestigating sets status to Investigating func (incident *Incident) SetInvestigating() { incident.Status = 1 diff --git a/cachet/monitor.go b/cachet/monitor.go index 5b4b998..616ab74 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -77,8 +77,9 @@ func (monitor *Monitor) AnalyseData() { Logger.Println("Creating incident...") monitor.Incident = &Incident{ - Name: monitor.Name + " - " + Config.SystemName, - Message: monitor.Name + " failed", + Name: monitor.Name + " - " + Config.SystemName, + Message: monitor.Name + " failed", + ComponentID: monitor.ComponentID, } if monitor.LastFailReason != nil { @@ -93,12 +94,17 @@ func (monitor *Monitor) AnalyseData() { // create/update incident monitor.Incident.Send() + monitor.Incident.UpdateComponent() } else if t < monitor.Threshold && monitor.Incident != nil { // was down, created an incident, its now ok, make it resolved. Logger.Println("Updating incident to resolved...") + // Add resolved message + monitor.Incident.Message += "\n\n-\n\nResolved at " + time.Now().String() + monitor.Incident.SetFixed() monitor.Incident.Send() + monitor.Incident.UpdateComponent() monitor.Incident = nil } From 350244514a273ddc6beb9392c92f96870749424c Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sat, 21 Mar 2015 21:09:23 +0000 Subject: [PATCH 21/40] Improve readme, Dockerfile --- Dockerfile | 6 +++++ readme.md | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f163b42 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM golang + +ADD . /go/src/github.com/castawaylabs/cachet-monitor +RUN go install github.com/castawaylabs/cachet-monitor + +ENTRYPOINT /go/bin/cachet-monitor \ No newline at end of file diff --git a/readme.md b/readme.md index 23b8086..ba98abf 100644 --- a/readme.md +++ b/readme.md @@ -3,8 +3,59 @@ Cachet Monitor plugin This is a monitoring plugin for CachetHQ. -How to run: ------------ +Features +-------- + +- [x] Creates & Resolves Incidents +- [x] Posts monitor lag every second +- [x] Updates Component to Partial Outage +- [x] Updates Component to Major Outage if in Partial Outage +- [x] Can be run on multiple servers and geo regions + +Docker Quickstart +----------------- + +1. Create a configuration json +2. +``` +docker run -d \ + --name cachet-monitor \ + -h cachet-monitor \ + -v `pwd`/config.json:/etc/cachet-monitor.config.json \ + castawaylabs/cachet-monitor +``` + +Configuration +------------- + +``` +{ + "api_url": "https://demo.cachethq.io/api", + "api_token": "9yMHsdioQosnyVK4iCVR", + "monitors": [ + { + "name": "nodegear frontend", + "url": "https://nodegear.io/ping", + "metric_id": 0, + "component_id": 0, + "threshold": 80, + "component_id": null, + "expected_status_code": 200 + } + ] +} +``` + +*Notes:* + +- `metric_id` is optional +- `component_id` is optional +- `threshold` is a percentage +- `expected_status_code` is a http response code +- GET request will be performed on the `url` + +How to run +---------- Example: @@ -17,8 +68,15 @@ Production: 1. Download the example config and save to `/etc/cachet-monitor.config.json` 2. Run in background: `nohup cachet-monitor 2>&1 > /var/log/cachet-monitor.log &` -Environment variables: ----------------------- +``` +Usage of cachet-monitor: + -c="/etc/cachet-monitor.config.json": Config path + -log="": Log path + -name="": System Name +``` + +Environment variables +--------------------- | Name | Example Value | Description | | ------------ | --------------------------- | --------------------------- | From f918ea38cd3f63994dcf937d438a62cd5ca2e3ac Mon Sep 17 00:00:00 2001 From: Mathieu Doyon Date: Wed, 8 Apr 2015 14:28:36 -0400 Subject: [PATCH 22/40] Add time interval --- cachet/config.go | 3 ++- example.config.json | 1 + main.go | 4 ++-- readme.md | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cachet/config.go b/cachet/config.go index 35af034..97fee11 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -23,6 +23,7 @@ var Logger *log.Logger type CachetConfig struct { APIUrl string `json:"api_url"` APIToken string `json:"api_token"` + Interval int64 `json:"interval"` Monitors []*Monitor `json:"monitors"` SystemName string `json:"system_name"` LogPath string `json:"log_path"` @@ -107,7 +108,7 @@ func init() { } } - flags := log.Llongfile|log.Ldate|log.Ltime + flags := log.Llongfile | log.Ldate | log.Ltime if len(os.Getenv("DEVELOPMENT")) > 0 { flags = 0 } diff --git a/example.config.json b/example.config.json index 737a2b1..f8130e7 100644 --- a/example.config.json +++ b/example.config.json @@ -1,6 +1,7 @@ { "api_url": "https://demo.cachethq.io/api", "api_token": "9yMHsdioQosnyVK4iCVR", + "interval": 60, "monitors": [ { "name": "nodegear frontend", diff --git a/main.go b/main.go index e5fde68..d0c672c 100644 --- a/main.go +++ b/main.go @@ -9,7 +9,7 @@ func main() { config := cachet.Config log := cachet.Logger - log.Printf("System: %s, API: %s\n", config.SystemName, config.APIUrl) + log.Printf("System: %s, Interval: %d second(s), API: %s\n", config.SystemName, config.Interval, config.APIUrl) log.Printf("Starting %d monitors:\n", len(config.Monitors)) for _, mon := range config.Monitors { log.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) @@ -20,7 +20,7 @@ func main() { log.Println() - ticker := time.NewTicker(time.Second) + ticker := time.NewTicker(time.Duration(config.Interval)*time.Second) for range ticker.C { for _, mon := range config.Monitors { go mon.Run() diff --git a/readme.md b/readme.md index ba98abf..fdba4db 100644 --- a/readme.md +++ b/readme.md @@ -7,7 +7,7 @@ Features -------- - [x] Creates & Resolves Incidents -- [x] Posts monitor lag every second +- [x] Posts monitor lag every second * config.Interval - [x] Updates Component to Partial Outage - [x] Updates Component to Major Outage if in Partial Outage - [x] Can be run on multiple servers and geo regions @@ -32,6 +32,7 @@ Configuration { "api_url": "https://demo.cachethq.io/api", "api_token": "9yMHsdioQosnyVK4iCVR", + "interval": 60, "monitors": [ { "name": "nodegear frontend", From 7a5ad278bbb8810820c7ff25a3c0a270fe0b4015 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 19 Jul 2015 20:25:34 +0100 Subject: [PATCH 23/40] Improve fail reasons, fix api crashes - Add options about TLS verification - Fix crashes when cachet presents IDs as a string - Improve fail reasons --- cachet/component.go | 18 +++++++------ cachet/config.go | 13 ++++----- cachet/incident.go | 66 ++++++++++++++++++++++++--------------------- cachet/monitor.go | 23 +++++++++++++--- cachet/request.go | 7 +++++ readme.md | 8 ++++-- 6 files changed, 85 insertions(+), 50 deletions(-) diff --git a/cachet/component.go b/cachet/component.go index bebbad4..1e0a95c 100644 --- a/cachet/component.go +++ b/cachet/component.go @@ -1,15 +1,17 @@ package cachet +import "encoding/json" + // Component Cachet model type Component struct { - ID int `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - Status int `json:"status_id"` - HumanStatus string `json:"-"` - IncidentCount int `json:"-"` - CreatedAt int `json:"created_at"` - UpdatedAt int `json:"updated_at"` + ID json.Number `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Status json.Number `json:"status_id"` + HumanStatus string `json:"-"` + IncidentCount int `json:"-"` + CreatedAt *string `json:"created_at"` + UpdatedAt *string `json:"updated_at"` } // ComponentData json response model diff --git a/cachet/config.go b/cachet/config.go index 35af034..13b73e9 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -21,11 +21,12 @@ var Logger *log.Logger // CachetConfig is the monitoring tool configuration type CachetConfig struct { - APIUrl string `json:"api_url"` - APIToken string `json:"api_token"` - Monitors []*Monitor `json:"monitors"` - SystemName string `json:"system_name"` - LogPath string `json:"log_path"` + APIUrl string `json:"api_url"` + APIToken string `json:"api_token"` + Monitors []*Monitor `json:"monitors"` + SystemName string `json:"system_name"` + LogPath string `json:"log_path"` + InsecureAPI bool `json:"insecure_api"` } func init() { @@ -107,7 +108,7 @@ func init() { } } - flags := log.Llongfile|log.Ldate|log.Ltime + flags := log.Llongfile | log.Ldate | log.Ltime if len(os.Getenv("DEVELOPMENT")) > 0 { flags = 0 } diff --git a/cachet/incident.go b/cachet/incident.go index c8213c0..cce2c19 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -7,15 +7,15 @@ import ( // Incident Cachet data model type Incident struct { - ID int `json:"id"` - Name string `json:"name"` - Message string `json:"message"` - Status int `json:"status"` // 4? - HumanStatus string `json:"human_status"` - Component *Component `json:"-"` - ComponentID *int `json:"component_id"` - CreatedAt int `json:"created_at"` - UpdatedAt int `json:"updated_at"` + ID json.Number `json:"id"` + Name string `json:"name"` + Message string `json:"message"` + Status json.Number `json:"status"` // 4? + HumanStatus string `json:"human_status"` + Component *Component `json:"-"` + ComponentID *json.Number `json:"component_id"` + CreatedAt *string `json:"created_at"` + UpdatedAt *string `json:"updated_at"` } // IncidentData is a response when creating/updating an incident @@ -40,6 +40,7 @@ func GetIncidents() []Incident { err = json.Unmarshal(body, &data) if err != nil { Logger.Printf("Cannot parse incidents: %v\n", err) + panic(err) } return data.Incidents @@ -47,17 +48,19 @@ func GetIncidents() []Incident { // Send - Create or Update incident func (incident *Incident) Send() { - jsonBytes, err := json.Marshal(incident) - if err != nil { - Logger.Printf("Cannot encode incident: %v\n", err) - return - } + jsonBytes, _ := json.Marshal(map[string]interface{}{ + "name": incident.Name, + "message": incident.Message, + "status": incident.Status, + "component_id": incident.ComponentID, + "notify": true, + }) requestType := "POST" requestURL := "/incidents" - if incident.ID > 0 { + if len(incident.ID) > 0 { requestType = "PUT" - requestURL += "/" + strconv.Itoa(incident.ID) + requestURL += "/" + string(incident.ID) } resp, body, err := makeRequest(requestType, requestURL, jsonBytes) @@ -71,7 +74,7 @@ func (incident *Incident) Send() { var data IncidentData err = json.Unmarshal(body, &data) if err != nil { - Logger.Println("Cannot parse incident body.") + Logger.Println("Cannot parse incident body.", string(body)) panic(err) } else { incident.ID = data.Incident.ID @@ -89,7 +92,7 @@ func (incident *Incident) GetSimilarIncidentID() { incidents := GetIncidents() for _, inc := range incidents { - if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.HumanStatus == inc.HumanStatus { + if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status { incident.ID = inc.ID Logger.Printf("Updated incident id to %v\n", inc.ID) break @@ -98,7 +101,7 @@ func (incident *Incident) GetSimilarIncidentID() { } func (incident *Incident) fetchComponent() error { - _, body, err := makeRequest("GET", "/components/" + strconv.Itoa(*incident.ComponentID), nil) + _, body, err := makeRequest("GET", "/components/"+string(*incident.ComponentID), nil) if err != nil { return err } @@ -106,7 +109,7 @@ func (incident *Incident) fetchComponent() error { var data ComponentData err = json.Unmarshal(body, &data) if err != nil { - Logger.Println("Cannot parse component body.") + Logger.Println("Cannot parse component body. %v", string(body)) panic(err) } @@ -116,7 +119,7 @@ func (incident *Incident) fetchComponent() error { } func (incident *Incident) UpdateComponent() { - if incident.ComponentID == nil || *incident.ComponentID == 0 { + if incident.ComponentID == nil || len(*incident.ComponentID) == 0 { return } @@ -128,22 +131,23 @@ func (incident *Incident) UpdateComponent() { } } - switch incident.Status { + status, _ := strconv.Atoi(string(incident.Status)) + switch status { case 1, 2, 3: - if incident.Component.Status == 3 { - incident.Component.Status = 4 + if incident.Component.Status == "3" { + incident.Component.Status = "4" } else { - incident.Component.Status = 3 + incident.Component.Status = "3" } case 4: - incident.Component.Status = 1 + incident.Component.Status = "1" } jsonBytes, _ := json.Marshal(map[string]interface{}{ "status": incident.Component.Status, }) - resp, _, err := makeRequest("PUT", "/components/" + strconv.Itoa(incident.Component.ID), jsonBytes) + resp, _, err := makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes) if err != nil || resp.StatusCode != 200 { Logger.Printf("Could not update component: (resp code %d) %v", resp.StatusCode, err) return @@ -152,24 +156,24 @@ func (incident *Incident) UpdateComponent() { // SetInvestigating sets status to Investigating func (incident *Incident) SetInvestigating() { - incident.Status = 1 + incident.Status = "1" incident.HumanStatus = "Investigating" } // SetIdentified sets status to Identified func (incident *Incident) SetIdentified() { - incident.Status = 2 + incident.Status = "2" incident.HumanStatus = "Identified" } // SetWatching sets status to Watching func (incident *Incident) SetWatching() { - incident.Status = 3 + incident.Status = "3" incident.HumanStatus = "Watching" } // SetFixed sets status to Fixed func (incident *Incident) SetFixed() { - incident.Status = 4 + incident.Status = "4" incident.HumanStatus = "Fixed" } diff --git a/cachet/monitor.go b/cachet/monitor.go index 616ab74..413a418 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -1,7 +1,10 @@ package cachet import ( + "crypto/tls" + "encoding/json" "net/http" + "strconv" "time" ) @@ -15,6 +18,7 @@ type Monitor struct { Threshold float32 `json:"threshold"` ComponentID *int `json:"component_id"` ExpectedStatusCode int `json:"expected_status_code"` + StrictTLS *bool `json:"strict_tls"` History []bool `json:"-"` LastFailReason *string `json:"-"` @@ -42,6 +46,12 @@ func (monitor *Monitor) doRequest() bool { client := &http.Client{ Timeout: timeout, } + if monitor.StrictTLS != nil && *monitor.StrictTLS == false { + client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + } + resp, err := client.Get(monitor.URL) if err != nil { errString := err.Error() @@ -51,7 +61,13 @@ func (monitor *Monitor) doRequest() bool { defer resp.Body.Close() - return resp.StatusCode == monitor.ExpectedStatusCode + if resp.StatusCode != monitor.ExpectedStatusCode { + failReason := "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode) + monitor.LastFailReason = &failReason + return false + } + + return true } // AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident @@ -76,10 +92,11 @@ func (monitor *Monitor) AnalyseData() { // is down, create an incident Logger.Println("Creating incident...") + component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) monitor.Incident = &Incident{ Name: monitor.Name + " - " + Config.SystemName, - Message: monitor.Name + " failed", - ComponentID: monitor.ComponentID, + Message: monitor.Name + " check failed", + ComponentID: &component_id, } if monitor.LastFailReason != nil { diff --git a/cachet/request.go b/cachet/request.go index 644eb76..40f92b6 100644 --- a/cachet/request.go +++ b/cachet/request.go @@ -2,6 +2,7 @@ package cachet import ( "bytes" + "crypto/tls" "io/ioutil" "net/http" ) @@ -13,6 +14,12 @@ func makeRequest(requestType string, url string, reqBody []byte) (*http.Response req.Header.Set("X-Cachet-Token", Config.APIToken) client := &http.Client{} + if Config.InsecureAPI == true { + client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + } + res, err := client.Do(req) if err != nil { return nil, []byte{}, err diff --git a/readme.md b/readme.md index ba98abf..0ab084f 100644 --- a/readme.md +++ b/readme.md @@ -40,15 +40,19 @@ Configuration "component_id": 0, "threshold": 80, "component_id": null, - "expected_status_code": 200 + "expected_status_code": 200, + "strict_tls": true } - ] + ], + "insecure_api": false } ``` *Notes:* - `metric_id` is optional +- `insecure_api` if true it will ignore HTTPS certificate errors (eg if self-signed) +- `strict_tls` if false (true is default) it will ignore HTTPS certificate errors (eg if monitor uses self-signed certificate) - `component_id` is optional - `threshold` is a percentage - `expected_status_code` is a http response code From 2b4097e90aca3b398c0005841de8f7bebe50e5d4 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 19 Jul 2015 20:27:26 +0100 Subject: [PATCH 24/40] Update example config with default values --- example.config.json | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/example.config.json b/example.config.json index 737a2b1..c8f7fd4 100644 --- a/example.config.json +++ b/example.config.json @@ -8,7 +8,9 @@ "metric_id": 1, "threshold": 80, "component_id": null, - "expected_status_code": 200 + "expected_status_code": 200, + "strict_tls": true } - ] + ], + "insecure_api": false } \ No newline at end of file From 850f4d237b9ace5ca07a087d43737bcb54502ecb Mon Sep 17 00:00:00 2001 From: Mathieu Doyon Date: Sun, 19 Jul 2015 16:23:49 -0400 Subject: [PATCH 25/40] Update interval to 5 secs --- example.config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example.config.json b/example.config.json index 9a74916..2168db2 100644 --- a/example.config.json +++ b/example.config.json @@ -1,7 +1,7 @@ { "api_url": "https://demo.cachethq.io/api", "api_token": "9yMHsdioQosnyVK4iCVR", - "interval": 60, + "interval": 5, "monitors": [ { "name": "nodegear frontend", @@ -14,4 +14,4 @@ } ], "insecure_api": false -} \ No newline at end of file +} From b60967999355de9a0efea619cfd23bc643eaa829 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 19 Jul 2015 21:32:26 +0100 Subject: [PATCH 26/40] Report new incident and set as fixed --- cachet/incident.go | 14 -------------- cachet/monitor.go | 13 +++++++------ 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/cachet/incident.go b/cachet/incident.go index cce2c19..f02bab1 100644 --- a/cachet/incident.go +++ b/cachet/incident.go @@ -86,20 +86,6 @@ func (incident *Incident) Send() { } } -// GetSimilarIncidentID gets the same incident. -// Updates incident.ID -func (incident *Incident) GetSimilarIncidentID() { - incidents := GetIncidents() - - for _, inc := range incidents { - if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status { - incident.ID = inc.ID - Logger.Printf("Updated incident id to %v\n", inc.ID) - break - } - } -} - func (incident *Incident) fetchComponent() error { _, body, err := makeRequest("GET", "/components/"+string(*incident.ComponentID), nil) if err != nil { diff --git a/cachet/monitor.go b/cachet/monitor.go index 413a418..6dc687b 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -100,15 +100,12 @@ func (monitor *Monitor) AnalyseData() { } if monitor.LastFailReason != nil { - monitor.Incident.Message += "\n\n" + *monitor.LastFailReason + monitor.Incident.Message += "\n\n - " + *monitor.LastFailReason } // set investigating status monitor.Incident.SetInvestigating() - // lookup relevant incident - monitor.Incident.GetSimilarIncidentID() - // create/update incident monitor.Incident.Send() monitor.Incident.UpdateComponent() @@ -116,8 +113,12 @@ func (monitor *Monitor) AnalyseData() { // was down, created an incident, its now ok, make it resolved. Logger.Println("Updating incident to resolved...") - // Add resolved message - monitor.Incident.Message += "\n\n-\n\nResolved at " + time.Now().String() + component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) + monitor.Incident = &Incident{ + Name: monitor.Incident.Name, + Message: monitor.Name + " check succeeded", + ComponentID: &component_id, + } monitor.Incident.SetFixed() monitor.Incident.Send() From 8b0bc42d5001bb49088714eaa86f9c3c37aa3984 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 19 Jul 2015 21:38:20 +0100 Subject: [PATCH 27/40] Add screenshot to readme --- readme.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 0ab084f..f42c2a5 100644 --- a/readme.md +++ b/readme.md @@ -3,6 +3,8 @@ Cachet Monitor plugin This is a monitoring plugin for CachetHQ. +![screenshot](https://castawaylabs.github.io/cachet-monitor/screenshot.png) + Features -------- @@ -85,4 +87,4 @@ Environment variables | Name | Example Value | Description | | ------------ | --------------------------- | --------------------------- | | CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | -| CACHET_TOKEN | randomvalue | API Authentication token | \ No newline at end of file +| CACHET_TOKEN | randomvalue | API Authentication token | From a83cf43e60d2f4ee06c23c5150ec792b9b1c1c3e Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 19 Jul 2015 21:43:59 +0100 Subject: [PATCH 28/40] Fix v1 api url --- example.config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example.config.json b/example.config.json index c8f7fd4..f77e43b 100644 --- a/example.config.json +++ b/example.config.json @@ -1,5 +1,5 @@ { - "api_url": "https://demo.cachethq.io/api", + "api_url": "https://demo.cachethq.io/api/v1", "api_token": "9yMHsdioQosnyVK4iCVR", "monitors": [ { @@ -13,4 +13,4 @@ } ], "insecure_api": false -} \ No newline at end of file +} From 270dbd361bf7a7f9be0787c0aa135c6c464b22e8 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Sun, 19 Jul 2015 21:44:21 +0100 Subject: [PATCH 29/40] Update documentation for api url --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index f42c2a5..7512674 100644 --- a/readme.md +++ b/readme.md @@ -32,7 +32,7 @@ Configuration ``` { - "api_url": "https://demo.cachethq.io/api", + "api_url": "https://demo.cachethq.io/api/v1", "api_token": "9yMHsdioQosnyVK4iCVR", "monitors": [ { From 3c1e2cd452fedbc5d3dd52abf34e737c4ee874a6 Mon Sep 17 00:00:00 2001 From: Soulou Date: Sat, 22 Aug 2015 19:13:27 +0200 Subject: [PATCH 30/40] Fix configuration example in README --- readme.md | 1 - 1 file changed, 1 deletion(-) diff --git a/readme.md b/readme.md index 7512674..80956ab 100644 --- a/readme.md +++ b/readme.md @@ -41,7 +41,6 @@ Configuration "metric_id": 0, "component_id": 0, "threshold": 80, - "component_id": null, "expected_status_code": 200, "strict_tls": true } From 76b897eb05dc0487ca0e78d8048744f5c2c93515 Mon Sep 17 00:00:00 2001 From: Soulou Date: Sun, 23 Aug 2015 17:33:23 +0200 Subject: [PATCH 31/40] Implement 'inteval' config parameter -> number of seconds between checks --- cachet/config.go | 10 +++++++++- cachet/monitor.go | 42 +++++++++++++++++++++++++++++++++--------- main.go | 37 +++++++++++++++++++++++++++++++------ 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/cachet/config.go b/cachet/config.go index 13b73e9..b809fc2 100644 --- a/cachet/config.go +++ b/cachet/config.go @@ -4,13 +4,14 @@ import ( "encoding/json" "flag" "fmt" - "github.com/castawaylabs/cachet-monitor/system" "io" "io/ioutil" "log" "net/http" "net/url" "os" + + "github.com/castawaylabs/cachet-monitor/system" ) // Static config @@ -69,6 +70,13 @@ func init() { os.Exit(1) } + for _, mon := range Config.Monitors { + if mon.Interval <= 0 { + mon.Interval = 1 + } + mon.stopC = make(chan struct{}) + } + if len(systemName) > 0 { Config.SystemName = systemName } diff --git a/cachet/monitor.go b/cachet/monitor.go index 6dc687b..1d9ddb9 100644 --- a/cachet/monitor.go +++ b/cachet/monitor.go @@ -12,17 +12,21 @@ const timeout = time.Duration(time.Second) // Monitor data model type Monitor struct { - Name string `json:"name"` - URL string `json:"url"` - MetricID int `json:"metric_id"` - Threshold float32 `json:"threshold"` - ComponentID *int `json:"component_id"` - ExpectedStatusCode int `json:"expected_status_code"` - StrictTLS *bool `json:"strict_tls"` + Name string `json:"name"` + URL string `json:"url"` + MetricID int `json:"metric_id"` + Threshold float32 `json:"threshold"` + ComponentID *int `json:"component_id"` + ExpectedStatusCode int `json:"expected_status_code"` + StrictTLS *bool `json:"strict_tls"` + Interval time.Duration `json:"interval"` History []bool `json:"-"` LastFailReason *string `json:"-"` Incident *Incident `json:"-"` + + // Closed when mon.Stop() is called + stopC chan struct{} `json:"-"` } // Run loop @@ -42,6 +46,26 @@ func (monitor *Monitor) Run() { } } +func (monitor *Monitor) Stop() { + if monitor.Stopped() { + return + } + close(monitor.stopC) +} + +func (monitor *Monitor) StopC() <-chan struct{} { + return monitor.stopC +} + +func (monitor *Monitor) Stopped() bool { + select { + case <-monitor.stopC: + return true + default: + return false + } +} + func (monitor *Monitor) doRequest() bool { client := &http.Client{ Timeout: timeout, @@ -115,8 +139,8 @@ func (monitor *Monitor) AnalyseData() { component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) monitor.Incident = &Incident{ - Name: monitor.Incident.Name, - Message: monitor.Name + " check succeeded", + Name: monitor.Incident.Name, + Message: monitor.Name + " check succeeded", ComponentID: &component_id, } diff --git a/main.go b/main.go index e5fde68..a755057 100644 --- a/main.go +++ b/main.go @@ -1,8 +1,12 @@ package main import ( - "github.com/castawaylabs/cachet-monitor/cachet" + "os" + "os/signal" + "sync" "time" + + "github.com/castawaylabs/cachet-monitor/cachet" ) func main() { @@ -20,10 +24,31 @@ func main() { log.Println() - ticker := time.NewTicker(time.Second) - for range ticker.C { - for _, mon := range config.Monitors { - go mon.Run() - } + wg := &sync.WaitGroup{} + for _, mon := range config.Monitors { + wg.Add(1) + go func(mon *cachet.Monitor) { + ticker := time.NewTicker(mon.Interval * time.Second) + for { + select { + case <-ticker.C: + mon.Run() + case <-mon.StopC(): + wg.Done() + return + } + } + }(mon) } + + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt, os.Kill) + <-signals + + log.Println("Waiting monitors to end current operation") + for _, mon := range config.Monitors { + mon.Stop() + } + + wg.Wait() } From e4a586b92af6125e1979dd02a79899a69209cae8 Mon Sep 17 00:00:00 2001 From: Soulou Date: Sun, 23 Aug 2015 20:01:41 +0200 Subject: [PATCH 32/40] update readme --- readme.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 7512674..93acd97 100644 --- a/readme.md +++ b/readme.md @@ -9,7 +9,7 @@ Features -------- - [x] Creates & Resolves Incidents -- [x] Posts monitor lag every second +- [x] Posts monitor lag (interval configurable) - [x] Updates Component to Partial Outage - [x] Updates Component to Major Outage if in Partial Outage - [x] Can be run on multiple servers and geo regions @@ -43,7 +43,8 @@ Configuration "threshold": 80, "component_id": null, "expected_status_code": 200, - "strict_tls": true + "strict_tls": true, + "interval": 5 } ], "insecure_api": false @@ -58,6 +59,7 @@ Configuration - `component_id` is optional - `threshold` is a percentage - `expected_status_code` is a http response code +- `interval` is the duration in seconds between two checks. - GET request will be performed on the `url` How to run From 5793df735373a5a3ca39275b748fec5a8fd96062 Mon Sep 17 00:00:00 2001 From: Alan Campbell Date: Mon, 7 Mar 2016 16:31:24 -0500 Subject: [PATCH 33/40] Update go install guide --- readme.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 28a9581..f4f80a1 100644 --- a/readme.md +++ b/readme.md @@ -66,8 +66,9 @@ How to run Example: 1. Set up [Go](https://golang.org) -2. `go install github.com/castawaylabs/cachet-monitor` -3. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json` +2. `go get -d github.com/castawaylabs/cachet-monitor` +3. `go install github.com/castawaylabs/cachet-monitor` +4. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json` Production: From 267a6cb6b386c4de1a9d785a4a409569c2f1012e Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Wed, 18 May 2016 23:54:55 +0100 Subject: [PATCH 34/40] better packaging - update readme --- Dockerfile | 6 ---- main.go => cli/main.go | 6 ++-- cachet/component.go => component.go | 0 cachet/config.go => config.go | 53 ++++++++++++++++++----------- cachet/incident.go => incident.go | 0 cachet/metrics.go => metrics.go | 0 cachet/monitor.go => monitor.go | 4 +-- readme.md | 42 +++++++---------------- cachet/request.go => request.go | 0 system/config.go | 24 ------------- 10 files changed, 52 insertions(+), 83 deletions(-) delete mode 100644 Dockerfile rename main.go => cli/main.go (81%) rename cachet/component.go => component.go (100%) rename cachet/config.go => config.go (63%) rename cachet/incident.go => incident.go (100%) rename cachet/metrics.go => metrics.go (100%) rename cachet/monitor.go => monitor.go (97%) rename cachet/request.go => request.go (100%) delete mode 100644 system/config.go diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index f163b42..0000000 --- a/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM golang - -ADD . /go/src/github.com/castawaylabs/cachet-monitor -RUN go install github.com/castawaylabs/cachet-monitor - -ENTRYPOINT /go/bin/cachet-monitor \ No newline at end of file diff --git a/main.go b/cli/main.go similarity index 81% rename from main.go rename to cli/main.go index d0c672c..fe92dc8 100644 --- a/main.go +++ b/cli/main.go @@ -1,11 +1,13 @@ package main import ( - "github.com/castawaylabs/cachet-monitor/cachet" "time" + + cachet "github.com/castawaylabs/cachet-monitor" ) func main() { + cachet.New() config := cachet.Config log := cachet.Logger @@ -20,7 +22,7 @@ func main() { log.Println() - ticker := time.NewTicker(time.Duration(config.Interval)*time.Second) + ticker := time.NewTicker(time.Duration(config.Interval) * time.Second) for range ticker.C { for _, mon := range config.Monitors { go mon.Run() diff --git a/cachet/component.go b/component.go similarity index 100% rename from cachet/component.go rename to component.go diff --git a/cachet/config.go b/config.go similarity index 63% rename from cachet/config.go rename to config.go index 9e48ff0..68e181c 100644 --- a/cachet/config.go +++ b/config.go @@ -2,16 +2,16 @@ package cachet import ( "encoding/json" + "errors" "flag" "fmt" "io" "io/ioutil" "log" + "net" "net/http" "net/url" "os" - - "github.com/castawaylabs/cachet-monitor/system" ) // Static config @@ -31,7 +31,7 @@ type CachetConfig struct { InsecureAPI bool `json:"insecure_api"` } -func init() { +func New() error { var configPath string var systemName string var logPath string @@ -48,8 +48,7 @@ func init() { // download config response, err := http.Get(configPath) if err != nil { - fmt.Printf("Cannot download network config: %v\n", err) - os.Exit(1) + return errors.New("Cannot download network config: " + err.Error()) } defer response.Body.Close() @@ -59,16 +58,12 @@ func init() { } else { data, err = ioutil.ReadFile(configPath) if err != nil { - fmt.Println("Config file '" + configPath + "' missing!") - os.Exit(1) + return errors.New("Config file '" + configPath + "' missing!") } } - err = json.Unmarshal(data, &Config) - - if err != nil { - fmt.Println("Cannot parse config!") - os.Exit(1) + if err := json.Unmarshal(data, &Config); err != nil { + return errors.New("Cannot parse config!") } if len(systemName) > 0 { @@ -76,7 +71,10 @@ func init() { } if len(Config.SystemName) == 0 { // get hostname - Config.SystemName = system.GetHostname() + Config.SystemName = getHostname() + } + if Config.Interval <= 0 { + Config.Interval = 60 } if len(os.Getenv("CACHET_API")) > 0 { @@ -87,13 +85,11 @@ func init() { } if len(Config.APIToken) == 0 || len(Config.APIUrl) == 0 { - fmt.Printf("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/CastawayLabs/cachet-monitor\n") - os.Exit(1) + return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/CastawayLabs/cachet-monitor\n") } if len(Config.Monitors) == 0 { - fmt.Printf("No monitors defined!\nSee sample configuration: https://github.com/CastawayLabs/cachet-monitor/blob/master/example.config.json\n") - os.Exit(1) + return errors.New("No monitors defined!\nSee sample configuration: https://github.com/CastawayLabs/cachet-monitor/blob/master/example.config.json\n") } if len(logPath) > 0 { @@ -105,8 +101,7 @@ func init() { if len(Config.LogPath) > 0 { logWriter, err = os.Create(Config.LogPath) if err != nil { - fmt.Printf("Unable to open file '%v' for logging\n", Config.LogPath) - os.Exit(1) + return errors.New("Unable to open file '" + Config.LogPath + "' for logging\n") } } @@ -116,4 +111,24 @@ func init() { } Logger = log.New(logWriter, "", flags) + + return nil +} + +// getHostname returns id of the current system +func getHostname() string { + hostname, err := os.Hostname() + if err != nil || len(hostname) == 0 { + addrs, err := net.InterfaceAddrs() + + if err != nil { + return "unknown" + } + + for _, addr := range addrs { + return addr.String() + } + } + + return hostname } diff --git a/cachet/incident.go b/incident.go similarity index 100% rename from cachet/incident.go rename to incident.go diff --git a/cachet/metrics.go b/metrics.go similarity index 100% rename from cachet/metrics.go rename to metrics.go diff --git a/cachet/monitor.go b/monitor.go similarity index 97% rename from cachet/monitor.go rename to monitor.go index 6dc687b..8f735c1 100644 --- a/cachet/monitor.go +++ b/monitor.go @@ -115,8 +115,8 @@ func (monitor *Monitor) AnalyseData() { component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) monitor.Incident = &Incident{ - Name: monitor.Incident.Name, - Message: monitor.Name + " check succeeded", + Name: monitor.Incident.Name, + Message: monitor.Name + " check succeeded", ComponentID: &component_id, } diff --git a/readme.md b/readme.md index f4f80a1..26374b1 100644 --- a/readme.md +++ b/readme.md @@ -14,33 +14,20 @@ Features - [x] Updates Component to Major Outage if in Partial Outage - [x] Can be run on multiple servers and geo regions -Docker Quickstart ------------------ - -1. Create a configuration json -2. -``` -docker run -d \ - --name cachet-monitor \ - -h cachet-monitor \ - -v `pwd`/config.json:/etc/cachet-monitor.config.json \ - castawaylabs/cachet-monitor -``` - Configuration ------------- ``` { "api_url": "https://demo.cachethq.io/api/v1", - "api_token": "9yMHsdioQosnyVK4iCVR", + "api_token": "", "interval": 60, "monitors": [ { - "name": "nodegear frontend", - "url": "https://nodegear.io/ping", - "metric_id": 0, - "component_id": 0, + "name": "Name of your monitor", + "url": "Ping URL", + "metric_id": , + "component_id": , "threshold": 80, "expected_status_code": 200, "strict_tls": true @@ -60,20 +47,14 @@ Configuration - `expected_status_code` is a http response code - GET request will be performed on the `url` -How to run ----------- +Installation +------------ -Example: +1. Download binary from release page +2. Create your configuration ([example](https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json)) +3. `cachet-monitor -c /etc/cachet-monitor.config.json` -1. Set up [Go](https://golang.org) -2. `go get -d github.com/castawaylabs/cachet-monitor` -3. `go install github.com/castawaylabs/cachet-monitor` -4. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json` - -Production: - -1. Download the example config and save to `/etc/cachet-monitor.config.json` -2. Run in background: `nohup cachet-monitor 2>&1 > /var/log/cachet-monitor.log &` +tip: run in background using `nohup cachet-monitor 2>&1 > /var/log/cachet-monitor.log &` ``` Usage of cachet-monitor: @@ -89,3 +70,4 @@ Environment variables | ------------ | --------------------------- | --------------------------- | | CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | | CACHET_TOKEN | randomvalue | API Authentication token | +| DEVELOPMENT | 1 | Strips logging | diff --git a/cachet/request.go b/request.go similarity index 100% rename from cachet/request.go rename to request.go diff --git a/system/config.go b/system/config.go deleted file mode 100644 index 4c899d9..0000000 --- a/system/config.go +++ /dev/null @@ -1,24 +0,0 @@ -package system - -import ( - "net" - "os" -) - -// GetHostname returns id of the current system -func GetHostname() string { - hostname, err := os.Hostname() - if err != nil || len(hostname) == 0 { - addrs, err := net.InterfaceAddrs() - - if err != nil { - return "unknown" - } - - for _, addr := range addrs { - return addr.String() - } - } - - return hostname -} From dfad6f090650e5f226c2bc8de90408583e566c93 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 00:05:00 +0100 Subject: [PATCH 35/40] Vision readme --- readme.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/readme.md b/readme.md index 26374b1..f49ab61 100644 --- a/readme.md +++ b/readme.md @@ -71,3 +71,11 @@ Environment variables | CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | | CACHET_TOKEN | randomvalue | API Authentication token | | DEVELOPMENT | 1 | Strips logging | + +Vision and goals +---------------- + +We made this tool because we felt the need to have our own monitoring software (leveraging on Cachet). +The idea is a stateless program which collects data and pushes it to a central cachet instance. + +This gives us power to have an army of geographically distributed loggers and reveal issues in both latency & downtime on client websites. From 2d62fc7443942895ae0703d23186611f3d7daf0b Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 12:20:56 +0100 Subject: [PATCH 36/40] refactoring --- cli/main.go | 110 ++++++++++++++++++++++++++++++++++++++++++--------- component.go | 20 ---------- config.go | 110 ++++++++++----------------------------------------- http.go | 45 +++++++++++++++++++++ incident.go | 100 +++++++++++++++++++++------------------------- metrics.go | 12 +++--- monitor.go | 40 ++++++++++++++----- readme.md | 2 +- request.go | 32 --------------- 9 files changed, 242 insertions(+), 229 deletions(-) delete mode 100644 component.go create mode 100644 http.go delete mode 100644 request.go diff --git a/cli/main.go b/cli/main.go index fe92dc8..c394621 100644 --- a/cli/main.go +++ b/cli/main.go @@ -1,31 +1,105 @@ package main import ( - "time" + "encoding/json" + "errors" + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" + "os" cachet "github.com/castawaylabs/cachet-monitor" ) +var configPath string +var systemName string +var logPath string + func main() { - cachet.New() - config := cachet.Config - log := cachet.Logger + flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") + flag.StringVar(&systemName, "name", "", "System Name") + flag.StringVar(&logPath, "log", "", "Log path") + flag.Parse() - log.Printf("System: %s, Interval: %d second(s), API: %s\n", config.SystemName, config.Interval, config.APIUrl) - log.Printf("Starting %d monitors:\n", len(config.Monitors)) - for _, mon := range config.Monitors { - log.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) - if mon.MetricID > 0 { - log.Printf(" - Logs lag to metric id: %d\n", mon.MetricID) - } + cfg, err := getConfiguration(configPath) + if err != nil { + panic(err) } - log.Println() - - ticker := time.NewTicker(time.Duration(config.Interval) * time.Second) - for range ticker.C { - for _, mon := range config.Monitors { - go mon.Run() - } + if len(systemName) > 0 { + cfg.SystemName = systemName } + if len(logPath) > 0 { + cfg.LogPath = logPath + } + + if len(os.Getenv("CACHET_API")) > 0 { + cfg.APIUrl = os.Getenv("CACHET_API") + } + if len(os.Getenv("CACHET_TOKEN")) > 0 { + cfg.APIToken = os.Getenv("CACHET_TOKEN") + } + + if err := cfg.ValidateConfiguration(); err != nil { + panic(err) + } + + cfg.Run() +} + +func getLogger(logPath string) *log.Logger { + var logWriter = os.Stdout + var err error + + if len(logPath) > 0 { + logWriter, err = os.Create(logPath) + if err != nil { + fmt.Printf("Unable to open file '%v' for logging\n", logPath) + os.Exit(1) + } + } + + flags := log.Llongfile | log.Ldate | log.Ltime + if len(os.Getenv("CACHET_DEV")) > 0 { + flags = 0 + } + + return log.New(logWriter, "", flags) +} + +func getConfiguration(path string) (*cachet.CachetMonitor, error) { + var cfg cachet.CachetMonitor + var data []byte + + // test if its a url + url, err := url.ParseRequestURI(path) + if err == nil && len(url.Scheme) > 0 { + // download config + response, err := http.Get(path) + if err != nil { + return nil, errors.New("Cannot download network config: " + err.Error()) + } + + defer response.Body.Close() + data, _ = ioutil.ReadAll(response.Body) + + fmt.Println("Downloaded network configuration.") + } else { + data, err = ioutil.ReadFile(path) + if err != nil { + return nil, errors.New("Config file '" + path + "' missing!") + } + } + + if err := json.Unmarshal(data, &cfg); err != nil { + fmt.Println(err) + return nil, errors.New("Cannot parse config!") + } + + cfg.Logger = getLogger(cfg.LogPath) + + return &cfg, nil } diff --git a/component.go b/component.go deleted file mode 100644 index 1e0a95c..0000000 --- a/component.go +++ /dev/null @@ -1,20 +0,0 @@ -package cachet - -import "encoding/json" - -// Component Cachet model -type Component struct { - ID json.Number `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - Status json.Number `json:"status_id"` - HumanStatus string `json:"-"` - IncidentCount int `json:"-"` - CreatedAt *string `json:"created_at"` - UpdatedAt *string `json:"updated_at"` -} - -// ComponentData json response model -type ComponentData struct { - Component Component `json:"data"` -} diff --git a/config.go b/config.go index 68e181c..b0dc9fa 100644 --- a/config.go +++ b/config.go @@ -1,117 +1,47 @@ package cachet import ( - "encoding/json" "errors" - "flag" - "fmt" - "io" - "io/ioutil" "log" "net" - "net/http" - "net/url" "os" ) -// Static config -var Config CachetConfig +type CachetMonitor struct { + Logger *log.Logger `json:"-"` -// Central logger -var Logger *log.Logger + APIUrl string `json:"api_url"` + APIToken string `json:"api_token"` + Interval int64 `json:"interval"` + SystemName string `json:"system_name"` + LogPath string `json:"log_path"` + InsecureAPI bool `json:"insecure_api"` -// CachetConfig is the monitoring tool configuration -type CachetConfig struct { - APIUrl string `json:"api_url"` - APIToken string `json:"api_token"` - Interval int64 `json:"interval"` - Monitors []*Monitor `json:"monitors"` - SystemName string `json:"system_name"` - LogPath string `json:"log_path"` - InsecureAPI bool `json:"insecure_api"` + Monitors []*Monitor `json:"monitors"` } -func New() error { - var configPath string - var systemName string - var logPath string - flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") - flag.StringVar(&systemName, "name", "", "System Name") - flag.StringVar(&logPath, "log", "", "Log path") - flag.Parse() - - var data []byte - - // test if its a url - url, err := url.ParseRequestURI(configPath) - if err == nil && len(url.Scheme) > 0 { - // download config - response, err := http.Get(configPath) - if err != nil { - return errors.New("Cannot download network config: " + err.Error()) - } - - defer response.Body.Close() - data, _ = ioutil.ReadAll(response.Body) - - fmt.Println("Downloaded network configuration.") - } else { - data, err = ioutil.ReadFile(configPath) - if err != nil { - return errors.New("Config file '" + configPath + "' missing!") - } +func (mon *CachetMonitor) ValidateConfiguration() error { + if mon.Logger == nil { + mon.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) } - if err := json.Unmarshal(data, &Config); err != nil { - return errors.New("Cannot parse config!") - } - - if len(systemName) > 0 { - Config.SystemName = systemName - } - if len(Config.SystemName) == 0 { + if len(mon.SystemName) == 0 { // get hostname - Config.SystemName = getHostname() - } - if Config.Interval <= 0 { - Config.Interval = 60 + mon.SystemName = getHostname() } - if len(os.Getenv("CACHET_API")) > 0 { - Config.APIUrl = os.Getenv("CACHET_API") - } - if len(os.Getenv("CACHET_TOKEN")) > 0 { - Config.APIToken = os.Getenv("CACHET_TOKEN") + if mon.Interval <= 0 { + mon.Interval = 60 } - if len(Config.APIToken) == 0 || len(Config.APIUrl) == 0 { - return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/CastawayLabs/cachet-monitor\n") + if len(mon.APIToken) == 0 || len(mon.APIUrl) == 0 { + return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n") } - if len(Config.Monitors) == 0 { - return errors.New("No monitors defined!\nSee sample configuration: https://github.com/CastawayLabs/cachet-monitor/blob/master/example.config.json\n") + if len(mon.Monitors) == 0 { + return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n") } - if len(logPath) > 0 { - Config.LogPath = logPath - } - - var logWriter io.Writer - logWriter = os.Stdout - if len(Config.LogPath) > 0 { - logWriter, err = os.Create(Config.LogPath) - if err != nil { - return errors.New("Unable to open file '" + Config.LogPath + "' for logging\n") - } - } - - flags := log.Llongfile | log.Ldate | log.Ltime - if len(os.Getenv("DEVELOPMENT")) > 0 { - flags = 0 - } - - Logger = log.New(logWriter, "", flags) - return nil } diff --git a/http.go b/http.go new file mode 100644 index 0000000..f6b5b01 --- /dev/null +++ b/http.go @@ -0,0 +1,45 @@ +package cachet + +import ( + "bytes" + "crypto/tls" + "encoding/json" + "io/ioutil" + "net/http" +) + +// Component Cachet model +type Component struct { + ID json.Number `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Status json.Number `json:"status_id"` + HumanStatus string `json:"-"` + IncidentCount int `json:"-"` + CreatedAt *string `json:"created_at"` + UpdatedAt *string `json:"updated_at"` +} + +func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { + req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody)) + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Cachet-Token", monitor.APIToken) + + client := &http.Client{} + if monitor.InsecureAPI == true { + client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + } + + res, err := client.Do(req) + if err != nil { + return nil, []byte{}, err + } + + defer res.Body.Close() + body, _ := ioutil.ReadAll(res.Body) + + return res, body, nil +} diff --git a/incident.go b/incident.go index f02bab1..00077ce 100644 --- a/incident.go +++ b/incident.go @@ -2,6 +2,8 @@ package cachet import ( "encoding/json" + "errors" + "fmt" "strconv" ) @@ -18,36 +20,26 @@ type Incident struct { UpdatedAt *string `json:"updated_at"` } -// IncidentData is a response when creating/updating an incident -type IncidentData struct { - Incident Incident `json:"data"` -} - -// IncidentList - from API /incidents -type IncidentList struct { - Incidents []Incident `json:"data"` -} - // GetIncidents - Get list of incidents -func GetIncidents() []Incident { - _, body, err := makeRequest("GET", "/incidents", nil) +func (monitor *CachetMonitor) GetIncidents() ([]Incident, error) { + _, body, err := monitor.makeRequest("GET", "/incidents", nil) if err != nil { - Logger.Printf("Cannot get incidents: %v\n", err) - return []Incident{} + return []Incident{}, fmt.Errorf("Cannot get incidents: %v\n", err) } - var data IncidentList + var data struct { + Incidents []Incident `json:"data"` + } err = json.Unmarshal(body, &data) if err != nil { - Logger.Printf("Cannot parse incidents: %v\n", err) - panic(err) + return []Incident{}, fmt.Errorf("Cannot parse incidents: %v\n", err) } - return data.Incidents + return data.Incidents, nil } // Send - Create or Update incident -func (incident *Incident) Send() { +func (monitor *CachetMonitor) SendIncident(incident *Incident) error { jsonBytes, _ := json.Marshal(map[string]interface{}{ "name": incident.Name, "message": incident.Message, @@ -63,58 +55,57 @@ func (incident *Incident) Send() { requestURL += "/" + string(incident.ID) } - resp, body, err := makeRequest(requestType, requestURL, jsonBytes) + resp, body, err := monitor.makeRequest(requestType, requestURL, jsonBytes) if err != nil { - Logger.Printf("Cannot create/update incident: %v\n", err) - return + return err } - Logger.Println(strconv.Itoa(resp.StatusCode) + " " + string(body)) - - var data IncidentData - err = json.Unmarshal(body, &data) - if err != nil { - Logger.Println("Cannot parse incident body.", string(body)) - panic(err) + var data struct { + Incident Incident `json:"data"` + } + if err := json.Unmarshal(body, &data); err != nil { + return errors.New("Cannot parse incident body." + string(body)) } else { incident.ID = data.Incident.ID incident.Component = data.Incident.Component } if resp.StatusCode != 200 { - Logger.Println("Could not create/update incident!") + return errors.New("Could not create/update incident!") } -} - -func (incident *Incident) fetchComponent() error { - _, body, err := makeRequest("GET", "/components/"+string(*incident.ComponentID), nil) - if err != nil { - return err - } - - var data ComponentData - err = json.Unmarshal(body, &data) - if err != nil { - Logger.Println("Cannot parse component body. %v", string(body)) - panic(err) - } - - incident.Component = &data.Component return nil } -func (incident *Incident) UpdateComponent() { +func (monitor *CachetMonitor) fetchComponent(componentID string) (*Component, error) { + _, body, err := monitor.makeRequest("GET", "/components/"+componentID, nil) + if err != nil { + return nil, err + } + + var data struct { + Component Component `json:"data"` + } + if err := json.Unmarshal(body, &data); err != nil { + return nil, errors.New("Cannot parse component body. " + string(body)) + } + + return &data.Component, nil +} + +func (monitor *CachetMonitor) UpdateComponent(incident *Incident) error { if incident.ComponentID == nil || len(*incident.ComponentID) == 0 { - return + return nil } if incident.Component == nil { // fetch component - if err := incident.fetchComponent(); err != nil { - Logger.Printf("Cannot fetch component for incident. %v\n", err) - return + component, err := monitor.fetchComponent(string(*incident.ComponentID)) + if err != nil { + return fmt.Errorf("Cannot fetch component for incident. %v\n", err) } + + incident.Component = component } status, _ := strconv.Atoi(string(incident.Status)) @@ -133,11 +124,12 @@ func (incident *Incident) UpdateComponent() { "status": incident.Component.Status, }) - resp, _, err := makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes) + resp, _, err := monitor.makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes) if err != nil || resp.StatusCode != 200 { - Logger.Printf("Could not update component: (resp code %d) %v", resp.StatusCode, err) - return + return fmt.Errorf("Could not update component: (resp code %d) %v", resp.StatusCode, err) } + + return nil } // SetInvestigating sets status to Investigating diff --git a/metrics.go b/metrics.go index ff21d58..25e4973 100644 --- a/metrics.go +++ b/metrics.go @@ -2,22 +2,24 @@ package cachet import ( "encoding/json" + "fmt" "strconv" ) // SendMetric sends lag metric point -func SendMetric(metricID int, delay int64) { +func (monitor *CachetMonitor) SendMetric(metricID int, delay int64) error { if metricID <= 0 { - return + return nil } jsonBytes, _ := json.Marshal(&map[string]interface{}{ "value": delay, }) - resp, _, err := makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) + resp, _, err := monitor.makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) if err != nil || resp.StatusCode != 200 { - Logger.Printf("Could not log data point!\n%v\n", err) - return + return fmt.Errorf("Could not log data point!\n%v\n", err) } + + return nil } diff --git a/monitor.go b/monitor.go index 8f735c1..368dd57 100644 --- a/monitor.go +++ b/monitor.go @@ -23,6 +23,28 @@ type Monitor struct { History []bool `json:"-"` LastFailReason *string `json:"-"` Incident *Incident `json:"-"` + config *CachetMonitor +} + +func (cfg *CachetMonitor) Run() { + cfg.Logger.Printf("System: %s\nInterval: %d second(s)\nAPI: %s\n\n", cfg.SystemName, cfg.Interval, cfg.APIUrl) + cfg.Logger.Printf("Starting %d monitors:\n", len(cfg.Monitors)) + for _, mon := range cfg.Monitors { + cfg.Logger.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) + if mon.MetricID > 0 { + cfg.Logger.Printf(" - Logs lag to metric id: %d\n", mon.MetricID) + } + } + + cfg.Logger.Println() + + ticker := time.NewTicker(time.Duration(cfg.Interval) * time.Second) + for range ticker.C { + for _, mon := range cfg.Monitors { + mon.config = cfg + go mon.Run() + } + } } // Run loop @@ -38,7 +60,7 @@ func (monitor *Monitor) Run() { monitor.AnalyseData() if isUp == true && monitor.MetricID > 0 { - SendMetric(monitor.MetricID, lag) + monitor.config.SendMetric(monitor.MetricID, lag) } } @@ -81,7 +103,7 @@ func (monitor *Monitor) AnalyseData() { } t := (float32(numDown) / float32(len(monitor.History))) * 100 - Logger.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) + monitor.config.Logger.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) if len(monitor.History) != 10 { // not enough data @@ -90,11 +112,11 @@ func (monitor *Monitor) AnalyseData() { if t > monitor.Threshold && monitor.Incident == nil { // is down, create an incident - Logger.Println("Creating incident...") + monitor.config.Logger.Println("Creating incident...") component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) monitor.Incident = &Incident{ - Name: monitor.Name + " - " + Config.SystemName, + Name: monitor.Name + " - " + monitor.config.SystemName, Message: monitor.Name + " check failed", ComponentID: &component_id, } @@ -107,11 +129,11 @@ func (monitor *Monitor) AnalyseData() { monitor.Incident.SetInvestigating() // create/update incident - monitor.Incident.Send() - monitor.Incident.UpdateComponent() + monitor.config.SendIncident(monitor.Incident) + monitor.config.UpdateComponent(monitor.Incident) } else if t < monitor.Threshold && monitor.Incident != nil { // was down, created an incident, its now ok, make it resolved. - Logger.Println("Updating incident to resolved...") + monitor.config.Logger.Println("Updating incident to resolved...") component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) monitor.Incident = &Incident{ @@ -121,8 +143,8 @@ func (monitor *Monitor) AnalyseData() { } monitor.Incident.SetFixed() - monitor.Incident.Send() - monitor.Incident.UpdateComponent() + monitor.config.SendIncident(monitor.Incident) + monitor.config.UpdateComponent(monitor.Incident) monitor.Incident = nil } diff --git a/readme.md b/readme.md index f49ab61..e306379 100644 --- a/readme.md +++ b/readme.md @@ -70,7 +70,7 @@ Environment variables | ------------ | --------------------------- | --------------------------- | | CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | | CACHET_TOKEN | randomvalue | API Authentication token | -| DEVELOPMENT | 1 | Strips logging | +| CACHET_DEV | 1 | Strips logging | Vision and goals ---------------- diff --git a/request.go b/request.go deleted file mode 100644 index 40f92b6..0000000 --- a/request.go +++ /dev/null @@ -1,32 +0,0 @@ -package cachet - -import ( - "bytes" - "crypto/tls" - "io/ioutil" - "net/http" -) - -func makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { - req, err := http.NewRequest(requestType, Config.APIUrl+url, bytes.NewBuffer(reqBody)) - - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Cachet-Token", Config.APIToken) - - client := &http.Client{} - if Config.InsecureAPI == true { - client.Transport = &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - } - } - - res, err := client.Do(req) - if err != nil { - return nil, []byte{}, err - } - - defer res.Body.Close() - body, _ := ioutil.ReadAll(res.Body) - - return res, body, nil -} From c729fbdf41b2bdf4ce3d917dbba5a3106e5e84ab Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 12:55:53 +0100 Subject: [PATCH 37/40] Update readme --- readme.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/readme.md b/readme.md index 0e5d6e8..30831ec 100644 --- a/readme.md +++ b/readme.md @@ -1,8 +1,3 @@ -Cachet Monitor plugin -===================== - -This is a monitoring plugin for CachetHQ. - ![screenshot](https://castawaylabs.github.io/cachet-monitor/screenshot.png) Features From 0ceccccd459a64fd87f0acd60d38889cb4af71e0 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 18:40:01 +0100 Subject: [PATCH 38/40] More config options, markdown in incidents - Specify method to ping with - Body regex match - Markdown in incident messages - Update existing incidents --- cli/main.go | 20 +++- config.go | 25 ++--- http.go | 37 ++++--- incident.go | 163 +++++++++++------------------ metrics.go | 25 ----- monitor.go | 289 +++++++++++++++++++++++++++++----------------------- readme.md | 58 +++++------ 7 files changed, 305 insertions(+), 312 deletions(-) delete mode 100644 metrics.go diff --git a/cli/main.go b/cli/main.go index c394621..4028b4d 100644 --- a/cli/main.go +++ b/cli/main.go @@ -10,6 +10,8 @@ import ( "net/http" "net/url" "os" + "os/signal" + "sync" cachet "github.com/castawaylabs/cachet-monitor" ) @@ -47,7 +49,23 @@ func main() { panic(err) } - cfg.Run() + cfg.Logger.Printf("System: %s\nAPI: %s\nMonitors: %d\n\n", cfg.SystemName, cfg.APIUrl, len(cfg.Monitors)) + + wg := &sync.WaitGroup{} + for _, mon := range cfg.Monitors { + go mon.Start(cfg, wg) + } + + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt, os.Kill) + <-signals + + cfg.Logger.Println("Abort: Waiting monitors to finish") + for _, mon := range cfg.Monitors { + mon.Stop() + } + + wg.Wait() } func getLogger(logPath string) *log.Logger { diff --git a/config.go b/config.go index b0dc9fa..8a12419 100644 --- a/config.go +++ b/config.go @@ -12,7 +12,6 @@ type CachetMonitor struct { APIUrl string `json:"api_url"` APIToken string `json:"api_token"` - Interval int64 `json:"interval"` SystemName string `json:"system_name"` LogPath string `json:"log_path"` InsecureAPI bool `json:"insecure_api"` @@ -20,28 +19,30 @@ type CachetMonitor struct { Monitors []*Monitor `json:"monitors"` } -func (mon *CachetMonitor) ValidateConfiguration() error { - if mon.Logger == nil { - mon.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) +func (cfg *CachetMonitor) ValidateConfiguration() error { + if cfg.Logger == nil { + cfg.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) } - if len(mon.SystemName) == 0 { + if len(cfg.SystemName) == 0 { // get hostname - mon.SystemName = getHostname() + cfg.SystemName = getHostname() } - if mon.Interval <= 0 { - mon.Interval = 60 - } - - if len(mon.APIToken) == 0 || len(mon.APIUrl) == 0 { + if len(cfg.APIToken) == 0 || len(cfg.APIUrl) == 0 { return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n") } - if len(mon.Monitors) == 0 { + if len(cfg.Monitors) == 0 { return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n") } + for _, monitor := range cfg.Monitors { + if err := monitor.ValidateConfiguration(); err != nil { + return err + } + } + return nil } diff --git a/http.go b/http.go index f6b5b01..602d9d1 100644 --- a/http.go +++ b/http.go @@ -4,22 +4,13 @@ import ( "bytes" "crypto/tls" "encoding/json" + "fmt" "io/ioutil" "net/http" + "strconv" + "time" ) -// Component Cachet model -type Component struct { - ID json.Number `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - Status json.Number `json:"status_id"` - HumanStatus string `json:"-"` - IncidentCount int `json:"-"` - CreatedAt *string `json:"created_at"` - UpdatedAt *string `json:"updated_at"` -} - func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody)) @@ -43,3 +34,25 @@ func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBod return res, body, nil } + +// SendMetric sends lag metric point +func (monitor *CachetMonitor) SendMetric(metricID int, delay int64) error { + if metricID <= 0 { + return nil + } + + jsonBytes, _ := json.Marshal(&map[string]interface{}{ + "value": delay, + }) + + resp, _, err := monitor.makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) + if err != nil || resp.StatusCode != 200 { + return fmt.Errorf("Could not log data point!\n%v\n", err) + } + + return nil +} + +func getMs() int64 { + return time.Now().UnixNano() / int64(time.Millisecond) +} diff --git a/incident.go b/incident.go index 00077ce..3cbb3c4 100644 --- a/incident.go +++ b/incident.go @@ -2,156 +2,113 @@ package cachet import ( "encoding/json" - "errors" "fmt" "strconv" ) // Incident Cachet data model type Incident struct { - ID json.Number `json:"id"` - Name string `json:"name"` - Message string `json:"message"` - Status json.Number `json:"status"` // 4? - HumanStatus string `json:"human_status"` - Component *Component `json:"-"` - ComponentID *json.Number `json:"component_id"` - CreatedAt *string `json:"created_at"` - UpdatedAt *string `json:"updated_at"` -} + ID int `json:"id"` + Name string `json:"name"` + Message string `json:"message"` + Status int `json:"status"` + Visible int `json"visible"` + Notify bool `json:"notify"` -// GetIncidents - Get list of incidents -func (monitor *CachetMonitor) GetIncidents() ([]Incident, error) { - _, body, err := monitor.makeRequest("GET", "/incidents", nil) - if err != nil { - return []Incident{}, fmt.Errorf("Cannot get incidents: %v\n", err) - } - - var data struct { - Incidents []Incident `json:"data"` - } - err = json.Unmarshal(body, &data) - if err != nil { - return []Incident{}, fmt.Errorf("Cannot parse incidents: %v\n", err) - } - - return data.Incidents, nil + ComponentID int `json:"component_id"` + ComponentStatus int `json:"component_status"` } // Send - Create or Update incident -func (monitor *CachetMonitor) SendIncident(incident *Incident) error { - jsonBytes, _ := json.Marshal(map[string]interface{}{ - "name": incident.Name, - "message": incident.Message, - "status": incident.Status, - "component_id": incident.ComponentID, - "notify": true, - }) +func (incident *Incident) Send(cfg *CachetMonitor) error { + switch incident.Status { + case 1, 2, 3: + // partial outage + incident.ComponentStatus = 3 + + componentStatus, err := incident.GetComponentStatus(cfg) + if componentStatus == 3 { + // major outage + incident.ComponentStatus = 4 + } + + if err != nil { + cfg.Logger.Printf("cannot fetch component: %v", err) + } + case 4: + // fixed + incident.ComponentStatus = 1 + } requestType := "POST" requestURL := "/incidents" - if len(incident.ID) > 0 { + if incident.ID > 0 { requestType = "PUT" - requestURL += "/" + string(incident.ID) + requestURL += "/" + strconv.Itoa(incident.ID) } - resp, body, err := monitor.makeRequest(requestType, requestURL, jsonBytes) + jsonBytes, _ := json.Marshal(incident) + + resp, body, err := cfg.makeRequest(requestType, requestURL, jsonBytes) if err != nil { return err } var data struct { - Incident Incident `json:"data"` + Incident struct { + ID int `json:"id"` + } `json:"data"` } if err := json.Unmarshal(body, &data); err != nil { - return errors.New("Cannot parse incident body." + string(body)) - } else { - incident.ID = data.Incident.ID - incident.Component = data.Incident.Component + return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body)) + } + + incident.ID = data.Incident.ID + if resp.StatusCode != 200 { + return fmt.Errorf("Could not create/update incident!") + } + + return nil +} + +func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) { + resp, body, err := cfg.makeRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil) + if err != nil { + return 0, err } if resp.StatusCode != 200 { - return errors.New("Could not create/update incident!") - } - - return nil -} - -func (monitor *CachetMonitor) fetchComponent(componentID string) (*Component, error) { - _, body, err := monitor.makeRequest("GET", "/components/"+componentID, nil) - if err != nil { - return nil, err + return 0, fmt.Errorf("Invalid status code. Received %d", resp.StatusCode) } var data struct { - Component Component `json:"data"` + Component struct { + Status int `json:"status"` + } `json:"data"` } if err := json.Unmarshal(body, &data); err != nil { - return nil, errors.New("Cannot parse component body. " + string(body)) + return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body), err) } - return &data.Component, nil -} - -func (monitor *CachetMonitor) UpdateComponent(incident *Incident) error { - if incident.ComponentID == nil || len(*incident.ComponentID) == 0 { - return nil - } - - if incident.Component == nil { - // fetch component - component, err := monitor.fetchComponent(string(*incident.ComponentID)) - if err != nil { - return fmt.Errorf("Cannot fetch component for incident. %v\n", err) - } - - incident.Component = component - } - - status, _ := strconv.Atoi(string(incident.Status)) - switch status { - case 1, 2, 3: - if incident.Component.Status == "3" { - incident.Component.Status = "4" - } else { - incident.Component.Status = "3" - } - case 4: - incident.Component.Status = "1" - } - - jsonBytes, _ := json.Marshal(map[string]interface{}{ - "status": incident.Component.Status, - }) - - resp, _, err := monitor.makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes) - if err != nil || resp.StatusCode != 200 { - return fmt.Errorf("Could not update component: (resp code %d) %v", resp.StatusCode, err) - } - - return nil + return data.Component.Status, nil } // SetInvestigating sets status to Investigating func (incident *Incident) SetInvestigating() { - incident.Status = "1" - incident.HumanStatus = "Investigating" + incident.Status = 1 } // SetIdentified sets status to Identified func (incident *Incident) SetIdentified() { - incident.Status = "2" - incident.HumanStatus = "Identified" + incident.Status = 2 } // SetWatching sets status to Watching func (incident *Incident) SetWatching() { - incident.Status = "3" - incident.HumanStatus = "Watching" + incident.Status = 3 } // SetFixed sets status to Fixed func (incident *Incident) SetFixed() { - incident.Status = "4" - incident.HumanStatus = "Fixed" + incident.Status = 4 } diff --git a/metrics.go b/metrics.go deleted file mode 100644 index 25e4973..0000000 --- a/metrics.go +++ /dev/null @@ -1,25 +0,0 @@ -package cachet - -import ( - "encoding/json" - "fmt" - "strconv" -) - -// SendMetric sends lag metric point -func (monitor *CachetMonitor) SendMetric(metricID int, delay int64) error { - if metricID <= 0 { - return nil - } - - jsonBytes, _ := json.Marshal(&map[string]interface{}{ - "value": delay, - }) - - resp, _, err := monitor.makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) - if err != nil || resp.StatusCode != 200 { - return fmt.Errorf("Could not log data point!\n%v\n", err) - } - - return nil -} diff --git a/monitor.go b/monitor.go index b1acce8..93b3de5 100644 --- a/monitor.go +++ b/monitor.go @@ -2,103 +2,81 @@ package cachet import ( "crypto/tls" - "encoding/json" - "log" + "errors" + "fmt" + "io/ioutil" "net/http" - "os" - "os/signal" + "regexp" "strconv" + "strings" "sync" "time" ) -const timeout = time.Duration(time.Second) +const HttpTimeout = time.Duration(time.Second) +const DefaultInterval = 60 +const DefaultTimeFormat = "15:04:05 Jan 2 MST" // Monitor data model type Monitor struct { - Name string `json:"name"` - URL string `json:"url"` - MetricID int `json:"metric_id"` - Threshold float32 `json:"threshold"` - ComponentID *int `json:"component_id"` - ExpectedStatusCode int `json:"expected_status_code"` - StrictTLS *bool `json:"strict_tls"` - Interval time.Duration `json:"interval"` + Name string `json:"name"` + URL string `json:"url"` + Method string `json:"method"` + StrictTLS bool `json:"strict_tls"` + CheckInterval time.Duration `json:"interval"` - History []bool `json:"-"` - LastFailReason *string `json:"-"` - Incident *Incident `json:"-"` + MetricID int `json:"metric_id"` + ComponentID int `json:"component_id"` + + // Threshold = percentage + Threshold float32 `json:"threshold"` + // Saturat + ExpectedStatusCode int `json:"expected_status_code"` + // compiled to Regexp + ExpectedBody string `json:"expected_body"` + bodyRegexp *regexp.Regexp + + history []bool + lastFailReason string + incident *Incident config *CachetMonitor // Closed when mon.Stop() is called stopC chan bool } -func (cfg *CachetMonitor) Run() { - cfg.Logger.Printf("System: %s\nInterval: %d second(s)\nAPI: %s\n\n", cfg.SystemName, cfg.Interval, cfg.APIUrl) - cfg.Logger.Printf("Starting %d monitors:\n", len(cfg.Monitors)) - for _, mon := range cfg.Monitors { - cfg.Logger.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode) - if mon.MetricID > 0 { - cfg.Logger.Printf(" - Logs lag to metric id: %d\n", mon.MetricID) +func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) { + wg.Add(1) + mon.config = cfg + mon.stopC = make(chan bool) + + mon.config.Logger.Printf(" Starting %s: %d seconds check interval\n - %v %s", mon.Name, mon.CheckInterval, mon.Method, mon.URL) + + // print features + if mon.ExpectedStatusCode > 0 { + mon.config.Logger.Printf(" - Expect HTTP %d", mon.ExpectedStatusCode) + } + if len(mon.ExpectedBody) > 0 { + mon.config.Logger.Printf(" - Expect Body to match \"%v\"", mon.ExpectedBody) + } + if mon.MetricID > 0 { + mon.config.Logger.Printf(" - Log lag to metric id %d\n", mon.MetricID) + } + if mon.ComponentID > 0 { + mon.config.Logger.Printf(" - Update component id %d\n\n", mon.ComponentID) + } + + mon.Tick() + + ticker := time.NewTicker(mon.CheckInterval * time.Second) + for { + select { + case <-ticker.C: + mon.Tick() + case <-mon.stopC: + wg.Done() + return } - if mon.ComponentID != nil && *mon.ComponentID > 0 { - cfg.Logger.Printf(" - Updates component id: %d\n", *mon.ComponentID) - } - } - - cfg.Logger.Println() - wg := &sync.WaitGroup{} - - for _, mon := range cfg.Monitors { - wg.Add(1) - mon.config = cfg - mon.stopC = make(chan bool) - - go func(mon *Monitor) { - if mon.Interval < 1 { - mon.Interval = time.Duration(cfg.Interval) - } - - ticker := time.NewTicker(mon.Interval * time.Second) - for { - select { - case <-ticker.C: - mon.Run() - case <-mon.StopC(): - wg.Done() - return - } - } - }(mon) - } - - signals := make(chan os.Signal, 1) - signal.Notify(signals, os.Interrupt, os.Kill) - <-signals - - log.Println("Waiting monitors to end current operation") - for _, mon := range cfg.Monitors { - mon.Stop() - } - - wg.Wait() -} - -// Run loop -func (monitor *Monitor) Run() { - reqStart := getMs() - isUp := monitor.doRequest() - lag := getMs() - reqStart - - if len(monitor.History) >= 10 { - monitor.History = monitor.History[len(monitor.History)-9:] - } - monitor.History = append(monitor.History, isUp) - monitor.AnalyseData() - - if isUp == true && monitor.MetricID > 0 { - monitor.config.SendMetric(monitor.MetricID, lag) } } @@ -110,10 +88,6 @@ func (monitor *Monitor) Stop() { close(monitor.stopC) } -func (monitor *Monitor) StopC() <-chan bool { - return monitor.stopC -} - func (monitor *Monitor) Stopped() bool { select { case <-monitor.stopC: @@ -123,11 +97,30 @@ func (monitor *Monitor) Stopped() bool { } } +func (monitor *Monitor) Tick() { + reqStart := getMs() + isUp := monitor.doRequest() + lag := getMs() - reqStart + + if len(monitor.history) == 9 { + monitor.config.Logger.Printf("%v is now saturated\n", monitor.Name) + } + if len(monitor.history) >= 10 { + monitor.history = monitor.history[len(monitor.history)-9:] + } + monitor.history = append(monitor.history, isUp) + monitor.AnalyseData() + + if isUp == true && monitor.MetricID > 0 { + monitor.config.SendMetric(monitor.MetricID, lag) + } +} + func (monitor *Monitor) doRequest() bool { client := &http.Client{ - Timeout: timeout, + Timeout: HttpTimeout, } - if monitor.StrictTLS != nil && *monitor.StrictTLS == false { + if monitor.StrictTLS == false { client.Transport = &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } @@ -135,19 +128,36 @@ func (monitor *Monitor) doRequest() bool { resp, err := client.Get(monitor.URL) if err != nil { - errString := err.Error() - monitor.LastFailReason = &errString + monitor.lastFailReason = err.Error() + return false } defer resp.Body.Close() - if resp.StatusCode != monitor.ExpectedStatusCode { - failReason := "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode) - monitor.LastFailReason = &failReason + if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode { + monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode) + return false } + if monitor.bodyRegexp != nil { + // check body + responseBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + monitor.lastFailReason = err.Error() + + return false + } + + match := monitor.bodyRegexp.Match(responseBody) + if !match { + monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody + } + + return match + } + return true } @@ -155,60 +165,89 @@ func (monitor *Monitor) doRequest() bool { func (monitor *Monitor) AnalyseData() { // look at the past few incidents numDown := 0 - for _, wasUp := range monitor.History { + for _, wasUp := range monitor.history { if wasUp == false { numDown++ } } - t := (float32(numDown) / float32(len(monitor.History))) * 100 - monitor.config.Logger.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.URL, t, time.Now().UnixNano()/int64(time.Second), monitor.Threshold) + t := (float32(numDown) / float32(len(monitor.history))) * 100 + monitor.config.Logger.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second)) - if len(monitor.History) != 10 { - // not enough data + if len(monitor.history) != 10 { + // not saturated return } - if t > monitor.Threshold && monitor.Incident == nil { - // is down, create an incident - monitor.config.Logger.Println("Creating incident...") - - component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) - monitor.Incident = &Incident{ + if t > monitor.Threshold && monitor.incident == nil { + monitor.incident = &Incident{ Name: monitor.Name + " - " + monitor.config.SystemName, - Message: monitor.Name + " check failed", - ComponentID: &component_id, + ComponentID: monitor.ComponentID, + Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat), + Notify: true, } - if monitor.LastFailReason != nil { - monitor.Incident.Message += "\n\n - " + *monitor.LastFailReason + if len(monitor.lastFailReason) > 0 { + monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`" } + // is down, create an incident + monitor.config.Logger.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason) // set investigating status - monitor.Incident.SetInvestigating() - + monitor.incident.SetInvestigating() // create/update incident - monitor.config.SendIncident(monitor.Incident) - monitor.config.UpdateComponent(monitor.Incident) - } else if t < monitor.Threshold && monitor.Incident != nil { - // was down, created an incident, its now ok, make it resolved. - monitor.config.Logger.Println("Updating incident to resolved...") - - component_id := json.Number(strconv.Itoa(*monitor.ComponentID)) - monitor.Incident = &Incident{ - Name: monitor.Incident.Name, - Message: monitor.Name + " check succeeded", - ComponentID: &component_id, + if err := monitor.incident.Send(monitor.config); err != nil { + monitor.config.Logger.Printf("Error sending incident: %v\n", err) } + } else if t < monitor.Threshold && monitor.incident != nil { + // was down, created an incident, its now ok, make it resolved. + monitor.config.Logger.Printf("%v resolved downtime incident", monitor.Name) - monitor.Incident.SetFixed() - monitor.config.SendIncident(monitor.Incident) - monitor.config.UpdateComponent(monitor.Incident) + // resolve incident + monitor.incident.Message = "\n**Resolved** - " + time.Now().Format(DefaultTimeFormat) + "\n\n - - - \n\n" + monitor.incident.Message + monitor.incident.SetFixed() + monitor.incident.Send(monitor.config) - monitor.Incident = nil + monitor.lastFailReason = "" + monitor.incident = nil } } -func getMs() int64 { - return time.Now().UnixNano() / int64(time.Millisecond) +func (monitor *Monitor) ValidateConfiguration() error { + if len(monitor.ExpectedBody) > 0 { + exp, err := regexp.Compile(monitor.ExpectedBody) + if err != nil { + return err + } + + monitor.bodyRegexp = exp + } + + if len(monitor.ExpectedBody) == 0 && monitor.ExpectedStatusCode == 0 { + return errors.New("Nothing to check, both 'expected_body' and 'expected_status_code' fields empty") + } + + if monitor.CheckInterval < 1 { + monitor.CheckInterval = DefaultInterval + } + + monitor.Method = strings.ToUpper(monitor.Method) + switch monitor.Method { + case "GET", "POST", "DELETE", "OPTIONS", "HEAD": + break + case "": + monitor.Method = "GET" + default: + return fmt.Errorf("Unsupported check method: %v", monitor.Method) + } + + if monitor.ComponentID == 0 && monitor.MetricID == 0 { + return errors.New("component_id & metric_id are unset") + } + + if monitor.Threshold <= 0 { + monitor.Threshold = 100 + } + + return nil } diff --git a/readme.md b/readme.md index 30831ec..b2fd71d 100644 --- a/readme.md +++ b/readme.md @@ -4,9 +4,9 @@ Features -------- - [x] Creates & Resolves Incidents -- [x] Posts monitor lag (interval configurable) +- [x] Posts monitor lag to cachet graphs - [x] Updates Component to Partial Outage -- [x] Updates Component to Major Outage if in Partial Outage +- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring) - [x] Can be run on multiple servers and geo regions Configuration @@ -14,36 +14,26 @@ Configuration ``` { - "api_url": "https://demo.cachethq.io/api/v1", - "api_token": "", - "interval": 60, - "monitors": [ - { - "name": "Name of your monitor", - "url": "Ping URL", - "metric_id": , - "component_id": , - "threshold": 80, - "expected_status_code": 200, - "strict_tls": true, - "interval": 5 - } - ], - "insecure_api": false + "api_url": "https:///api/v1", + "api_token": "", + "insecure_api": false, // optional, false default, set if your certificate is self-signed/untrusted + "monitors": [{ + "name": "Name of your monitor", // required, friendly name for your monitor + "url": "Ping URL", // required, url to probe + "method": "get", // optional, http method (defaults GET) + "strict_tls": true, // self-signed ssl certificate + "interval": 10, // seconds between checks + "metric_id": , // post lag to cachet metric (graph) + "component_id": , // post incidents to this component + "threshold": 80, // If % of downtime is over this threshold, open an incident + "expected_status_code": 200, // optional, expected status code (either status code or body must be supplied) + "expected_body": "P.*NG" // optional, regular expression + }], + "system_name": "", // optional, system name to identify bot + "log_path": "" // optional, defaults to stdout } ``` -*Notes:* - -- `metric_id` is optional -- `insecure_api` if true it will ignore HTTPS certificate errors (eg if self-signed) -- `strict_tls` if false (true is default) it will ignore HTTPS certificate errors (eg if monitor uses self-signed certificate) -- `component_id` is optional -- `threshold` is a percentage -- `expected_status_code` is a http response code -- `interval` is the duration in seconds between two checks. -- GET request will be performed on the `url` - Installation ------------ @@ -63,11 +53,11 @@ Usage of cachet-monitor: Environment variables --------------------- -| Name | Example Value | Description | -| ------------ | --------------------------- | --------------------------- | -| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api | -| CACHET_TOKEN | randomvalue | API Authentication token | -| CACHET_DEV | 1 | Strips logging | +| Name | Example Value | Description | +| ------------ | ------------------------------ | --------------------------- | +| CACHET_API | http://demo.cachethq.io/api/v1 | URL endpoint for cachet api | +| CACHET_TOKEN | APIToken123 | API Authentication token | +| CACHET_DEV | 1 | Strips logging | Vision and goals ---------------- From 6e9f5440ba4cc1c94f53715b1f894aa5af62b420 Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 19:18:02 +0100 Subject: [PATCH 39/40] Update documentation --- http.go | 6 +++--- monitor.go | 2 +- readme.md | 46 +++++++++++++++++++++++++++++++--------------- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/http.go b/http.go index 602d9d1..3391174 100644 --- a/http.go +++ b/http.go @@ -36,8 +36,8 @@ func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBod } // SendMetric sends lag metric point -func (monitor *CachetMonitor) SendMetric(metricID int, delay int64) error { - if metricID <= 0 { +func (monitor *Monitor) SendMetric(delay int64) error { + if monitor.MetricID == 0 { return nil } @@ -45,7 +45,7 @@ func (monitor *CachetMonitor) SendMetric(metricID int, delay int64) error { "value": delay, }) - resp, _, err := monitor.makeRequest("POST", "/metrics/"+strconv.Itoa(metricID)+"/points", jsonBytes) + resp, _, err := monitor.config.makeRequest("POST", "/metrics/"+strconv.Itoa(monitor.MetricID)+"/points", jsonBytes) if err != nil || resp.StatusCode != 200 { return fmt.Errorf("Could not log data point!\n%v\n", err) } diff --git a/monitor.go b/monitor.go index 93b3de5..b556d0b 100644 --- a/monitor.go +++ b/monitor.go @@ -112,7 +112,7 @@ func (monitor *Monitor) Tick() { monitor.AnalyseData() if isUp == true && monitor.MetricID > 0 { - monitor.config.SendMetric(monitor.MetricID, lag) + monitor.SendMetric(lag) } } diff --git a/readme.md b/readme.md index b2fd71d..aa0a9fb 100644 --- a/readme.md +++ b/readme.md @@ -14,34 +14,50 @@ Configuration ``` { + // URL for the API. Note: Must end with /api/v1 "api_url": "https:///api/v1", + // Your API token for Cachet "api_token": "", - "insecure_api": false, // optional, false default, set if your certificate is self-signed/untrusted + // optional, false default, set if your certificate is self-signed/untrusted + "insecure_api": false, "monitors": [{ - "name": "Name of your monitor", // required, friendly name for your monitor - "url": "Ping URL", // required, url to probe - "method": "get", // optional, http method (defaults GET) - "strict_tls": true, // self-signed ssl certificate - "interval": 10, // seconds between checks - "metric_id": , // post lag to cachet metric (graph) - "component_id": , // post incidents to this component - "threshold": 80, // If % of downtime is over this threshold, open an incident - "expected_status_code": 200, // optional, expected status code (either status code or body must be supplied) - "expected_body": "P.*NG" // optional, regular expression + // required, friendly name for your monitor + "name": "Name of your monitor", + // required, url to probe + "url": "Ping URL", + // optional, http method (defaults GET) + "method": "get", + // self-signed ssl certificate + "strict_tls": true, + // seconds between checks + "interval": 10, + // post lag to cachet metric (graph) + // note either metric ID or component ID are required + "metric_id": , + // post incidents to this component + "component_id": , + // If % of downtime is over this threshold, open an incident + "threshold": 80, + // optional, expected status code (either status code or body must be supplied) + "expected_status_code": 200, + // optional, regular expression to match body content + "expected_body": "P.*NG" }], - "system_name": "", // optional, system name to identify bot - "log_path": "" // optional, defaults to stdout + // optional, system name to identify bot (uses hostname by default) + "system_name": "", + // optional, defaults to stdout + "log_path": "" } ``` Installation ------------ -1. Download binary from release page +1. Download binary from [release page](https://github.com/CastawayLabs/cachet-monitor/releases) 2. Create your configuration ([example](https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json)) 3. `cachet-monitor -c /etc/cachet-monitor.config.json` -tip: run in background using `nohup cachet-monitor 2>&1 > /var/log/cachet-monitor.log &` +pro tip: run in background using `nohup cachet-monitor 2>&1 > /var/log/cachet-monitor.log &` ``` Usage of cachet-monitor: From 9d0e1766950885a0b74d0419b170489025dac34d Mon Sep 17 00:00:00 2001 From: Matej Kramny Date: Thu, 19 May 2016 19:26:29 +0100 Subject: [PATCH 40/40] Add to readme, remove comment --- monitor.go | 5 ++--- readme.md | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/monitor.go b/monitor.go index b556d0b..fbe7927 100644 --- a/monitor.go +++ b/monitor.go @@ -29,9 +29,8 @@ type Monitor struct { ComponentID int `json:"component_id"` // Threshold = percentage - Threshold float32 `json:"threshold"` - // Saturat - ExpectedStatusCode int `json:"expected_status_code"` + Threshold float32 `json:"threshold"` + ExpectedStatusCode int `json:"expected_status_code"` // compiled to Regexp ExpectedBody string `json:"expected_body"` bodyRegexp *regexp.Regexp diff --git a/readme.md b/readme.md index aa0a9fb..703696b 100644 --- a/readme.md +++ b/readme.md @@ -82,3 +82,10 @@ We made this tool because we felt the need to have our own monitoring software ( The idea is a stateless program which collects data and pushes it to a central cachet instance. This gives us power to have an army of geographically distributed loggers and reveal issues in both latency & downtime on client websites. + +Package usage +------------- + +When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside. + +[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)