14 Commits

Author SHA1 Message Date
Alan Campbell
5793df7353 Update go install guide 2016-03-07 16:31:24 -05:00
Matej Kramny
fae03e7561 Merge pull request #10 from studiofrenetic/master
Add time interval
2015-12-08 13:02:12 +00:00
Matej Kramny
bcf53f0afc Merge pull request #17 from Soulou/patch-1
Fix configuration example in README
2015-08-24 00:08:36 +01:00
Soulou
3c1e2cd452 Fix configuration example in README 2015-08-22 19:13:27 +02:00
Matej Kramny
270dbd361b Update documentation for api url 2015-07-19 21:44:21 +01:00
Matej Kramny
a83cf43e60 Fix v1 api url 2015-07-19 21:43:59 +01:00
Matej Kramny
8b0bc42d50 Add screenshot to readme 2015-07-19 21:38:20 +01:00
Matej Kramny
b609679993 Report new incident and set as fixed 2015-07-19 21:32:26 +01:00
Mathieu Doyon
850f4d237b Update interval to 5 secs 2015-07-19 16:23:49 -04:00
Mathieu Doyon
019bc8c057 Fix merge conflict 2015-07-19 16:23:18 -04:00
Matej Kramny
a710944218 Merge pull request #15 from CastawayLabs/update-cachet-api
Fix API-related crashes, improve fail messages
2015-07-19 20:31:35 +01:00
Matej Kramny
2b4097e90a Update example config with default values 2015-07-19 20:27:26 +01:00
Matej Kramny
7a5ad278bb Improve fail reasons, fix api crashes
- Add options about TLS verification
- Fix crashes when cachet presents IDs as a string
- Improve fail reasons
2015-07-19 20:25:34 +01:00
Mathieu Doyon
f918ea38cd Add time interval 2015-04-08 14:28:36 -04:00
8 changed files with 112 additions and 82 deletions

View File

@@ -1,15 +1,17 @@
package cachet
import "encoding/json"
// Component Cachet model
type Component struct {
ID int `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
Status int `json:"status_id"`
HumanStatus string `json:"-"`
IncidentCount int `json:"-"`
CreatedAt int `json:"created_at"`
UpdatedAt int `json:"updated_at"`
ID json.Number `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
Status json.Number `json:"status_id"`
HumanStatus string `json:"-"`
IncidentCount int `json:"-"`
CreatedAt *string `json:"created_at"`
UpdatedAt *string `json:"updated_at"`
}
// ComponentData json response model

View File

@@ -4,13 +4,14 @@ import (
"encoding/json"
"flag"
"fmt"
"github.com/castawaylabs/cachet-monitor/system"
"io"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"github.com/castawaylabs/cachet-monitor/system"
)
// Static config
@@ -21,11 +22,13 @@ var Logger *log.Logger
// CachetConfig is the monitoring tool configuration
type CachetConfig struct {
APIUrl string `json:"api_url"`
APIToken string `json:"api_token"`
Monitors []*Monitor `json:"monitors"`
SystemName string `json:"system_name"`
LogPath string `json:"log_path"`
APIUrl string `json:"api_url"`
APIToken string `json:"api_token"`
Interval int64 `json:"interval"`
Monitors []*Monitor `json:"monitors"`
SystemName string `json:"system_name"`
LogPath string `json:"log_path"`
InsecureAPI bool `json:"insecure_api"`
}
func init() {
@@ -107,7 +110,7 @@ func init() {
}
}
flags := log.Llongfile|log.Ldate|log.Ltime
flags := log.Llongfile | log.Ldate | log.Ltime
if len(os.Getenv("DEVELOPMENT")) > 0 {
flags = 0
}

View File

@@ -7,15 +7,15 @@ import (
// Incident Cachet data model
type Incident struct {
ID int `json:"id"`
Name string `json:"name"`
Message string `json:"message"`
Status int `json:"status"` // 4?
HumanStatus string `json:"human_status"`
Component *Component `json:"-"`
ComponentID *int `json:"component_id"`
CreatedAt int `json:"created_at"`
UpdatedAt int `json:"updated_at"`
ID json.Number `json:"id"`
Name string `json:"name"`
Message string `json:"message"`
Status json.Number `json:"status"` // 4?
HumanStatus string `json:"human_status"`
Component *Component `json:"-"`
ComponentID *json.Number `json:"component_id"`
CreatedAt *string `json:"created_at"`
UpdatedAt *string `json:"updated_at"`
}
// IncidentData is a response when creating/updating an incident
@@ -40,6 +40,7 @@ func GetIncidents() []Incident {
err = json.Unmarshal(body, &data)
if err != nil {
Logger.Printf("Cannot parse incidents: %v\n", err)
panic(err)
}
return data.Incidents
@@ -47,17 +48,19 @@ func GetIncidents() []Incident {
// Send - Create or Update incident
func (incident *Incident) Send() {
jsonBytes, err := json.Marshal(incident)
if err != nil {
Logger.Printf("Cannot encode incident: %v\n", err)
return
}
jsonBytes, _ := json.Marshal(map[string]interface{}{
"name": incident.Name,
"message": incident.Message,
"status": incident.Status,
"component_id": incident.ComponentID,
"notify": true,
})
requestType := "POST"
requestURL := "/incidents"
if incident.ID > 0 {
if len(incident.ID) > 0 {
requestType = "PUT"
requestURL += "/" + strconv.Itoa(incident.ID)
requestURL += "/" + string(incident.ID)
}
resp, body, err := makeRequest(requestType, requestURL, jsonBytes)
@@ -71,7 +74,7 @@ func (incident *Incident) Send() {
var data IncidentData
err = json.Unmarshal(body, &data)
if err != nil {
Logger.Println("Cannot parse incident body.")
Logger.Println("Cannot parse incident body.", string(body))
panic(err)
} else {
incident.ID = data.Incident.ID
@@ -83,22 +86,8 @@ func (incident *Incident) Send() {
}
}
// GetSimilarIncidentID gets the same incident.
// Updates incident.ID
func (incident *Incident) GetSimilarIncidentID() {
incidents := GetIncidents()
for _, inc := range incidents {
if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.HumanStatus == inc.HumanStatus {
incident.ID = inc.ID
Logger.Printf("Updated incident id to %v\n", inc.ID)
break
}
}
}
func (incident *Incident) fetchComponent() error {
_, body, err := makeRequest("GET", "/components/" + strconv.Itoa(*incident.ComponentID), nil)
_, body, err := makeRequest("GET", "/components/"+string(*incident.ComponentID), nil)
if err != nil {
return err
}
@@ -106,7 +95,7 @@ func (incident *Incident) fetchComponent() error {
var data ComponentData
err = json.Unmarshal(body, &data)
if err != nil {
Logger.Println("Cannot parse component body.")
Logger.Println("Cannot parse component body. %v", string(body))
panic(err)
}
@@ -116,7 +105,7 @@ func (incident *Incident) fetchComponent() error {
}
func (incident *Incident) UpdateComponent() {
if incident.ComponentID == nil || *incident.ComponentID == 0 {
if incident.ComponentID == nil || len(*incident.ComponentID) == 0 {
return
}
@@ -128,22 +117,23 @@ func (incident *Incident) UpdateComponent() {
}
}
switch incident.Status {
status, _ := strconv.Atoi(string(incident.Status))
switch status {
case 1, 2, 3:
if incident.Component.Status == 3 {
incident.Component.Status = 4
if incident.Component.Status == "3" {
incident.Component.Status = "4"
} else {
incident.Component.Status = 3
incident.Component.Status = "3"
}
case 4:
incident.Component.Status = 1
incident.Component.Status = "1"
}
jsonBytes, _ := json.Marshal(map[string]interface{}{
"status": incident.Component.Status,
})
resp, _, err := makeRequest("PUT", "/components/" + strconv.Itoa(incident.Component.ID), jsonBytes)
resp, _, err := makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes)
if err != nil || resp.StatusCode != 200 {
Logger.Printf("Could not update component: (resp code %d) %v", resp.StatusCode, err)
return
@@ -152,24 +142,24 @@ func (incident *Incident) UpdateComponent() {
// SetInvestigating sets status to Investigating
func (incident *Incident) SetInvestigating() {
incident.Status = 1
incident.Status = "1"
incident.HumanStatus = "Investigating"
}
// SetIdentified sets status to Identified
func (incident *Incident) SetIdentified() {
incident.Status = 2
incident.Status = "2"
incident.HumanStatus = "Identified"
}
// SetWatching sets status to Watching
func (incident *Incident) SetWatching() {
incident.Status = 3
incident.Status = "3"
incident.HumanStatus = "Watching"
}
// SetFixed sets status to Fixed
func (incident *Incident) SetFixed() {
incident.Status = 4
incident.Status = "4"
incident.HumanStatus = "Fixed"
}

View File

@@ -1,7 +1,10 @@
package cachet
import (
"crypto/tls"
"encoding/json"
"net/http"
"strconv"
"time"
)
@@ -15,6 +18,7 @@ type Monitor struct {
Threshold float32 `json:"threshold"`
ComponentID *int `json:"component_id"`
ExpectedStatusCode int `json:"expected_status_code"`
StrictTLS *bool `json:"strict_tls"`
History []bool `json:"-"`
LastFailReason *string `json:"-"`
@@ -42,6 +46,12 @@ func (monitor *Monitor) doRequest() bool {
client := &http.Client{
Timeout: timeout,
}
if monitor.StrictTLS != nil && *monitor.StrictTLS == false {
client.Transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
}
resp, err := client.Get(monitor.URL)
if err != nil {
errString := err.Error()
@@ -51,7 +61,13 @@ func (monitor *Monitor) doRequest() bool {
defer resp.Body.Close()
return resp.StatusCode == monitor.ExpectedStatusCode
if resp.StatusCode != monitor.ExpectedStatusCode {
failReason := "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
monitor.LastFailReason = &failReason
return false
}
return true
}
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
@@ -76,22 +92,20 @@ func (monitor *Monitor) AnalyseData() {
// is down, create an incident
Logger.Println("Creating incident...")
component_id := json.Number(strconv.Itoa(*monitor.ComponentID))
monitor.Incident = &Incident{
Name: monitor.Name + " - " + Config.SystemName,
Message: monitor.Name + " failed",
ComponentID: monitor.ComponentID,
Message: monitor.Name + " check failed",
ComponentID: &component_id,
}
if monitor.LastFailReason != nil {
monitor.Incident.Message += "\n\n" + *monitor.LastFailReason
monitor.Incident.Message += "\n\n - " + *monitor.LastFailReason
}
// set investigating status
monitor.Incident.SetInvestigating()
// lookup relevant incident
monitor.Incident.GetSimilarIncidentID()
// create/update incident
monitor.Incident.Send()
monitor.Incident.UpdateComponent()
@@ -99,8 +113,12 @@ func (monitor *Monitor) AnalyseData() {
// was down, created an incident, it's now ok, make it resolved.
Logger.Println("Updating incident to resolved...")
// Add resolved message
monitor.Incident.Message += "\n\n-\n\nResolved at " + time.Now().String()
component_id := json.Number(strconv.Itoa(*monitor.ComponentID))
monitor.Incident = &Incident{
Name: monitor.Incident.Name,
Message: monitor.Name + " check succeeded",
ComponentID: &component_id,
}
monitor.Incident.SetFixed()
monitor.Incident.Send()

View File

@@ -2,6 +2,7 @@ package cachet
import (
"bytes"
"crypto/tls"
"io/ioutil"
"net/http"
)
@@ -13,6 +14,12 @@ func makeRequest(requestType string, url string, reqBody []byte) (*http.Response
req.Header.Set("X-Cachet-Token", Config.APIToken)
client := &http.Client{}
if Config.InsecureAPI == true {
client.Transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
}
res, err := client.Do(req)
if err != nil {
return nil, []byte{}, err

View File

@@ -1,6 +1,7 @@
{
"api_url": "https://demo.cachethq.io/api",
"api_url": "https://demo.cachethq.io/api/v1",
"api_token": "9yMHsdioQosnyVK4iCVR",
"interval": 5,
"monitors": [
{
"name": "nodegear frontend",
@@ -8,7 +9,9 @@
"metric_id": 1,
"threshold": 80,
"component_id": null,
"expected_status_code": 200
"expected_status_code": 200,
"strict_tls": true
}
]
}
],
"insecure_api": false
}

View File

@@ -9,7 +9,7 @@ func main() {
config := cachet.Config
log := cachet.Logger
log.Printf("System: %s, API: %s\n", config.SystemName, config.APIUrl)
log.Printf("System: %s, Interval: %d second(s), API: %s\n", config.SystemName, config.Interval, config.APIUrl)
log.Printf("Starting %d monitors:\n", len(config.Monitors))
for _, mon := range config.Monitors {
log.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode)
@@ -20,7 +20,7 @@ func main() {
log.Println()
ticker := time.NewTicker(time.Second)
ticker := time.NewTicker(time.Duration(config.Interval)*time.Second)
for range ticker.C {
for _, mon := range config.Monitors {
go mon.Run()

View File

@@ -3,11 +3,13 @@ Cachet Monitor plugin
This is a monitoring plugin for CachetHQ.
![screenshot](https://castawaylabs.github.io/cachet-monitor/screenshot.png)
Features
--------
- [x] Creates & Resolves Incidents
- [x] Posts monitor lag every second
- [x] Posts monitor lag every `interval` seconds (configurable)
- [x] Updates Component to Partial Outage
- [x] Updates Component to Major Outage if in Partial Outage
- [x] Can be run on multiple servers and geo regions
@@ -30,8 +32,9 @@ Configuration
```
{
"api_url": "https://demo.cachethq.io/api",
"api_url": "https://demo.cachethq.io/api/v1",
"api_token": "9yMHsdioQosnyVK4iCVR",
"interval": 60,
"monitors": [
{
"name": "nodegear frontend",
@@ -39,16 +42,19 @@ Configuration
"metric_id": 0,
"component_id": 0,
"threshold": 80,
"component_id": null,
"expected_status_code": 200
"expected_status_code": 200,
"strict_tls": true
}
]
],
"insecure_api": false
}
```
*Notes:*
- `metric_id` is optional
- `insecure_api`: if true, HTTPS certificate errors from the Cachet API are ignored (e.g. when the API uses a self-signed certificate)
- `strict_tls`: defaults to true; if false, HTTPS certificate errors from the monitored URL are ignored (e.g. when the monitor uses a self-signed certificate)
- `component_id` is optional
- `threshold` is a percentage
- `expected_status_code` is a http response code
@@ -60,8 +66,9 @@ How to run
Example:
1. Set up [Go](https://golang.org)
2. `go install github.com/castawaylabs/cachet-monitor`
3. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json`
2. `go get -d github.com/castawaylabs/cachet-monitor`
3. `go install github.com/castawaylabs/cachet-monitor`
4. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json`
Production:
@@ -81,4 +88,4 @@ Environment variables
| Name | Example Value | Description |
| ------------ | --------------------------- | --------------------------- |
| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api |
| CACHET_TOKEN | randomvalue | API Authentication token |
| CACHET_TOKEN | randomvalue | API Authentication token |