From 92d8791a4f0ec393aabcc1f24e2fa372a896676c Mon Sep 17 00:00:00 2001
From: Matej Kramny
Date: Mon, 16 Mar 2015 21:02:29 +0100
Subject: [PATCH] Log & resolve incidents, add readme

---
Review notes (text between the "---" line and the first "diff --git" line
is commentary, not part of the change):

What the patch does
 * cachet/incident.go: Created_at/Updated_at become int; adds IncidentData
   (the {"data": ...} envelope decoded from the API reply), Incident.Send(),
   and SetInvestigating/SetIdentified/SetWatching/SetFixed, which set Status
   to 1/2/3/4 together with the matching Human_status text.
 * cachet/monitor.go: Monitor gains Name; lag is only sent as a metric when
   MetricId > 0; AnalyseData prints the down percentage on every run and
   opens or resolves an incident only when History holds exactly 10 entries.
 * main.go: imports the cachet package by its full path and prints the
   configured monitors at start-up.
 * readme.md: new file.

Changed during review
 * Incident.Send() checks the HTTP status before decoding the body and no
   longer panics when the request, the body read or the JSON decoding fails;
   it prints the failure and returns, leaving Id unchanged. Previously a
   non-JSON error response panicked before the status check was reached.
 * Incident.Send() no longer discards the ioutil.ReadAll error.
 * AnalyseData uses time.Now().Unix() instead of UnixNano()/int64(time.Second).
 * readme.md: added the delimiter row the Markdown table needs to render.

Caveats
 * Line breaks and indentation were reconstructed from a copy of this patch
   whose whitespace had been collapsed; context lines may differ from the
   tree in whitespace only (git apply --ignore-whitespace should tolerate
   that - TODO confirm against the repository).
 * The blob ids on the "index" lines are those of the original commit and
   were not regenerated for the reviewed content.

 cachet/incident.go | 97 ++++++++++++++++++++++++++++++++++++++++++++--
 cachet/monitor.go  | 30 ++++++++++----
 main.go            | 17 +++++++-
 readme.md          | 19 +++++++++
 4 files changed, 149 insertions(+), 14 deletions(-)
 create mode 100644 readme.md

diff --git a/cachet/incident.go b/cachet/incident.go
index c4a177e..a9bc815 100644
--- a/cachet/incident.go
+++ b/cachet/incident.go
@@ -1,7 +1,12 @@
 package cachet
 
 import (
-	"time"
+	"fmt"
+	"bytes"
+	"io/ioutil"
+	"strconv"
+	"net/http"
+	"encoding/json"
 )
 
 type Incident struct {
@@ -12,6 +17,92 @@ type Incident struct {
 	Human_status string `json:"human_status"`
 	Component *Component `json:"component"`
 	Component_id *int `json:"component_id"`
-	Created_at *time.Time `json:"created_at"`
-	Updated_at *time.Time `json:"updated_at"`
+	Created_at int `json:"created_at"`
+	Updated_at int `json:"updated_at"`
+}
+
+type IncidentData struct {
+	Incident Incident `json:"data"`
+}
+
+// Send creates the incident through the Cachet API when Id is 0 (POST
+// /incidents) and updates it otherwise (PUT /incidents/<id>). On a 200
+// response the Id from the reply is stored on the incident; transport,
+// status and decoding failures are printed and leave Id unchanged.
+// It panics only if the incident cannot be marshalled or the request built.
+func (incident *Incident) Send() {
+	jsonBytes, err := json.Marshal(incident)
+	if err != nil {
+		panic(err)
+	}
+
+	var req *http.Request
+	if incident.Id == 0 {
+		req, err = http.NewRequest("POST", apiUrl + "/incidents", bytes.NewBuffer(jsonBytes))
+	} else {
+		req, err = http.NewRequest("PUT", apiUrl + "/incidents/" + strconv.Itoa(incident.Id), bytes.NewBuffer(jsonBytes))
+	}
+
+	if err != nil {
+		panic(err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-Cachet-Token", apiToken)
+
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		fmt.Println("Could not create/update incident!")
+		fmt.Println(err)
+		return
+	}
+
+	defer resp.Body.Close()
+
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		fmt.Println("Could not create/update incident!")
+		fmt.Println(err)
+		return
+	}
+
+	fmt.Println(strconv.Itoa(resp.StatusCode) + " " + string(body))
+
+	// Check the status before decoding: an error response need not be an
+	// incident document, or JSON at all.
+	if resp.StatusCode != 200 {
+		fmt.Println("Could not create/update incident!")
+		return
+	}
+
+	var data IncidentData
+	if err = json.Unmarshal(body, &data); err != nil {
+		fmt.Println("Cannot parse incident body.")
+		fmt.Println(err)
+		return
+	}
+
+	incident.Id = data.Incident.Id
+	fmt.Println("ID:"+strconv.Itoa(incident.Id))
+}
+
+func (incident *Incident) SetInvestigating() {
+	incident.Status = 1
+	incident.Human_status = "Investigating"
+}
+
+func (incident *Incident) SetIdentified() {
+	incident.Status = 2
+	incident.Human_status = "Identified"
+}
+
+func (incident *Incident) SetWatching() {
+	incident.Status = 3
+	incident.Human_status = "Watching"
+}
+
+func (incident *Incident) SetFixed() {
+	incident.Status = 4
+	incident.Human_status = "Fixed"
 }
\ No newline at end of file
diff --git a/cachet/monitor.go b/cachet/monitor.go
index e023cbb..e08cc0c 100644
--- a/cachet/monitor.go
+++ b/cachet/monitor.go
@@ -9,6 +9,7 @@ import (
 const timeout = time.Duration(time.Second)
 
 type Monitor struct {
+	Name string `json:"name"`
 	Url string `json:"url"`
 	MetricId int `json:"metric_id"`
 	Threshold float32 `json:"threshold"`
@@ -30,9 +31,8 @@ func (monitor *Monitor) Run() {
 	monitor.History = append(monitor.History, isUp)
 	monitor.AnalyseData()
 
-	if isUp == true {
+	if isUp == true && monitor.MetricId > 0 {
 		SendMetric(monitor.MetricId, lag)
-		return
 	}
 }
 
@@ -52,11 +52,6 @@ func (monitor *Monitor) doRequest() bool {
 
 func (monitor *Monitor) AnalyseData() {
 	// look at the past few incidents
-	if len(monitor.History) != 10 {
-		// not enough data
-		return
-	}
-
 	numDown := 0
 	for _, wasUp := range monitor.History {
 		if wasUp == false {
@@ -65,14 +60,31 @@ func (monitor *Monitor) AnalyseData() {
 	}
 
 	t := (float32(numDown) / float32(len(monitor.History))) * 100
-	fmt.Printf("%s %.2f%% Down. Threshold: %.2f%%\n", monitor.Url, t, monitor.Threshold)
+	fmt.Printf("%s %.2f%% Down at %v. Threshold: %.2f%%\n", monitor.Url, t, time.Now().Unix(), monitor.Threshold)
+
+	if len(monitor.History) != 10 {
+		// not enough data
+		return
+	}
+
 	if t > monitor.Threshold && monitor.Incident == nil {
 		// is down, create an incident
 		fmt.Println("Creating incident...")
-		monitor.Incident = &Incident{}
+
+		monitor.Incident = &Incident{
+			Name: monitor.Name,
+			Message: monitor.Name + " is unreachable.",
+		}
+
+		monitor.Incident.SetInvestigating()
+		monitor.Incident.Send()
 	} else if t < monitor.Threshold && monitor.Incident != nil {
 		// was down, created an incident, its now ok, make it resolved.
 		fmt.Println("Updating incident to resolved...")
+
+		monitor.Incident.SetFixed()
+		monitor.Incident.Send()
+
 		monitor.Incident = nil
 	}
 }
diff --git a/main.go b/main.go
index c3ce774..bb44ac3 100644
--- a/main.go
+++ b/main.go
@@ -1,25 +1,38 @@
 package main
 
 import (
+	"fmt"
 	"time"
-	"./cachet"
+	"github.com/castawaylabs/cachet-monitor/cachet"
 )
 
 func main() {
 	monitors := []*cachet.Monitor{
 		/*&cachet.Monitor{
+			Name: "nodegear frontend",
 			Url: "https://nodegear.io/ping",
 			MetricId: 1,
 			Threshold: 80.0,
+			ExpectedStatusCode: 200,
 		},*/
 		&cachet.Monitor{
+			Name: "local test server",
 			Url: "http://localhost:1337",
-			MetricId: 1,
 			Threshold: 80.0,
 			ExpectedStatusCode: 200,
 		},
 	}
 
+	fmt.Printf("Starting %d monitors:\n", len(monitors))
+	for _, monitor := range monitors {
+		fmt.Printf(" %s: GET %s & Expect HTTP %d\n", monitor.Name, monitor.Url, monitor.ExpectedStatusCode)
+		if monitor.MetricId > 0 {
+			fmt.Printf(" - Logs lag to metric id: %d\n", monitor.MetricId)
+		}
+	}
+
+	fmt.Println()
+
 	ticker := time.NewTicker(time.Second)
 	for _ = range ticker.C {
 		for _, monitor := range monitors {
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..ea29b12
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,19 @@
+Cachet Monitor plugin
+=====================
+
+This is a monitoring plugin for CachetHQ.
+
+How to run:
+-----------
+
+1. Set up [Go](https://golang.org)
+2. `go install github.com/castawaylabs/cachet-monitor`
+3. `cachet-monitor`
+
+Environment variables:
+----------------------
+
+| Name | Example Value | Description |
+| ---- | ------------- | ----------- |
+| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api |
+| CACHET_TOKEN | randomvalue | API Authentication token |
\ No newline at end of file