Compare commits
14 Commits
v1.0.0
...
fix-readme
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5793df7353 | ||
|
|
fae03e7561 | ||
|
|
bcf53f0afc | ||
|
|
3c1e2cd452 | ||
|
|
270dbd361b | ||
|
|
a83cf43e60 | ||
|
|
8b0bc42d50 | ||
|
|
b609679993 | ||
|
|
850f4d237b | ||
|
|
019bc8c057 | ||
|
|
a710944218 | ||
|
|
2b4097e90a | ||
|
|
7a5ad278bb | ||
|
|
f918ea38cd |
@@ -1,15 +1,17 @@
|
||||
package cachet
|
||||
|
||||
import "encoding/json"
|
||||
|
||||
// Component Cachet model
|
||||
type Component struct {
|
||||
ID int `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Status int `json:"status_id"`
|
||||
HumanStatus string `json:"-"`
|
||||
IncidentCount int `json:"-"`
|
||||
CreatedAt int `json:"created_at"`
|
||||
UpdatedAt int `json:"updated_at"`
|
||||
ID json.Number `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Status json.Number `json:"status_id"`
|
||||
HumanStatus string `json:"-"`
|
||||
IncidentCount int `json:"-"`
|
||||
CreatedAt *string `json:"created_at"`
|
||||
UpdatedAt *string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// ComponentData json response model
|
||||
|
||||
@@ -4,13 +4,14 @@ import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/castawaylabs/cachet-monitor/system"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
|
||||
"github.com/castawaylabs/cachet-monitor/system"
|
||||
)
|
||||
|
||||
// Static config
|
||||
@@ -21,11 +22,13 @@ var Logger *log.Logger
|
||||
|
||||
// CachetConfig is the monitoring tool configuration
|
||||
type CachetConfig struct {
|
||||
APIUrl string `json:"api_url"`
|
||||
APIToken string `json:"api_token"`
|
||||
Monitors []*Monitor `json:"monitors"`
|
||||
SystemName string `json:"system_name"`
|
||||
LogPath string `json:"log_path"`
|
||||
APIUrl string `json:"api_url"`
|
||||
APIToken string `json:"api_token"`
|
||||
Interval int64 `json:"interval"`
|
||||
Monitors []*Monitor `json:"monitors"`
|
||||
SystemName string `json:"system_name"`
|
||||
LogPath string `json:"log_path"`
|
||||
InsecureAPI bool `json:"insecure_api"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -107,7 +110,7 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
flags := log.Llongfile|log.Ldate|log.Ltime
|
||||
flags := log.Llongfile | log.Ldate | log.Ltime
|
||||
if len(os.Getenv("DEVELOPMENT")) > 0 {
|
||||
flags = 0
|
||||
}
|
||||
|
||||
@@ -7,15 +7,15 @@ import (
|
||||
|
||||
// Incident Cachet data model
|
||||
type Incident struct {
|
||||
ID int `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Message string `json:"message"`
|
||||
Status int `json:"status"` // 4?
|
||||
HumanStatus string `json:"human_status"`
|
||||
Component *Component `json:"-"`
|
||||
ComponentID *int `json:"component_id"`
|
||||
CreatedAt int `json:"created_at"`
|
||||
UpdatedAt int `json:"updated_at"`
|
||||
ID json.Number `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Message string `json:"message"`
|
||||
Status json.Number `json:"status"` // 4?
|
||||
HumanStatus string `json:"human_status"`
|
||||
Component *Component `json:"-"`
|
||||
ComponentID *json.Number `json:"component_id"`
|
||||
CreatedAt *string `json:"created_at"`
|
||||
UpdatedAt *string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// IncidentData is a response when creating/updating an incident
|
||||
@@ -40,6 +40,7 @@ func GetIncidents() []Incident {
|
||||
err = json.Unmarshal(body, &data)
|
||||
if err != nil {
|
||||
Logger.Printf("Cannot parse incidents: %v\n", err)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return data.Incidents
|
||||
@@ -47,17 +48,19 @@ func GetIncidents() []Incident {
|
||||
|
||||
// Send - Create or Update incident
|
||||
func (incident *Incident) Send() {
|
||||
jsonBytes, err := json.Marshal(incident)
|
||||
if err != nil {
|
||||
Logger.Printf("Cannot encode incident: %v\n", err)
|
||||
return
|
||||
}
|
||||
jsonBytes, _ := json.Marshal(map[string]interface{}{
|
||||
"name": incident.Name,
|
||||
"message": incident.Message,
|
||||
"status": incident.Status,
|
||||
"component_id": incident.ComponentID,
|
||||
"notify": true,
|
||||
})
|
||||
|
||||
requestType := "POST"
|
||||
requestURL := "/incidents"
|
||||
if incident.ID > 0 {
|
||||
if len(incident.ID) > 0 {
|
||||
requestType = "PUT"
|
||||
requestURL += "/" + strconv.Itoa(incident.ID)
|
||||
requestURL += "/" + string(incident.ID)
|
||||
}
|
||||
|
||||
resp, body, err := makeRequest(requestType, requestURL, jsonBytes)
|
||||
@@ -71,7 +74,7 @@ func (incident *Incident) Send() {
|
||||
var data IncidentData
|
||||
err = json.Unmarshal(body, &data)
|
||||
if err != nil {
|
||||
Logger.Println("Cannot parse incident body.")
|
||||
Logger.Println("Cannot parse incident body.", string(body))
|
||||
panic(err)
|
||||
} else {
|
||||
incident.ID = data.Incident.ID
|
||||
@@ -83,22 +86,8 @@ func (incident *Incident) Send() {
|
||||
}
|
||||
}
|
||||
|
||||
// GetSimilarIncidentID gets the same incident.
|
||||
// Updates incident.ID
|
||||
func (incident *Incident) GetSimilarIncidentID() {
|
||||
incidents := GetIncidents()
|
||||
|
||||
for _, inc := range incidents {
|
||||
if incident.Name == inc.Name && incident.Message == inc.Message && incident.Status == inc.Status && incident.HumanStatus == inc.HumanStatus {
|
||||
incident.ID = inc.ID
|
||||
Logger.Printf("Updated incident id to %v\n", inc.ID)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (incident *Incident) fetchComponent() error {
|
||||
_, body, err := makeRequest("GET", "/components/" + strconv.Itoa(*incident.ComponentID), nil)
|
||||
_, body, err := makeRequest("GET", "/components/"+string(*incident.ComponentID), nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -106,7 +95,7 @@ func (incident *Incident) fetchComponent() error {
|
||||
var data ComponentData
|
||||
err = json.Unmarshal(body, &data)
|
||||
if err != nil {
|
||||
Logger.Println("Cannot parse component body.")
|
||||
Logger.Println("Cannot parse component body. %v", string(body))
|
||||
panic(err)
|
||||
}
|
||||
|
||||
@@ -116,7 +105,7 @@ func (incident *Incident) fetchComponent() error {
|
||||
}
|
||||
|
||||
func (incident *Incident) UpdateComponent() {
|
||||
if incident.ComponentID == nil || *incident.ComponentID == 0 {
|
||||
if incident.ComponentID == nil || len(*incident.ComponentID) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -128,22 +117,23 @@ func (incident *Incident) UpdateComponent() {
|
||||
}
|
||||
}
|
||||
|
||||
switch incident.Status {
|
||||
status, _ := strconv.Atoi(string(incident.Status))
|
||||
switch status {
|
||||
case 1, 2, 3:
|
||||
if incident.Component.Status == 3 {
|
||||
incident.Component.Status = 4
|
||||
if incident.Component.Status == "3" {
|
||||
incident.Component.Status = "4"
|
||||
} else {
|
||||
incident.Component.Status = 3
|
||||
incident.Component.Status = "3"
|
||||
}
|
||||
case 4:
|
||||
incident.Component.Status = 1
|
||||
incident.Component.Status = "1"
|
||||
}
|
||||
|
||||
jsonBytes, _ := json.Marshal(map[string]interface{}{
|
||||
"status": incident.Component.Status,
|
||||
})
|
||||
|
||||
resp, _, err := makeRequest("PUT", "/components/" + strconv.Itoa(incident.Component.ID), jsonBytes)
|
||||
resp, _, err := makeRequest("PUT", "/components/"+string(incident.Component.ID), jsonBytes)
|
||||
if err != nil || resp.StatusCode != 200 {
|
||||
Logger.Printf("Could not update component: (resp code %d) %v", resp.StatusCode, err)
|
||||
return
|
||||
@@ -152,24 +142,24 @@ func (incident *Incident) UpdateComponent() {
|
||||
|
||||
// SetInvestigating sets status to Investigating
|
||||
func (incident *Incident) SetInvestigating() {
|
||||
incident.Status = 1
|
||||
incident.Status = "1"
|
||||
incident.HumanStatus = "Investigating"
|
||||
}
|
||||
|
||||
// SetIdentified sets status to Identified
|
||||
func (incident *Incident) SetIdentified() {
|
||||
incident.Status = 2
|
||||
incident.Status = "2"
|
||||
incident.HumanStatus = "Identified"
|
||||
}
|
||||
|
||||
// SetWatching sets status to Watching
|
||||
func (incident *Incident) SetWatching() {
|
||||
incident.Status = 3
|
||||
incident.Status = "3"
|
||||
incident.HumanStatus = "Watching"
|
||||
}
|
||||
|
||||
// SetFixed sets status to Fixed
|
||||
func (incident *Incident) SetFixed() {
|
||||
incident.Status = 4
|
||||
incident.Status = "4"
|
||||
incident.HumanStatus = "Fixed"
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
package cachet
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -15,6 +18,7 @@ type Monitor struct {
|
||||
Threshold float32 `json:"threshold"`
|
||||
ComponentID *int `json:"component_id"`
|
||||
ExpectedStatusCode int `json:"expected_status_code"`
|
||||
StrictTLS *bool `json:"strict_tls"`
|
||||
|
||||
History []bool `json:"-"`
|
||||
LastFailReason *string `json:"-"`
|
||||
@@ -42,6 +46,12 @@ func (monitor *Monitor) doRequest() bool {
|
||||
client := &http.Client{
|
||||
Timeout: timeout,
|
||||
}
|
||||
if monitor.StrictTLS != nil && *monitor.StrictTLS == false {
|
||||
client.Transport = &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
}
|
||||
}
|
||||
|
||||
resp, err := client.Get(monitor.URL)
|
||||
if err != nil {
|
||||
errString := err.Error()
|
||||
@@ -51,7 +61,13 @@ func (monitor *Monitor) doRequest() bool {
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
return resp.StatusCode == monitor.ExpectedStatusCode
|
||||
if resp.StatusCode != monitor.ExpectedStatusCode {
|
||||
failReason := "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
|
||||
monitor.LastFailReason = &failReason
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
||||
@@ -76,22 +92,20 @@ func (monitor *Monitor) AnalyseData() {
|
||||
// is down, create an incident
|
||||
Logger.Println("Creating incident...")
|
||||
|
||||
component_id := json.Number(strconv.Itoa(*monitor.ComponentID))
|
||||
monitor.Incident = &Incident{
|
||||
Name: monitor.Name + " - " + Config.SystemName,
|
||||
Message: monitor.Name + " failed",
|
||||
ComponentID: monitor.ComponentID,
|
||||
Message: monitor.Name + " check failed",
|
||||
ComponentID: &component_id,
|
||||
}
|
||||
|
||||
if monitor.LastFailReason != nil {
|
||||
monitor.Incident.Message += "\n\n" + *monitor.LastFailReason
|
||||
monitor.Incident.Message += "\n\n - " + *monitor.LastFailReason
|
||||
}
|
||||
|
||||
// set investigating status
|
||||
monitor.Incident.SetInvestigating()
|
||||
|
||||
// lookup relevant incident
|
||||
monitor.Incident.GetSimilarIncidentID()
|
||||
|
||||
// create/update incident
|
||||
monitor.Incident.Send()
|
||||
monitor.Incident.UpdateComponent()
|
||||
@@ -99,8 +113,12 @@ func (monitor *Monitor) AnalyseData() {
|
||||
// was down, created an incident, its now ok, make it resolved.
|
||||
Logger.Println("Updating incident to resolved...")
|
||||
|
||||
// Add resolved message
|
||||
monitor.Incident.Message += "\n\n-\n\nResolved at " + time.Now().String()
|
||||
component_id := json.Number(strconv.Itoa(*monitor.ComponentID))
|
||||
monitor.Incident = &Incident{
|
||||
Name: monitor.Incident.Name,
|
||||
Message: monitor.Name + " check succeeded",
|
||||
ComponentID: &component_id,
|
||||
}
|
||||
|
||||
monitor.Incident.SetFixed()
|
||||
monitor.Incident.Send()
|
||||
|
||||
@@ -2,6 +2,7 @@ package cachet
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
)
|
||||
@@ -13,6 +14,12 @@ func makeRequest(requestType string, url string, reqBody []byte) (*http.Response
|
||||
req.Header.Set("X-Cachet-Token", Config.APIToken)
|
||||
|
||||
client := &http.Client{}
|
||||
if Config.InsecureAPI == true {
|
||||
client.Transport = &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
}
|
||||
}
|
||||
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, []byte{}, err
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"api_url": "https://demo.cachethq.io/api",
|
||||
"api_url": "https://demo.cachethq.io/api/v1",
|
||||
"api_token": "9yMHsdioQosnyVK4iCVR",
|
||||
"interval": 5,
|
||||
"monitors": [
|
||||
{
|
||||
"name": "nodegear frontend",
|
||||
@@ -8,7 +9,9 @@
|
||||
"metric_id": 1,
|
||||
"threshold": 80,
|
||||
"component_id": null,
|
||||
"expected_status_code": 200
|
||||
"expected_status_code": 200,
|
||||
"strict_tls": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"insecure_api": false
|
||||
}
|
||||
|
||||
4
main.go
4
main.go
@@ -9,7 +9,7 @@ func main() {
|
||||
config := cachet.Config
|
||||
log := cachet.Logger
|
||||
|
||||
log.Printf("System: %s, API: %s\n", config.SystemName, config.APIUrl)
|
||||
log.Printf("System: %s, Interval: %d second(s), API: %s\n", config.SystemName, config.Interval, config.APIUrl)
|
||||
log.Printf("Starting %d monitors:\n", len(config.Monitors))
|
||||
for _, mon := range config.Monitors {
|
||||
log.Printf(" %s: GET %s & Expect HTTP %d\n", mon.Name, mon.URL, mon.ExpectedStatusCode)
|
||||
@@ -20,7 +20,7 @@ func main() {
|
||||
|
||||
log.Println()
|
||||
|
||||
ticker := time.NewTicker(time.Second)
|
||||
ticker := time.NewTicker(time.Duration(config.Interval)*time.Second)
|
||||
for range ticker.C {
|
||||
for _, mon := range config.Monitors {
|
||||
go mon.Run()
|
||||
|
||||
23
readme.md
23
readme.md
@@ -3,11 +3,13 @@ Cachet Monitor plugin
|
||||
|
||||
This is a monitoring plugin for CachetHQ.
|
||||
|
||||

|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
- [x] Creates & Resolves Incidents
|
||||
- [x] Posts monitor lag every second
|
||||
- [x] Posts monitor lag every `interval` seconds (set in the config)
|
||||
- [x] Updates Component to Partial Outage
|
||||
- [x] Updates Component to Major Outage if in Partial Outage
|
||||
- [x] Can be run on multiple servers and geo regions
|
||||
@@ -30,8 +32,9 @@ Configuration
|
||||
|
||||
```
|
||||
{
|
||||
"api_url": "https://demo.cachethq.io/api",
|
||||
"api_url": "https://demo.cachethq.io/api/v1",
|
||||
"api_token": "9yMHsdioQosnyVK4iCVR",
|
||||
"interval": 60,
|
||||
"monitors": [
|
||||
{
|
||||
"name": "nodegear frontend",
|
||||
@@ -39,16 +42,19 @@ Configuration
|
||||
"metric_id": 0,
|
||||
"component_id": 0,
|
||||
"threshold": 80,
|
||||
"component_id": null,
|
||||
"expected_status_code": 200
|
||||
"expected_status_code": 200,
|
||||
"strict_tls": true
|
||||
}
|
||||
]
|
||||
],
|
||||
"insecure_api": false
|
||||
}
|
||||
```
|
||||
|
||||
*Notes:*
|
||||
|
||||
- `metric_id` is optional
|
||||
- `insecure_api`: if true, HTTPS certificate errors from the Cachet API are ignored (e.g. when it uses a self-signed certificate)
|
||||
- `strict_tls`: defaults to true; if false, HTTPS certificate errors are ignored (e.g. when the monitored URL uses a self-signed certificate)
|
||||
- `component_id` is optional
|
||||
- `threshold` is a percentage
|
||||
- `expected_status_code` is a http response code
|
||||
@@ -60,8 +66,9 @@ How to run
|
||||
Example:
|
||||
|
||||
1. Set up [Go](https://golang.org)
|
||||
2. `go install github.com/castawaylabs/cachet-monitor`
|
||||
3. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json`
|
||||
2. `go get -d github.com/castawaylabs/cachet-monitor`
|
||||
3. `go install github.com/castawaylabs/cachet-monitor`
|
||||
4. `cachet-monitor -c https://raw.githubusercontent.com/CastawayLabs/cachet-monitor/master/example.config.json`
|
||||
|
||||
Production:
|
||||
|
||||
@@ -81,4 +88,4 @@ Environment variables
|
||||
| Name | Example Value | Description |
|
||||
| ------------ | --------------------------- | --------------------------- |
|
||||
| CACHET_API | http://demo.cachethq.io/api | URL endpoint for cachet api |
|
||||
| CACHET_TOKEN | randomvalue | API Authentication token |
|
||||
| CACHET_TOKEN | randomvalue | API Authentication token |
|
||||
|
||||
Reference in New Issue
Block a user