20 Commits

Author SHA1 Message Date
Matej Kramny
c04128ce36 compile message for Fixed status
- better logging
2017-02-06 23:05:39 -08:00
Matej Kramny
1b93730121 compile template message 2017-02-06 21:56:08 -08:00
Matej Kramny
85d92bcb07 rename monitor -> mon 2017-02-06 10:57:02 -08:00
Matej Kramny
0dc54e4e6e - customisable time format
- custom messages
- configure threshold count instead of uptime %
2017-02-05 23:43:38 -08:00
Matej Kramny
b3bc1d4405 - compile message templates
- send metrics to cachet
- fix http default configuration
2017-02-05 19:27:01 -08:00
Matej Kramny
b4fa33b8ad - immediate tick flag
- reword Start -> ClockStart etc
2017-02-04 22:40:31 -08:00
Matej Kramny
edfd4a51e6 Print monitor features 2017-02-04 22:15:24 -08:00
Matej Kramny
a2d8128109 huuman friendly config! 2017-02-04 21:49:13 -08:00
Matej Kramny
d43eca4b7d - yaml & json supported 2017-02-04 21:48:27 -08:00
Matej Kramny
36bf228599 a compiling proof of concept
- abstract type
- http, tcp, icmp & dns monitor types
- unmarshal from json into any monitor type
2017-02-04 18:23:53 -08:00
Matej Kramny
0cd6fa13a7 Merge branch 'master' into v3
# Conflicts:
#	cli/main.go
2017-02-04 16:12:42 -08:00
Matej Kramny
e910807973 basic refactor + new prototype 2017-02-04 16:02:22 -08:00
Matej Kramny
9b29a0450c Merge pull request #40 from to-kn/add_http_header_support
Add http header support
2017-02-04 14:45:21 -08:00
Matej Kramny
aaecc1669a Merge pull request #41 from yacloud-io/master
Support making request with proxy
2016-07-27 16:30:15 +01:00
Yi Tao Jiang
48586eb0aa Support making request with proxy 2016-07-27 23:24:54 +08:00
Tobias Knipping
2c364f3d2f add support for specifying http-headers and really use Method spezified 2016-07-24 16:34:30 +02:00
Matej Kramny
0de0baf5f9 Merge pull request #31 from faizshukri/fix/timeout-exceed
Timeout exceeded while awaiting headers
2016-06-22 15:59:54 +01:00
Faiz Shukri
3f4b9ced77 Add timeout customization 2016-06-16 13:38:51 +08:00
Matej Kramny
20e4dd1414 Add to readme 2016-05-19 19:43:01 +01:00
Matej Kramny
29b02fd164 Update example JSON 2016-05-19 19:41:19 +01:00
15 changed files with 712 additions and 314 deletions

5
.gitignore vendored
View File

@@ -1,2 +1,3 @@
gin-bin /config.yml
example.config.local.json /config.json
examples/

79
api.go Normal file
View File

@@ -0,0 +1,79 @@
package cachet
import (
"bytes"
"crypto/tls"
"encoding/json"
"errors"
"net/http"
"strconv"
"time"
"github.com/Sirupsen/logrus"
)
// CachetAPI holds the connection settings used to talk to a Cachet server.
type CachetAPI struct {
	// URL is the base address of the API; request paths are appended to it.
	URL string `json:"url"`
	// Token is sent with every request in the X-Cachet-Token header.
	Token string `json:"token"`
	// Insecure, when true, turns off TLS certificate verification for API requests.
	Insecure bool `json:"insecure"`
}
// CachetResponse is the envelope of a Cachet API reply. The value stored under
// "data" is kept as raw JSON so that each caller can decode it into its own type.
type CachetResponse struct {
	Data json.RawMessage `json:"data"`
}
// TODO: test

// Ping checks that the Cachet API answers by issuing GET /ping.
// It hands back whatever error NewRequest reports, or a dedicated error when
// the API replies with a status other than 200. A nil result means the ping
// succeeded.
func (api CachetAPI) Ping() error {
	resp, _, err := api.NewRequest("GET", "/ping", nil)
	switch {
	case err != nil:
		return err
	case resp.StatusCode == 200:
		return nil
	default:
		return errors.New("API Responded with non-200 status code")
	}
}
// SendMetric adds a data point to a cachet metric.
//
// id is the ID of the metric to post to and lag is the value to record (logged
// as a round-trip time in milliseconds). The point is stamped with the current
// Unix time. Failures are logged as warnings and never returned, so a metric
// that cannot be delivered does not disturb the caller.
func (api CachetAPI) SendMetric(id int, lag int64) {
	logrus.Debugf("Sending lag metric ID:%d RTT %vms", id, lag)

	jsonBytes, err := json.Marshal(map[string]interface{}{
		"value":     lag,
		"timestamp": time.Now().Unix(),
	})
	if err != nil {
		logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
		return
	}

	resp, _, err := api.NewRequest("POST", "/metrics/"+strconv.Itoa(id)+"/points", jsonBytes)
	if err != nil {
		logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
		return
	}

	// A rejected request carries no error value; report the status code instead
	// of printing a meaningless "err: <nil>".
	if resp.StatusCode != 200 {
		logrus.Warnf("Could not log metric! ID: %d, unexpected HTTP status: %d", id, resp.StatusCode)
	}
}
// TODO: test

// NewRequest sends a JSON request to the Cachet API and decodes the reply.
//
// requestType is the HTTP method, url is the path appended to api.URL and
// reqBody is the (possibly nil) JSON payload. The API token is sent in the
// X-Cachet-Token header.
//
// It returns the HTTP response (its body is already consumed and closed, so
// only metadata such as StatusCode is usable), the decoded envelope, and an
// error if the request could not be built or sent, or if the reply was not
// valid JSON. On build/send failures the response is nil.
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
	// Upper bound for one complete API exchange (connect, TLS, headers, body).
	const requestTimeout = 30 * time.Second

	req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
	if err != nil {
		// req is nil here; touching req.Header would panic.
		return nil, CachetResponse{}, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-Cachet-Token", api.Token)

	// Use a private transport: http.DefaultTransport is shared by the whole
	// process, so rewriting its TLS settings would silently change certificate
	// verification for every other HTTP user (and race with them).
	client := &http.Client{
		Timeout: requestTimeout,
		Transport: &http.Transport{
			Proxy:           http.ProxyFromEnvironment,
			TLSClientConfig: &tls.Config{InsecureSkipVerify: api.Insecure},
			// The transport lives for a single call, so do not park idle
			// connections in it.
			DisableKeepAlives: true,
		},
	}

	res, err := client.Do(req)
	if err != nil {
		return nil, CachetResponse{}, err
	}
	defer res.Body.Close()

	var body CachetResponse
	err = json.NewDecoder(res.Body).Decode(&body)
	return res, body, err
}

View File

@@ -3,105 +3,125 @@ package main
import ( import (
"encoding/json" "encoding/json"
"errors" "errors"
"flag"
"fmt"
"io/ioutil" "io/ioutil"
"log"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
"os/signal" "os/signal"
"strings"
"sync" "sync"
"github.com/Sirupsen/logrus"
cachet "github.com/castawaylabs/cachet-monitor" cachet "github.com/castawaylabs/cachet-monitor"
docopt "github.com/docopt/docopt-go"
"github.com/mitchellh/mapstructure"
"gopkg.in/yaml.v2"
) )
var configPath string const usage = `cachet-monitor
var systemName string
var logPath string Usage:
cachet-monitor (-c PATH | --config PATH) [--log=LOGPATH] [--name=NAME] [--immediate]
cachet-monitor -h | --help | --version
cachet-monitor print-config
Arguments:
PATH path to config.json
LOGPATH path to log output (defaults to STDOUT)
NAME name of this logger
Examples:
cachet-monitor -c /root/cachet-monitor.json
cachet-monitor -c /root/cachet-monitor.json --log=/var/log/cachet-monitor.log --name="development machine"
Options:
-c PATH.json --config PATH Path to configuration file
-h --help Show this screen.
--version Show version
--immediate Tick immediately (by default waits for first defined interval)
print-config Print example configuration
Environment varaibles:
CACHET_API override API url from configuration
CACHET_TOKEN override API token from configuration
CACHET_DEV set to enable dev logging`
func main() { func main() {
flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") arguments, _ := docopt.Parse(usage, nil, true, "cachet-monitor", false)
flag.StringVar(&systemName, "name", "", "System Name")
flag.StringVar(&logPath, "log", "", "Log path")
flag.Parse()
cfg, err := getConfiguration(configPath) cfg, err := getConfiguration(arguments["--config"].(string))
if err != nil { if err != nil {
panic(err) logrus.Panicf("Unable to start (reading config): %v", err)
} }
if len(systemName) > 0 { if immediate, ok := arguments["--immediate"]; ok {
cfg.SystemName = systemName cfg.Immediate = immediate.(bool)
} }
if len(logPath) > 0 {
cfg.LogPath = logPath if name := arguments["--name"]; name != nil {
cfg.SystemName = name.(string)
} }
logrus.SetOutput(getLogger(arguments["--log"]))
if len(os.Getenv("CACHET_API")) > 0 { if len(os.Getenv("CACHET_API")) > 0 {
cfg.APIUrl = os.Getenv("CACHET_API") cfg.API.URL = os.Getenv("CACHET_API")
} }
if len(os.Getenv("CACHET_TOKEN")) > 0 { if len(os.Getenv("CACHET_TOKEN")) > 0 {
cfg.APIToken = os.Getenv("CACHET_TOKEN") cfg.API.Token = os.Getenv("CACHET_TOKEN")
}
if len(os.Getenv("CACHET_DEV")) > 0 {
logrus.SetLevel(logrus.DebugLevel)
} }
if err := cfg.ValidateConfiguration(); err != nil { if valid := cfg.Validate(); !valid {
panic(err) logrus.Errorf("Invalid configuration")
os.Exit(1)
} }
cfg.Logger.Printf("System: %s\nAPI: %s\nMonitors: %d\n\n", cfg.SystemName, cfg.APIUrl, len(cfg.Monitors)) logrus.Debug("Configuration valid")
logrus.Infof("System: %s", cfg.SystemName)
logrus.Infof("API: %s", cfg.API.URL)
logrus.Infof("Monitors: %d\n", len(cfg.Monitors))
logrus.Infof("Pinging cachet")
if err := cfg.API.Ping(); err != nil {
logrus.Errorf("Cannot ping cachet!\n%v", err)
os.Exit(1)
}
logrus.Infof("Ping OK")
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
for _, mon := range cfg.Monitors { for index, monitor := range cfg.Monitors {
cfg.Logger.Printf(" Starting %s: %d seconds check interval\n - %v %s", mon.Name, mon.CheckInterval, mon.Method, mon.URL) logrus.Infof("Starting Monitor #%d: ", index)
logrus.Infof("Features: \n - %v", strings.Join(monitor.Describe(), "\n - "))
// print features go monitor.ClockStart(cfg, monitor, wg)
if mon.ExpectedStatusCode > 0 {
cfg.Logger.Printf(" - Expect HTTP %d", mon.ExpectedStatusCode)
}
if len(mon.ExpectedBody) > 0 {
cfg.Logger.Printf(" - Expect Body to match \"%v\"", mon.ExpectedBody)
}
if mon.MetricID > 0 {
cfg.Logger.Printf(" - Log lag to metric id %d\n", mon.MetricID)
}
if mon.ComponentID > 0 {
cfg.Logger.Printf(" - Update component id %d\n\n", mon.ComponentID)
}
go mon.Start(cfg, wg)
} }
signals := make(chan os.Signal, 1) signals := make(chan os.Signal, 1)
signal.Notify(signals, os.Interrupt, os.Kill) signal.Notify(signals, os.Interrupt, os.Kill)
<-signals <-signals
cfg.Logger.Println("Abort: Waiting monitors to finish") logrus.Warnf("Abort: Waiting monitors to finish")
for _, mon := range cfg.Monitors { for _, mon := range cfg.Monitors {
mon.Stop() mon.GetMonitor().ClockStop()
} }
wg.Wait() wg.Wait()
} }
func getLogger(logPath string) *log.Logger { func getLogger(logPath interface{}) *os.File {
var logWriter = os.Stdout if logPath == nil || len(logPath.(string)) == 0 {
var err error return os.Stdout
}
if len(logPath) > 0 { file, err := os.Create(logPath.(string))
logWriter, err = os.Create(logPath)
if err != nil { if err != nil {
fmt.Printf("Unable to open file '%v' for logging\n", logPath) logrus.Errorf("Unable to open file '%v' for logging: \n%v", logPath, err)
os.Exit(1) os.Exit(1)
} }
}
flags := log.Llongfile | log.Ldate | log.Ltime return file
if len(os.Getenv("CACHET_DEV")) > 0 {
flags = 0
}
return log.New(logWriter, "", flags)
} }
func getConfiguration(path string) (*cachet.CachetMonitor, error) { func getConfiguration(path string) (*cachet.CachetMonitor, error) {
@@ -114,26 +134,73 @@ func getConfiguration(path string) (*cachet.CachetMonitor, error) {
// download config // download config
response, err := http.Get(path) response, err := http.Get(path)
if err != nil { if err != nil {
return nil, errors.New("Cannot download network config: " + err.Error()) logrus.Warn("Unable to download network configuration")
return nil, err
} }
defer response.Body.Close() defer response.Body.Close()
data, _ = ioutil.ReadAll(response.Body) data, _ = ioutil.ReadAll(response.Body)
fmt.Println("Downloaded network configuration.") logrus.Info("Downloaded network configuration.")
} else { } else {
data, err = ioutil.ReadFile(path) data, err = ioutil.ReadFile(path)
if err != nil { if err != nil {
return nil, errors.New("Config file '" + path + "' missing!") return nil, errors.New("Unable to open file: '" + path + "'")
} }
} }
if err := json.Unmarshal(data, &cfg); err != nil { if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") {
fmt.Println(err) err = yaml.Unmarshal(data, &cfg)
return nil, errors.New("Cannot parse config!") } else {
err = json.Unmarshal(data, &cfg)
} }
cfg.Logger = getLogger(cfg.LogPath) if err != nil {
logrus.Warnf("Unable to parse configuration file")
}
return &cfg, nil cfg.Monitors = make([]cachet.MonitorInterface, len(cfg.RawMonitors))
for index, rawMonitor := range cfg.RawMonitors {
var t cachet.MonitorInterface
var err error
// get default type
monType := cachet.GetMonitorType("")
if t, ok := rawMonitor["type"].(string); ok {
monType = cachet.GetMonitorType(t)
}
switch monType {
case "http":
var s cachet.HTTPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "dns":
var s cachet.DNSMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "icmp":
var s cachet.ICMPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "tcp":
var s cachet.TCPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
default:
logrus.Errorf("Invalid monitor type (index: %d) %v", index, monType)
continue
}
t.GetMonitor().Type = monType
if err != nil {
logrus.Errorf("Unable to unmarshal monitor to type (index: %d): %v", index, err)
continue
}
cfg.Monitors[index] = t
}
return &cfg, err
} }

View File

@@ -1,65 +1,89 @@
package cachet package cachet
import ( import (
"errors"
"log"
"net" "net"
"os" "os"
"strings"
"time"
"github.com/Sirupsen/logrus"
) )
type CachetMonitor struct { type CachetMonitor struct {
Logger *log.Logger `json:"-"` SystemName string `json:"system_name" yaml:"system_name"`
DateFormat string `json:"date_format" yaml:"date_format"`
API CachetAPI `json:"api"`
RawMonitors []map[string]interface{} `json:"monitors" yaml:"monitors"`
APIUrl string `json:"api_url"` Monitors []MonitorInterface `json:"-" yaml:"-"`
APIToken string `json:"api_token"` Immediate bool `json:"-" yaml:"-"`
SystemName string `json:"system_name"`
LogPath string `json:"log_path"`
InsecureAPI bool `json:"insecure_api"`
Monitors []*Monitor `json:"monitors"`
} }
func (cfg *CachetMonitor) ValidateConfiguration() error { // Validate configuration
if cfg.Logger == nil { func (cfg *CachetMonitor) Validate() bool {
cfg.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) valid := true
}
if len(cfg.SystemName) == 0 { if len(cfg.SystemName) == 0 {
// get hostname // get hostname
cfg.SystemName = getHostname() cfg.SystemName = getHostname()
} }
if len(cfg.APIToken) == 0 || len(cfg.APIUrl) == 0 { if len(cfg.DateFormat) == 0 {
return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n") cfg.DateFormat = DefaultTimeFormat
}
if len(cfg.API.Token) == 0 || len(cfg.API.URL) == 0 {
logrus.Warnf("API URL or API Token missing.\nGet help at https://github.com/castawaylabs/cachet-monitor")
valid = false
} }
if len(cfg.Monitors) == 0 { if len(cfg.Monitors) == 0 {
return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n") logrus.Warnf("No monitors defined!\nSee help for example configuration")
valid = false
} }
for _, monitor := range cfg.Monitors { for index, monitor := range cfg.Monitors {
if err := monitor.ValidateConfiguration(); err != nil { if errs := monitor.Validate(); len(errs) > 0 {
return err logrus.Warnf("Monitor validation errors (index %d): %v", index, "\n - "+strings.Join(errs, "\n - "))
valid = false
} }
} }
return nil return valid
} }
// getHostname returns id of the current system // getHostname returns id of the current system
func getHostname() string { func getHostname() string {
hostname, err := os.Hostname() hostname, err := os.Hostname()
if err != nil || len(hostname) == 0 { if err == nil && len(hostname) > 0 {
addrs, err := net.InterfaceAddrs() return hostname
}
if err != nil { addrs, err := net.InterfaceAddrs()
if err != nil || len(addrs) == 0 {
return "unknown" return "unknown"
} }
for _, addr := range addrs { return addrs[0].String()
return addr.String() }
}
func getMs() int64 {
return time.Now().UnixNano() / int64(time.Millisecond)
}
func GetMonitorType(t string) string {
if len(t) == 0 {
return "http"
} }
return hostname return strings.ToLower(t)
}
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
return map[string]interface{}{
"SystemName": monitor.config.SystemName,
"API": monitor.config.API,
"Monitor": monitor,
"now": time.Now().Format(monitor.config.DateFormat),
}
} }

15
config_test.go Normal file
View File

@@ -0,0 +1,15 @@
package cachet
import (
"testing"
)
// TestGetMonitorType checks that GetMonitorType falls back to "http" for an
// empty type name and lower-cases the name it is given.
func TestGetMonitorType(t *testing.T) {
	cases := []struct {
		input   string
		failure string
	}{
		{input: "", failure: "monitor type `` should default to http"},
		{input: "HTTP", failure: "does not return correct monitor type"},
	}

	for _, c := range cases {
		if got := GetMonitorType(c.input); got != "http" {
			t.Error(c.failure)
		}
	}
}

5
dns.go Normal file
View File

@@ -0,0 +1,5 @@
package cachet
// DNSMonitor is the monitor type instantiated for configuration entries whose
// type is "dns". It embeds AbstractMonitor and declares no fields of its own.
// The ",squash" tag makes mapstructure decode the embedded struct's fields from
// the same map level as the monitor entry itself instead of from a nested key.
// NOTE(review): no DNS-specific check is visible in this file; presumably the
// embedded AbstractMonitor behaviour is used as-is for now - confirm.
type DNSMonitor struct {
AbstractMonitor `mapstructure:",squash"`
}

View File

@@ -1,17 +1,22 @@
{ {
"api_url": "https://demo.cachethq.io/api/v1", "api": {
"api_token": "9yMHsdioQosnyVK4iCVR", "url": "https://demo.cachethq.io/api/v1",
"interval": 5, "token": "9yMHsdioQosnyVK4iCVR",
"insecure": true
},
"monitors": [ "monitors": [
{ {
"name": "nodegear frontend", "name": "google",
"url": "https://nodegear.io/ping", "url": "https://google.com",
"metric_id": 1,
"threshold": 80, "threshold": 80,
"component_id": null, "component_id": 1,
"interval": 10,
"timeout": 5,
"headers": {
"Authorization": "Basic <hash>"
},
"expected_status_code": 200, "expected_status_code": 200,
"strict_tls": true "strict_tls": true
} }
], ]
"insecure_api": false
} }

14
example.config.yml Normal file
View File

@@ -0,0 +1,14 @@
# Example cachet-monitor configuration (YAML).
api:
  url: https://demo.cachethq.io/api/v1
  token: 9yMHsdioQosnyVK4iCVR
monitors:
  - name: google
    target: https://google.com
    threshold: 80
    component_id: 1
    interval: 10
    timeout: 5
    headers:
      Authorization: Basic <hash>
    expected_status_code: 200
    strict: true

133
http.go
View File

@@ -1,58 +1,125 @@
package cachet package cachet
import ( import (
"bytes"
"crypto/tls" "crypto/tls"
"encoding/json"
"fmt"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"regexp"
"strconv" "strconv"
"strings"
"time" "time"
) )
func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { // Investigating template
req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody)) var defaultHTTPInvestigatingTpl = MessageTemplate{
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
Message: `{{ .Monitor.Name }} check **failed** (server time: {{ .now }})
req.Header.Set("Content-Type", "application/json") {{ .FailReason }}`,
req.Header.Set("X-Cachet-Token", monitor.APIToken) }
client := &http.Client{} // Fixed template
if monitor.InsecureAPI == true { var defaultHTTPFixedTpl = MessageTemplate{
client.Transport = &http.Transport{ Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, Message: `**Resolved** - {{ .now }}
}
- - -
{{ .incident.Message }}`,
}
type HTTPMonitor struct {
AbstractMonitor `mapstructure:",squash"`
Method string
ExpectedStatusCode int `mapstructure:"expected_status_code"`
Headers map[string]string
// compiled to Regexp
ExpectedBody string `mapstructure:"expected_body"`
bodyRegexp *regexp.Regexp
}
// TODO: test
func (monitor *HTTPMonitor) test() bool {
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
for k, v := range monitor.Headers {
req.Header.Add(k, v)
} }
res, err := client.Do(req) transport := http.DefaultTransport.(*http.Transport)
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: monitor.Strict == false}
client := &http.Client{
Timeout: time.Duration(monitor.Timeout * time.Second),
Transport: transport,
}
resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, []byte{}, err monitor.lastFailReason = err.Error()
return false
} }
defer res.Body.Close() defer resp.Body.Close()
body, _ := ioutil.ReadAll(res.Body)
return res, body, nil if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
} monitor.lastFailReason = "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
return false
// SendMetric sends lag metric point
func (monitor *Monitor) SendMetric(delay int64) error {
if monitor.MetricID == 0 {
return nil
} }
jsonBytes, _ := json.Marshal(&map[string]interface{}{ if monitor.bodyRegexp != nil {
"value": delay, // check response body
}) responseBody, err := ioutil.ReadAll(resp.Body)
if err != nil {
resp, _, err := monitor.config.makeRequest("POST", "/metrics/"+strconv.Itoa(monitor.MetricID)+"/points", jsonBytes) monitor.lastFailReason = err.Error()
if err != nil || resp.StatusCode != 200 { return false
return fmt.Errorf("Could not log data point!\n%v\n", err)
} }
return nil if !monitor.bodyRegexp.Match(responseBody) {
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
return false
}
}
return true
} }
func getMs() int64 { // TODO: test
return time.Now().UnixNano() / int64(time.Millisecond) func (mon *HTTPMonitor) Validate() []string {
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
errs := mon.AbstractMonitor.Validate()
if len(mon.ExpectedBody) > 0 {
exp, err := regexp.Compile(mon.ExpectedBody)
if err != nil {
errs = append(errs, "Regexp compilation failure: "+err.Error())
} else {
mon.bodyRegexp = exp
}
}
if len(mon.ExpectedBody) == 0 && mon.ExpectedStatusCode == 0 {
errs = append(errs, "Both 'expected_body' and 'expected_status_code' fields empty")
}
mon.Method = strings.ToUpper(mon.Method)
switch mon.Method {
case "GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD":
break
case "":
mon.Method = "GET"
default:
errs = append(errs, "Unsupported HTTP method: "+mon.Method)
}
return errs
}
func (mon *HTTPMonitor) Describe() []string {
features := mon.AbstractMonitor.Describe()
features = append(features, "Method: "+mon.Method)
return features
} }

5
icmp.go Normal file
View File

@@ -0,0 +1,5 @@
package cachet
// ICMPMonitor is the monitor type instantiated for configuration entries whose
// type is "icmp". It embeds AbstractMonitor and declares no fields of its own.
// The ",squash" tag makes mapstructure decode the embedded struct's fields from
// the same map level as the monitor entry itself instead of from a nested key.
// NOTE(review): no ICMP-specific check is visible in this file; presumably the
// embedded AbstractMonitor behaviour is used as-is for now - confirm.
type ICMPMonitor struct {
AbstractMonitor `mapstructure:",squash"`
}

View File

@@ -4,6 +4,8 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"strconv" "strconv"
"github.com/Sirupsen/logrus"
) )
// Incident Cachet data model // Incident Cachet data model
@@ -33,7 +35,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
} }
if err != nil { if err != nil {
cfg.Logger.Printf("cannot fetch component: %v", err) logrus.Warnf("cannot fetch component: %v", err)
} }
case 4: case 4:
// fixed // fixed
@@ -49,21 +51,19 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
jsonBytes, _ := json.Marshal(incident) jsonBytes, _ := json.Marshal(incident)
resp, body, err := cfg.makeRequest(requestType, requestURL, jsonBytes) resp, body, err := cfg.API.NewRequest(requestType, requestURL, jsonBytes)
if err != nil { if err != nil {
return err return err
} }
var data struct { var data struct {
Incident struct {
ID int `json:"id"` ID int `json:"id"`
} `json:"data"`
} }
if err := json.Unmarshal(body, &data); err != nil { if err := json.Unmarshal(body.Data, &data); err != nil {
return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body)) return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body.Data))
} }
incident.ID = data.Incident.ID incident.ID = data.ID
if resp.StatusCode != 200 { if resp.StatusCode != 200 {
return fmt.Errorf("Could not create/update incident!") return fmt.Errorf("Could not create/update incident!")
} }
@@ -72,7 +72,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
} }
func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) { func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
resp, body, err := cfg.makeRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil) resp, body, err := cfg.API.NewRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil)
if err != nil { if err != nil {
return 0, err return 0, err
} }
@@ -82,15 +82,13 @@ func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
} }
var data struct { var data struct {
Component struct {
Status int `json:"status"` Status int `json:"status"`
} `json:"data"`
} }
if err := json.Unmarshal(body, &data); err != nil { if err := json.Unmarshal(body.Data, &data); err != nil {
return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body), err) return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body.Data), err)
} }
return data.Component.Status, nil return data.Status, nil
} }
// SetInvestigating sets status to Investigating // SetInvestigating sets status to Investigating

View File

@@ -1,41 +1,59 @@
package cachet package cachet
import ( import (
"crypto/tls"
"errors"
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"sync" "sync"
"time" "time"
"github.com/Sirupsen/logrus"
) )
const HttpTimeout = time.Duration(time.Second) const DefaultInterval = time.Second * 60
const DefaultInterval = 60 const DefaultTimeout = time.Second
const DefaultTimeFormat = "15:04:05 Jan 2 MST" const DefaultTimeFormat = "15:04:05 Jan 2 MST"
const HistorySize = 10
// Monitor data model type MonitorInterface interface {
type Monitor struct { ClockStart(*CachetMonitor, MonitorInterface, *sync.WaitGroup)
Name string `json:"name"` ClockStop()
URL string `json:"url"` tick(MonitorInterface)
Method string `json:"method"` test() bool
StrictTLS bool `json:"strict_tls"`
CheckInterval time.Duration `json:"interval"`
MetricID int `json:"metric_id"` Validate() []string
ComponentID int `json:"component_id"` GetMonitor() *AbstractMonitor
Describe() []string
}
// Threshold = percentage // AbstractMonitor data model
Threshold float32 `json:"threshold"` type AbstractMonitor struct {
ExpectedStatusCode int `json:"expected_status_code"` Name string
// compiled to Regexp Target string
ExpectedBody string `json:"expected_body"`
bodyRegexp *regexp.Regexp // (default)http, tcp, dns, icmp
Type string
Strict bool
Interval time.Duration
Timeout time.Duration
MetricID int `mapstructure:"metric_id"`
ComponentID int `mapstructure:"component_id"`
// Templating stuff
Template struct {
Investigating MessageTemplate
Fixed MessageTemplate
}
// Threshold = percentage / number of down incidents
Threshold float32
ThresholdCount bool `mapstructure:"threshold_count"`
// lag / average(lagHistory) * 100 = percentage above average lag
// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
PerformanceThreshold float32
history []bool history []bool
lagHistory []float32
lastFailReason string lastFailReason string
incident *Incident incident *Incident
config *CachetMonitor config *CachetMonitor
@@ -44,17 +62,67 @@ type Monitor struct {
stopC chan bool stopC chan bool
} }
func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) { func (mon *AbstractMonitor) Validate() []string {
errs := []string{}
if len(mon.Name) == 0 {
errs = append(errs, "Name is required")
}
if mon.Interval < 1 {
mon.Interval = DefaultInterval
}
if mon.Timeout < 1 {
mon.Timeout = DefaultTimeout
}
if mon.Timeout > mon.Interval {
errs = append(errs, "Timeout greater than interval")
}
if mon.ComponentID == 0 && mon.MetricID == 0 {
errs = append(errs, "component_id & metric_id are unset")
}
if mon.Threshold <= 0 {
mon.Threshold = 100
}
if err := mon.Template.Fixed.Compile(); err != nil {
errs = append(errs, "Could not compile \"fixed\" template: "+err.Error())
}
if err := mon.Template.Investigating.Compile(); err != nil {
errs = append(errs, "Could not compile \"investigating\" template: "+err.Error())
}
return errs
}
func (mon *AbstractMonitor) GetMonitor() *AbstractMonitor {
return mon
}
func (mon *AbstractMonitor) Describe() []string {
features := []string{"Type: " + mon.Type}
if len(mon.Name) > 0 {
features = append(features, "Name: "+mon.Name)
}
return features
}
func (mon *AbstractMonitor) ClockStart(cfg *CachetMonitor, iface MonitorInterface, wg *sync.WaitGroup) {
wg.Add(1) wg.Add(1)
mon.config = cfg mon.config = cfg
mon.stopC = make(chan bool) mon.stopC = make(chan bool)
mon.Tick() if cfg.Immediate {
mon.tick(iface)
}
ticker := time.NewTicker(mon.CheckInterval * time.Second) ticker := time.NewTicker(mon.Interval * time.Second)
for { for {
select { select {
case <-ticker.C: case <-ticker.C:
mon.Tick() mon.tick(iface)
case <-mon.stopC: case <-mon.stopC:
wg.Done() wg.Done()
return return
@@ -62,174 +130,124 @@ func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) {
} }
} }
func (monitor *Monitor) Stop() { func (mon *AbstractMonitor) ClockStop() {
if monitor.Stopped() {
return
}
close(monitor.stopC)
}
func (monitor *Monitor) Stopped() bool {
select { select {
case <-monitor.stopC: case <-mon.stopC:
return true return
default: default:
return false close(mon.stopC)
} }
} }
func (monitor *Monitor) Tick() { func (mon *AbstractMonitor) test() bool { return false }
// TODO: test
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
reqStart := getMs() reqStart := getMs()
isUp := monitor.doRequest() up := iface.test()
lag := getMs() - reqStart lag := getMs() - reqStart
if len(monitor.history) == 9 { histSize := HistorySize
monitor.config.Logger.Printf("%v is now saturated\n", monitor.Name) if mon.ThresholdCount {
histSize = int(mon.Threshold)
} }
if len(monitor.history) >= 10 {
monitor.history = monitor.history[len(monitor.history)-9:]
}
monitor.history = append(monitor.history, isUp)
monitor.AnalyseData()
if isUp == true && monitor.MetricID > 0 { if len(mon.history) == histSize-1 {
monitor.SendMetric(lag) logrus.Warnf("%v is now saturated\n", mon.Name)
}
if len(mon.history) >= histSize {
mon.history = mon.history[len(mon.history)-(histSize-1):]
}
mon.history = append(mon.history, up)
mon.AnalyseData()
// report lag
if mon.MetricID > 0 {
go mon.config.API.SendMetric(mon.MetricID, lag)
} }
} }
func (monitor *Monitor) doRequest() bool { // TODO: test
client := &http.Client{
Timeout: HttpTimeout,
}
if monitor.StrictTLS == false {
client.Transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
}
resp, err := client.Get(monitor.URL)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
defer resp.Body.Close()
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
return false
}
if monitor.bodyRegexp != nil {
// check body
responseBody, err := ioutil.ReadAll(resp.Body)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
match := monitor.bodyRegexp.Match(responseBody)
if !match {
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody
}
return match
}
return true
}
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident // AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
func (monitor *Monitor) AnalyseData() { func (mon *AbstractMonitor) AnalyseData() {
// look at the past few incidents // look at the past few incidents
numDown := 0 numDown := 0
for _, wasUp := range monitor.history { for _, wasUp := range mon.history {
if wasUp == false { if wasUp == false {
numDown++ numDown++
} }
} }
t := (float32(numDown) / float32(len(monitor.history))) * 100 t := (float32(numDown) / float32(len(mon.history))) * 100
monitor.config.Logger.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second)) l := logrus.WithFields(logrus.Fields{
"monitor": mon.Name,
"time": time.Now().Format(mon.config.DateFormat),
})
if numDown == 0 {
l.Printf("monitor is up")
} else if mon.ThresholdCount {
l.Printf("monitor down %d/%d", numDown, int(mon.Threshold))
} else {
l.Printf("monitor down %.2f%%/%.2f%%", t, mon.Threshold)
}
if len(monitor.history) != 10 { histSize := HistorySize
if mon.ThresholdCount {
histSize = int(mon.Threshold)
}
if len(mon.history) != histSize {
// not saturated // not saturated
return return
} }
if t > monitor.Threshold && monitor.incident == nil { triggered := (mon.ThresholdCount && numDown == int(mon.Threshold)) || (!mon.ThresholdCount && t > mon.Threshold)
monitor.incident = &Incident{
Name: monitor.Name + " - " + monitor.config.SystemName, if triggered && mon.incident == nil {
ComponentID: monitor.ComponentID, // create incident
Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat), tplData := getTemplateData(mon)
tplData["FailReason"] = mon.lastFailReason
subject, message := mon.Template.Investigating.Exec(tplData)
mon.incident = &Incident{
Name: subject,
ComponentID: mon.ComponentID,
Message: message,
Notify: true, Notify: true,
} }
if len(monitor.lastFailReason) > 0 { // is down, create an incident
monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`" l.Warnf("creating incident. Monitor is down: %v", mon.lastFailReason)
// set investigating status
mon.incident.SetInvestigating()
// create/update incident
if err := mon.incident.Send(mon.config); err != nil {
l.Printf("Error sending incident: %v", err)
} }
// is down, create an incident return
monitor.config.Logger.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason)
// set investigating status
monitor.incident.SetInvestigating()
// create/update incident
if err := monitor.incident.Send(monitor.config); err != nil {
monitor.config.Logger.Printf("Error sending incident: %v\n", err)
} }
} else if t < monitor.Threshold && monitor.incident != nil {
// still triggered or no incident
if triggered || mon.incident == nil {
return
}
// was down, created an incident, its now ok, make it resolved. // was down, created an incident, its now ok, make it resolved.
monitor.config.Logger.Printf("%v resolved downtime incident", monitor.Name) l.Warn("Resolving incident")
// resolve incident // resolve incident
monitor.incident.Message = "\n**Resolved** - " + time.Now().Format(DefaultTimeFormat) + "\n\n - - - \n\n" + monitor.incident.Message tplData := getTemplateData(mon)
monitor.incident.SetFixed() tplData["incident"] = mon.incident
monitor.incident.Send(monitor.config)
monitor.lastFailReason = "" subject, message := mon.Template.Fixed.Exec(tplData)
monitor.incident = nil mon.incident.Name = subject
mon.incident.Message = message
mon.incident.SetFixed()
if err := mon.incident.Send(mon.config); err != nil {
l.Printf("Error sending incident: %v", err)
} }
}
mon.lastFailReason = ""
func (monitor *Monitor) ValidateConfiguration() error { mon.incident = nil
if len(monitor.ExpectedBody) > 0 {
exp, err := regexp.Compile(monitor.ExpectedBody)
if err != nil {
return err
}
monitor.bodyRegexp = exp
}
if len(monitor.ExpectedBody) == 0 && monitor.ExpectedStatusCode == 0 {
return errors.New("Nothing to check, both 'expected_body' and 'expected_status_code' fields empty")
}
if monitor.CheckInterval < 1 {
monitor.CheckInterval = DefaultInterval
}
monitor.Method = strings.ToUpper(monitor.Method)
switch monitor.Method {
case "GET", "POST", "DELETE", "OPTIONS", "HEAD":
break
case "":
monitor.Method = "GET"
default:
return fmt.Errorf("Unsupported check method: %v", monitor.Method)
}
if monitor.ComponentID == 0 && monitor.MetricID == 0 {
return errors.New("component_id & metric_id are unset")
}
if monitor.Threshold <= 0 {
monitor.Threshold = 100
}
return nil
} }

View File

@@ -4,6 +4,7 @@ Features
-------- --------
- [x] Creates & Resolves Incidents - [x] Creates & Resolves Incidents
- [x] Check URLs by response code and/or body contents
- [x] Posts monitor lag to cachet graphs - [x] Posts monitor lag to cachet graphs
- [x] Updates Component to Partial Outage - [x] Updates Component to Partial Outage
- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring) - [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring)
@@ -27,10 +28,17 @@ Configuration
"url": "Ping URL", "url": "Ping URL",
// optional, http method (defaults GET) // optional, http method (defaults GET)
"method": "get", "method": "get",
// optional, http Headers to add (default none)
"headers": [
// specify the name and value of each HTTP header, e.g. Authorization
{ "name": "Authorization", "value": "Basic <hash>" }
],
// self-signed ssl certificate // self-signed ssl certificate
"strict_tls": true, "strict_tls": true,
// seconds between checks // seconds between checks
"interval": 10, "interval": 10,
// seconds for http timeout
"timeout": 5,
// post lag to cachet metric (graph) // post lag to cachet metric (graph)
// note either metric ID or component ID are required // note either metric ID or component ID are required
"metric_id": <metric id>, "metric_id": <metric id>,
@@ -89,3 +97,27 @@ Package usage
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside. When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor) [API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
## License
MIT License
Copyright (c) 2016 Castaway Labs LLC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

15
tcp.go Normal file
View File

@@ -0,0 +1,15 @@
package cachet
// TCPMonitor is the monitor type declared in tcp.go. It embeds
// AbstractMonitor, whose fields are squashed into this struct by the
// mapstructure tag.
// NOTE(review): the fields below (DNSServer, Domain, Type, Expect) read like
// DNS-check settings rather than TCP ones — confirm they are intended here.
type TCPMonitor struct {
	AbstractMonitor `mapstructure:",squash"`
	// same as output from net.JoinHostPort
	// defaults to parsed config from /etc/resolv.conf when empty
	DNSServer string
	// Will be converted to FQDN
	Domain string
	// NOTE(review): presumably the record/query type — confirm against the
	// code that consumes this field.
	Type string
	// expected answers (regex)
	Expect []string
}

53
template.go Normal file
View File

@@ -0,0 +1,53 @@
package cachet
import (
"bytes"
"text/template"
)
// MessageTemplate holds the subject and message texts of a notification
// together with their compiled text/template forms.
type MessageTemplate struct {
	// Subject is the template source for the subject line.
	Subject string `json:"subject"`
	// Message is the template source for the message body.
	Message string `json:"message"`

	// subjectTpl and messageTpl are populated by Compile. Each one stays nil
	// when its source text is empty or when Compile has not been called.
	subjectTpl *template.Template
	messageTpl *template.Template
}

// SetDefault fills in Subject and Message from d for whichever of the two is
// empty on t. Fields that are already set on t are left untouched.
func (t *MessageTemplate) SetDefault(d MessageTemplate) {
	if len(t.Subject) == 0 {
		t.Subject = d.Subject
	}
	if len(t.Message) == 0 {
		t.Message = d.Message
	}
}

// Compile parses the non-empty Subject and Message texts into templates.
// It returns the first parse error encountered; when Subject fails to parse,
// Message is not attempted.
// TODO: test
func (t *MessageTemplate) Compile() error {
	var err error

	if len(t.Subject) > 0 {
		t.subjectTpl, err = compileTemplate(t.Subject)
	}

	if err == nil && len(t.Message) > 0 {
		t.messageTpl, err = compileTemplate(t.Message)
	}

	return err
}

// Exec renders the subject and message templates with data and returns them
// as (subject, message). A part whose template was never compiled (empty
// source text, or Compile not called) renders as the empty string.
func (t *MessageTemplate) Exec(data interface{}) (string, string) {
	return t.exec(t.subjectTpl, data), t.exec(t.messageTpl, data)
}

// exec renders a single template with data and returns the output.
// A nil template yields "" instead of panicking on a nil dereference.
func (t *MessageTemplate) exec(tpl *template.Template, data interface{}) string {
	if tpl == nil {
		return ""
	}

	var buf bytes.Buffer
	// Rendering is best-effort: the signature has no error return, so on an
	// execution error whatever was written before the failure is returned.
	_ = tpl.Execute(&buf, data)

	return buf.String()
}

// compileTemplate parses text as an unnamed text/template.
func compileTemplate(text string) (*template.Template, error) {
	return template.New("").Parse(text)
}