20 Commits

Author SHA1 Message Date
Matej Kramny
c04128ce36 compile message for Fixed status
- better logging
2017-02-06 23:05:39 -08:00
Matej Kramny
1b93730121 compile template message 2017-02-06 21:56:08 -08:00
Matej Kramny
85d92bcb07 rename monitor -> mon 2017-02-06 10:57:02 -08:00
Matej Kramny
0dc54e4e6e - customisable time format
- custom messages
- configure threshold count instead of uptime %
2017-02-05 23:43:38 -08:00
Matej Kramny
b3bc1d4405 - compile message templates
- send metrics to cachet
- fix http default configuration
2017-02-05 19:27:01 -08:00
Matej Kramny
b4fa33b8ad - immediate tick flag
- reword Start -> ClockStart etc
2017-02-04 22:40:31 -08:00
Matej Kramny
edfd4a51e6 Print monitor features 2017-02-04 22:15:24 -08:00
Matej Kramny
a2d8128109 huuman friendly config! 2017-02-04 21:49:13 -08:00
Matej Kramny
d43eca4b7d - yaml & json supported 2017-02-04 21:48:27 -08:00
Matej Kramny
36bf228599 a compiling proof of concept
- abstract type
- http, tcp, icmp & dns monitor types
- unmarshal from json into any monitor type
2017-02-04 18:23:53 -08:00
Matej Kramny
0cd6fa13a7 Merge branch 'master' into v3
# Conflicts:
#	cli/main.go
2017-02-04 16:12:42 -08:00
Matej Kramny
e910807973 basic refactor + new prototype 2017-02-04 16:02:22 -08:00
Matej Kramny
9b29a0450c Merge pull request #40 from to-kn/add_http_header_support
Add http header support
2017-02-04 14:45:21 -08:00
Matej Kramny
aaecc1669a Merge pull request #41 from yacloud-io/master
Support making request with proxy
2016-07-27 16:30:15 +01:00
Yi Tao Jiang
48586eb0aa Support making request with proxy 2016-07-27 23:24:54 +08:00
Tobias Knipping
2c364f3d2f add support for specifying http-headers and really use Method spezified 2016-07-24 16:34:30 +02:00
Matej Kramny
0de0baf5f9 Merge pull request #31 from faizshukri/fix/timeout-exceed
Timeout exceeded while awaiting headers
2016-06-22 15:59:54 +01:00
Faiz Shukri
3f4b9ced77 Add timeout customization 2016-06-16 13:38:51 +08:00
Matej Kramny
20e4dd1414 Add to readme 2016-05-19 19:43:01 +01:00
Matej Kramny
29b02fd164 Update example JSON 2016-05-19 19:41:19 +01:00
15 changed files with 712 additions and 314 deletions

5
.gitignore vendored
View File

@@ -1,2 +1,3 @@
gin-bin /config.yml
example.config.local.json /config.json
examples/

79
api.go Normal file
View File

@@ -0,0 +1,79 @@
package cachet
import (
"bytes"
"crypto/tls"
"encoding/json"
"errors"
"net/http"
"strconv"
"time"
"github.com/Sirupsen/logrus"
)
// Types describing how a Cachet instance is reached and what it replies with.
type (
	// CachetAPI holds the connection settings used for every API request.
	CachetAPI struct {
		// URL is the base address; request paths are appended to it.
		URL string `json:"url"`
		// Token is sent in the X-Cachet-Token request header.
		Token string `json:"token"`
		// Insecure is passed to tls.Config.InsecureSkipVerify for API requests.
		Insecure bool `json:"insecure"`
	}

	// CachetResponse is the envelope of an API reply. Data keeps the "data"
	// member as raw JSON so each caller can decode it into its own type.
	CachetResponse struct {
		Data json.RawMessage `json:"data"`
	}
)
// TODO: test
// Ping issues GET /ping against the configured API.
// It returns the request error if there is one, an error when the reply
// status is not 200, and nil otherwise.
func (api CachetAPI) Ping() error {
	response, _, pingErr := api.NewRequest("GET", "/ping", nil)

	switch {
	case pingErr != nil:
		return pingErr
	case response.StatusCode != http.StatusOK:
		return errors.New("API Responded with non-200 status code")
	}

	return nil
}
// SendMetric adds a data point to a cachet monitor.
//
// id is the Cachet metric ID and lag is the measured value (logged as
// milliseconds). The point is stamped with the current Unix time and POSTed
// to /metrics/<id>/points. Nothing is returned: failures are logged as
// warnings only, so a metrics outage never interrupts monitoring.
func (api CachetAPI) SendMetric(id int, lag int64) {
	logrus.Debugf("Sending lag metric ID:%d RTT %vms", id, lag)

	jsonBytes, err := json.Marshal(map[string]interface{}{
		"value":     lag,
		"timestamp": time.Now().Unix(),
	})
	if err != nil {
		logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
		return
	}

	resp, _, err := api.NewRequest("POST", "/metrics/"+strconv.Itoa(id)+"/points", jsonBytes)
	if err != nil {
		logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
		return
	}

	// The request itself worked; report the status instead of a useless "err: <nil>".
	if resp.StatusCode != 200 {
		logrus.Warnf("Could not log metric! ID: %d, status: %d", id, resp.StatusCode)
	}
}
// TODO: test
// NewRequest wraps http.NewRequest: it builds the request, sends it to the
// Cachet API and decodes the JSON envelope of the reply.
//
// requestType is the HTTP method, url is the path appended to api.URL and
// reqBody is the JSON payload (may be nil). The Content-Type and
// X-Cachet-Token headers are always set.
//
// It returns the response, the decoded envelope and an error. The response
// body has already been consumed and closed when NewRequest returns; callers
// should use the status/headers and the returned CachetResponse only. When
// the request cannot be built or sent the response is nil. When the reply is
// not valid JSON the response is still returned together with the decode error.
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
	req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
	if err != nil {
		// req is nil here (invalid method or URL); touching req.Header would panic.
		return nil, CachetResponse{}, err
	}

	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-Cachet-Token", api.Token)

	// Use a private transport instead of rewriting http.DefaultTransport:
	// the default one is shared by the whole process, and changing its TLS
	// settings per call races with other goroutines issuing requests.
	// Keep-alives are disabled because this transport lives for one request
	// only; idle connections would otherwise pile up.
	transport := &http.Transport{
		Proxy:               http.ProxyFromEnvironment,
		TLSClientConfig:     &tls.Config{InsecureSkipVerify: api.Insecure},
		TLSHandshakeTimeout: 10 * time.Second,
		DisableKeepAlives:   true,
	}

	client := &http.Client{
		Transport: transport,
	}

	res, err := client.Do(req)
	if err != nil {
		return nil, CachetResponse{}, err
	}
	// Always release the connection, whatever the decode outcome.
	defer res.Body.Close()

	var body CachetResponse
	err = json.NewDecoder(res.Body).Decode(&body)

	return res, body, err
}

View File

@@ -3,105 +3,125 @@ package main
import ( import (
"encoding/json" "encoding/json"
"errors" "errors"
"flag"
"fmt"
"io/ioutil" "io/ioutil"
"log"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
"os/signal" "os/signal"
"strings"
"sync" "sync"
"github.com/Sirupsen/logrus"
cachet "github.com/castawaylabs/cachet-monitor" cachet "github.com/castawaylabs/cachet-monitor"
docopt "github.com/docopt/docopt-go"
"github.com/mitchellh/mapstructure"
"gopkg.in/yaml.v2"
) )
var configPath string const usage = `cachet-monitor
var systemName string
var logPath string Usage:
cachet-monitor (-c PATH | --config PATH) [--log=LOGPATH] [--name=NAME] [--immediate]
cachet-monitor -h | --help | --version
cachet-monitor print-config
Arguments:
PATH path to config.json
LOGPATH path to log output (defaults to STDOUT)
NAME name of this logger
Examples:
cachet-monitor -c /root/cachet-monitor.json
cachet-monitor -c /root/cachet-monitor.json --log=/var/log/cachet-monitor.log --name="development machine"
Options:
-c PATH.json --config PATH Path to configuration file
-h --help Show this screen.
--version Show version
--immediate Tick immediately (by default waits for first defined interval)
print-config Print example configuration
Environment variables:
CACHET_API override API url from configuration
CACHET_TOKEN override API token from configuration
CACHET_DEV set to enable dev logging`
func main() { func main() {
flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path") arguments, _ := docopt.Parse(usage, nil, true, "cachet-monitor", false)
flag.StringVar(&systemName, "name", "", "System Name")
flag.StringVar(&logPath, "log", "", "Log path")
flag.Parse()
cfg, err := getConfiguration(configPath) cfg, err := getConfiguration(arguments["--config"].(string))
if err != nil { if err != nil {
panic(err) logrus.Panicf("Unable to start (reading config): %v", err)
} }
if len(systemName) > 0 { if immediate, ok := arguments["--immediate"]; ok {
cfg.SystemName = systemName cfg.Immediate = immediate.(bool)
} }
if len(logPath) > 0 {
cfg.LogPath = logPath if name := arguments["--name"]; name != nil {
cfg.SystemName = name.(string)
} }
logrus.SetOutput(getLogger(arguments["--log"]))
if len(os.Getenv("CACHET_API")) > 0 { if len(os.Getenv("CACHET_API")) > 0 {
cfg.APIUrl = os.Getenv("CACHET_API") cfg.API.URL = os.Getenv("CACHET_API")
} }
if len(os.Getenv("CACHET_TOKEN")) > 0 { if len(os.Getenv("CACHET_TOKEN")) > 0 {
cfg.APIToken = os.Getenv("CACHET_TOKEN") cfg.API.Token = os.Getenv("CACHET_TOKEN")
}
if len(os.Getenv("CACHET_DEV")) > 0 {
logrus.SetLevel(logrus.DebugLevel)
} }
if err := cfg.ValidateConfiguration(); err != nil { if valid := cfg.Validate(); !valid {
panic(err) logrus.Errorf("Invalid configuration")
os.Exit(1)
} }
cfg.Logger.Printf("System: %s\nAPI: %s\nMonitors: %d\n\n", cfg.SystemName, cfg.APIUrl, len(cfg.Monitors)) logrus.Debug("Configuration valid")
logrus.Infof("System: %s", cfg.SystemName)
logrus.Infof("API: %s", cfg.API.URL)
logrus.Infof("Monitors: %d\n", len(cfg.Monitors))
logrus.Infof("Pinging cachet")
if err := cfg.API.Ping(); err != nil {
logrus.Errorf("Cannot ping cachet!\n%v", err)
os.Exit(1)
}
logrus.Infof("Ping OK")
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
for _, mon := range cfg.Monitors { for index, monitor := range cfg.Monitors {
cfg.Logger.Printf(" Starting %s: %d seconds check interval\n - %v %s", mon.Name, mon.CheckInterval, mon.Method, mon.URL) logrus.Infof("Starting Monitor #%d: ", index)
logrus.Infof("Features: \n - %v", strings.Join(monitor.Describe(), "\n - "))
// print features go monitor.ClockStart(cfg, monitor, wg)
if mon.ExpectedStatusCode > 0 {
cfg.Logger.Printf(" - Expect HTTP %d", mon.ExpectedStatusCode)
}
if len(mon.ExpectedBody) > 0 {
cfg.Logger.Printf(" - Expect Body to match \"%v\"", mon.ExpectedBody)
}
if mon.MetricID > 0 {
cfg.Logger.Printf(" - Log lag to metric id %d\n", mon.MetricID)
}
if mon.ComponentID > 0 {
cfg.Logger.Printf(" - Update component id %d\n\n", mon.ComponentID)
}
go mon.Start(cfg, wg)
} }
signals := make(chan os.Signal, 1) signals := make(chan os.Signal, 1)
signal.Notify(signals, os.Interrupt, os.Kill) signal.Notify(signals, os.Interrupt, os.Kill)
<-signals <-signals
cfg.Logger.Println("Abort: Waiting monitors to finish") logrus.Warnf("Abort: Waiting monitors to finish")
for _, mon := range cfg.Monitors { for _, mon := range cfg.Monitors {
mon.Stop() mon.GetMonitor().ClockStop()
} }
wg.Wait() wg.Wait()
} }
func getLogger(logPath string) *log.Logger { func getLogger(logPath interface{}) *os.File {
var logWriter = os.Stdout if logPath == nil || len(logPath.(string)) == 0 {
var err error return os.Stdout
if len(logPath) > 0 {
logWriter, err = os.Create(logPath)
if err != nil {
fmt.Printf("Unable to open file '%v' for logging\n", logPath)
os.Exit(1)
}
} }
flags := log.Llongfile | log.Ldate | log.Ltime file, err := os.Create(logPath.(string))
if len(os.Getenv("CACHET_DEV")) > 0 { if err != nil {
flags = 0 logrus.Errorf("Unable to open file '%v' for logging: \n%v", logPath, err)
os.Exit(1)
} }
return log.New(logWriter, "", flags) return file
} }
func getConfiguration(path string) (*cachet.CachetMonitor, error) { func getConfiguration(path string) (*cachet.CachetMonitor, error) {
@@ -114,26 +134,73 @@ func getConfiguration(path string) (*cachet.CachetMonitor, error) {
// download config // download config
response, err := http.Get(path) response, err := http.Get(path)
if err != nil { if err != nil {
return nil, errors.New("Cannot download network config: " + err.Error()) logrus.Warn("Unable to download network configuration")
return nil, err
} }
defer response.Body.Close() defer response.Body.Close()
data, _ = ioutil.ReadAll(response.Body) data, _ = ioutil.ReadAll(response.Body)
fmt.Println("Downloaded network configuration.") logrus.Info("Downloaded network configuration.")
} else { } else {
data, err = ioutil.ReadFile(path) data, err = ioutil.ReadFile(path)
if err != nil { if err != nil {
return nil, errors.New("Config file '" + path + "' missing!") return nil, errors.New("Unable to open file: '" + path + "'")
} }
} }
if err := json.Unmarshal(data, &cfg); err != nil { if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") {
fmt.Println(err) err = yaml.Unmarshal(data, &cfg)
return nil, errors.New("Cannot parse config!") } else {
err = json.Unmarshal(data, &cfg)
} }
cfg.Logger = getLogger(cfg.LogPath) if err != nil {
logrus.Warnf("Unable to parse configuration file")
}
return &cfg, nil cfg.Monitors = make([]cachet.MonitorInterface, len(cfg.RawMonitors))
for index, rawMonitor := range cfg.RawMonitors {
var t cachet.MonitorInterface
var err error
// get default type
monType := cachet.GetMonitorType("")
if t, ok := rawMonitor["type"].(string); ok {
monType = cachet.GetMonitorType(t)
}
switch monType {
case "http":
var s cachet.HTTPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "dns":
var s cachet.DNSMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "icmp":
var s cachet.ICMPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "tcp":
var s cachet.TCPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
default:
logrus.Errorf("Invalid monitor type (index: %d) %v", index, monType)
continue
}
t.GetMonitor().Type = monType
if err != nil {
logrus.Errorf("Unable to unmarshal monitor to type (index: %d): %v", index, err)
continue
}
cfg.Monitors[index] = t
}
return &cfg, err
} }

View File

@@ -1,65 +1,89 @@
package cachet package cachet
import ( import (
"errors"
"log"
"net" "net"
"os" "os"
"strings"
"time"
"github.com/Sirupsen/logrus"
) )
type CachetMonitor struct { type CachetMonitor struct {
Logger *log.Logger `json:"-"` SystemName string `json:"system_name" yaml:"system_name"`
DateFormat string `json:"date_format" yaml:"date_format"`
API CachetAPI `json:"api"`
RawMonitors []map[string]interface{} `json:"monitors" yaml:"monitors"`
APIUrl string `json:"api_url"` Monitors []MonitorInterface `json:"-" yaml:"-"`
APIToken string `json:"api_token"` Immediate bool `json:"-" yaml:"-"`
SystemName string `json:"system_name"`
LogPath string `json:"log_path"`
InsecureAPI bool `json:"insecure_api"`
Monitors []*Monitor `json:"monitors"`
} }
func (cfg *CachetMonitor) ValidateConfiguration() error { // Validate configuration
if cfg.Logger == nil { func (cfg *CachetMonitor) Validate() bool {
cfg.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime) valid := true
}
if len(cfg.SystemName) == 0 { if len(cfg.SystemName) == 0 {
// get hostname // get hostname
cfg.SystemName = getHostname() cfg.SystemName = getHostname()
} }
if len(cfg.APIToken) == 0 || len(cfg.APIUrl) == 0 { if len(cfg.DateFormat) == 0 {
return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n") cfg.DateFormat = DefaultTimeFormat
}
if len(cfg.API.Token) == 0 || len(cfg.API.URL) == 0 {
logrus.Warnf("API URL or API Token missing.\nGet help at https://github.com/castawaylabs/cachet-monitor")
valid = false
} }
if len(cfg.Monitors) == 0 { if len(cfg.Monitors) == 0 {
return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n") logrus.Warnf("No monitors defined!\nSee help for example configuration")
valid = false
} }
for _, monitor := range cfg.Monitors { for index, monitor := range cfg.Monitors {
if err := monitor.ValidateConfiguration(); err != nil { if errs := monitor.Validate(); len(errs) > 0 {
return err logrus.Warnf("Monitor validation errors (index %d): %v", index, "\n - "+strings.Join(errs, "\n - "))
valid = false
} }
} }
return nil return valid
} }
// getHostname returns id of the current system // getHostname returns id of the current system
func getHostname() string { func getHostname() string {
hostname, err := os.Hostname() hostname, err := os.Hostname()
if err != nil || len(hostname) == 0 { if err == nil && len(hostname) > 0 {
addrs, err := net.InterfaceAddrs() return hostname
if err != nil {
return "unknown"
}
for _, addr := range addrs {
return addr.String()
}
} }
return hostname addrs, err := net.InterfaceAddrs()
if err != nil || len(addrs) == 0 {
return "unknown"
}
return addrs[0].String()
}
func getMs() int64 {
return time.Now().UnixNano() / int64(time.Millisecond)
}
func GetMonitorType(t string) string {
if len(t) == 0 {
return "http"
}
return strings.ToLower(t)
}
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
return map[string]interface{}{
"SystemName": monitor.config.SystemName,
"API": monitor.config.API,
"Monitor": monitor,
"now": time.Now().Format(monitor.config.DateFormat),
}
} }

15
config_test.go Normal file
View File

@@ -0,0 +1,15 @@
package cachet
import (
"testing"
)
// TestGetMonitorType verifies that an empty type falls back to "http" and
// that an upper-case type name is returned in lower case.
func TestGetMonitorType(t *testing.T) {
	cases := []struct {
		input   string
		want    string
		failure string
	}{
		{input: "", want: "http", failure: "monitor type `` should default to http"},
		{input: "HTTP", want: "http", failure: "does not return correct monitor type"},
	}

	for _, tc := range cases {
		got := GetMonitorType(tc.input)
		if got == tc.want {
			continue
		}
		t.Error(tc.failure)
	}
}

5
dns.go Normal file
View File

@@ -0,0 +1,5 @@
package cachet
// DNSMonitor is the monitor type used for "dns" checks.
// It only embeds AbstractMonitor; no DNS-specific fields are declared here.
type DNSMonitor struct {
// The squash tag has mapstructure decode the embedded fields from the same
// map level as the monitor entry itself rather than from a nested key.
AbstractMonitor `mapstructure:",squash"`
}

View File

@@ -1,17 +1,22 @@
{ {
"api_url": "https://demo.cachethq.io/api/v1", "api": {
"api_token": "9yMHsdioQosnyVK4iCVR", "url": "https://demo.cachethq.io/api/v1",
"interval": 5, "token": "9yMHsdioQosnyVK4iCVR",
"insecure": true
},
"monitors": [ "monitors": [
{ {
"name": "nodegear frontend", "name": "google",
"url": "https://nodegear.io/ping", "url": "https://google.com",
"metric_id": 1,
"threshold": 80, "threshold": 80,
"component_id": null, "component_id": 1,
"interval": 10,
"timeout": 5,
"headers": {
"Authorization": "Basic <hash>"
},
"expected_status_code": 200, "expected_status_code": 200,
"strict_tls": true "strict_tls": true
} }
], ]
"insecure_api": false
} }

14
example.config.yml Normal file
View File

@@ -0,0 +1,14 @@
# Example cachet-monitor configuration (YAML).
api:
  url: https://demo.cachethq.io/api/v1
  token: 9yMHsdioQosnyVK4iCVR
monitors:
  - name: google
    target: https://google.com
    threshold: 80
    component_id: 1
    interval: 10
    timeout: 5
    headers:
      Authorization: Basic <hash>
    expected_status_code: 200
    strict: true

135
http.go
View File

@@ -1,58 +1,125 @@
package cachet package cachet
import ( import (
"bytes"
"crypto/tls" "crypto/tls"
"encoding/json"
"fmt"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"regexp"
"strconv" "strconv"
"strings"
"time" "time"
) )
func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) { // Investigating template
req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody)) var defaultHTTPInvestigatingTpl = MessageTemplate{
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
Message: `{{ .Monitor.Name }} check **failed** (server time: {{ .now }})
req.Header.Set("Content-Type", "application/json") {{ .FailReason }}`,
req.Header.Set("X-Cachet-Token", monitor.APIToken) }
client := &http.Client{} // Fixed template
if monitor.InsecureAPI == true { var defaultHTTPFixedTpl = MessageTemplate{
client.Transport = &http.Transport{ Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, Message: `**Resolved** - {{ .now }}
- - -
{{ .incident.Message }}`,
}
type HTTPMonitor struct {
AbstractMonitor `mapstructure:",squash"`
Method string
ExpectedStatusCode int `mapstructure:"expected_status_code"`
Headers map[string]string
// compiled to Regexp
ExpectedBody string `mapstructure:"expected_body"`
bodyRegexp *regexp.Regexp
}
// TODO: test
func (monitor *HTTPMonitor) test() bool {
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
for k, v := range monitor.Headers {
req.Header.Add(k, v)
}
transport := http.DefaultTransport.(*http.Transport)
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: monitor.Strict == false}
client := &http.Client{
Timeout: time.Duration(monitor.Timeout * time.Second),
Transport: transport,
}
resp, err := client.Do(req)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
defer resp.Body.Close()
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
monitor.lastFailReason = "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
return false
}
if monitor.bodyRegexp != nil {
// check response body
responseBody, err := ioutil.ReadAll(resp.Body)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
if !monitor.bodyRegexp.Match(responseBody) {
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
return false
} }
} }
res, err := client.Do(req) return true
if err != nil {
return nil, []byte{}, err
}
defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)
return res, body, nil
} }
// SendMetric sends lag metric point // TODO: test
func (monitor *Monitor) SendMetric(delay int64) error { func (mon *HTTPMonitor) Validate() []string {
if monitor.MetricID == 0 { mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
return nil mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
errs := mon.AbstractMonitor.Validate()
if len(mon.ExpectedBody) > 0 {
exp, err := regexp.Compile(mon.ExpectedBody)
if err != nil {
errs = append(errs, "Regexp compilation failure: "+err.Error())
} else {
mon.bodyRegexp = exp
}
} }
jsonBytes, _ := json.Marshal(&map[string]interface{}{ if len(mon.ExpectedBody) == 0 && mon.ExpectedStatusCode == 0 {
"value": delay, errs = append(errs, "Both 'expected_body' and 'expected_status_code' fields empty")
})
resp, _, err := monitor.config.makeRequest("POST", "/metrics/"+strconv.Itoa(monitor.MetricID)+"/points", jsonBytes)
if err != nil || resp.StatusCode != 200 {
return fmt.Errorf("Could not log data point!\n%v\n", err)
} }
return nil mon.Method = strings.ToUpper(mon.Method)
switch mon.Method {
case "GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD":
break
case "":
mon.Method = "GET"
default:
errs = append(errs, "Unsupported HTTP method: "+mon.Method)
}
return errs
} }
func getMs() int64 { func (mon *HTTPMonitor) Describe() []string {
return time.Now().UnixNano() / int64(time.Millisecond) features := mon.AbstractMonitor.Describe()
features = append(features, "Method: "+mon.Method)
return features
} }

5
icmp.go Normal file
View File

@@ -0,0 +1,5 @@
package cachet
// ICMPMonitor is the monitor type used for "icmp" checks.
// It only embeds AbstractMonitor; no ICMP-specific fields are declared here.
type ICMPMonitor struct {
// The squash tag has mapstructure decode the embedded fields from the same
// map level as the monitor entry itself rather than from a nested key.
AbstractMonitor `mapstructure:",squash"`
}

View File

@@ -4,6 +4,8 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"strconv" "strconv"
"github.com/Sirupsen/logrus"
) )
// Incident Cachet data model // Incident Cachet data model
@@ -33,7 +35,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
} }
if err != nil { if err != nil {
cfg.Logger.Printf("cannot fetch component: %v", err) logrus.Warnf("cannot fetch component: %v", err)
} }
case 4: case 4:
// fixed // fixed
@@ -49,21 +51,19 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
jsonBytes, _ := json.Marshal(incident) jsonBytes, _ := json.Marshal(incident)
resp, body, err := cfg.makeRequest(requestType, requestURL, jsonBytes) resp, body, err := cfg.API.NewRequest(requestType, requestURL, jsonBytes)
if err != nil { if err != nil {
return err return err
} }
var data struct { var data struct {
Incident struct { ID int `json:"id"`
ID int `json:"id"`
} `json:"data"`
} }
if err := json.Unmarshal(body, &data); err != nil { if err := json.Unmarshal(body.Data, &data); err != nil {
return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body)) return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body.Data))
} }
incident.ID = data.Incident.ID incident.ID = data.ID
if resp.StatusCode != 200 { if resp.StatusCode != 200 {
return fmt.Errorf("Could not create/update incident!") return fmt.Errorf("Could not create/update incident!")
} }
@@ -72,7 +72,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
} }
func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) { func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
resp, body, err := cfg.makeRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil) resp, body, err := cfg.API.NewRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil)
if err != nil { if err != nil {
return 0, err return 0, err
} }
@@ -82,15 +82,13 @@ func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
} }
var data struct { var data struct {
Component struct { Status int `json:"status"`
Status int `json:"status"`
} `json:"data"`
} }
if err := json.Unmarshal(body, &data); err != nil { if err := json.Unmarshal(body.Data, &data); err != nil {
return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body), err) return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body.Data), err)
} }
return data.Component.Status, nil return data.Status, nil
} }
// SetInvestigating sets status to Investigating // SetInvestigating sets status to Investigating

View File

@@ -1,41 +1,59 @@
package cachet package cachet
import ( import (
"crypto/tls"
"errors"
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"sync" "sync"
"time" "time"
"github.com/Sirupsen/logrus"
) )
const HttpTimeout = time.Duration(time.Second) const DefaultInterval = time.Second * 60
const DefaultInterval = 60 const DefaultTimeout = time.Second
const DefaultTimeFormat = "15:04:05 Jan 2 MST" const DefaultTimeFormat = "15:04:05 Jan 2 MST"
const HistorySize = 10
// Monitor data model type MonitorInterface interface {
type Monitor struct { ClockStart(*CachetMonitor, MonitorInterface, *sync.WaitGroup)
Name string `json:"name"` ClockStop()
URL string `json:"url"` tick(MonitorInterface)
Method string `json:"method"` test() bool
StrictTLS bool `json:"strict_tls"`
CheckInterval time.Duration `json:"interval"`
MetricID int `json:"metric_id"` Validate() []string
ComponentID int `json:"component_id"` GetMonitor() *AbstractMonitor
Describe() []string
}
// Threshold = percentage // AbstractMonitor data model
Threshold float32 `json:"threshold"` type AbstractMonitor struct {
ExpectedStatusCode int `json:"expected_status_code"` Name string
// compiled to Regexp Target string
ExpectedBody string `json:"expected_body"`
bodyRegexp *regexp.Regexp // (default)http, tcp, dns, icmp
Type string
Strict bool
Interval time.Duration
Timeout time.Duration
MetricID int `mapstructure:"metric_id"`
ComponentID int `mapstructure:"component_id"`
// Templating stuff
Template struct {
Investigating MessageTemplate
Fixed MessageTemplate
}
// Threshold = percentage / number of down incidents
Threshold float32
ThresholdCount bool `mapstructure:"threshold_count"`
// lag / average(lagHistory) * 100 = percentage above average lag
// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
PerformanceThreshold float32
history []bool history []bool
lagHistory []float32
lastFailReason string lastFailReason string
incident *Incident incident *Incident
config *CachetMonitor config *CachetMonitor
@@ -44,17 +62,67 @@ type Monitor struct {
stopC chan bool stopC chan bool
} }
func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) { func (mon *AbstractMonitor) Validate() []string {
errs := []string{}
if len(mon.Name) == 0 {
errs = append(errs, "Name is required")
}
if mon.Interval < 1 {
mon.Interval = DefaultInterval
}
if mon.Timeout < 1 {
mon.Timeout = DefaultTimeout
}
if mon.Timeout > mon.Interval {
errs = append(errs, "Timeout greater than interval")
}
if mon.ComponentID == 0 && mon.MetricID == 0 {
errs = append(errs, "component_id & metric_id are unset")
}
if mon.Threshold <= 0 {
mon.Threshold = 100
}
if err := mon.Template.Fixed.Compile(); err != nil {
errs = append(errs, "Could not compile \"fixed\" template: "+err.Error())
}
if err := mon.Template.Investigating.Compile(); err != nil {
errs = append(errs, "Could not compile \"investigating\" template: "+err.Error())
}
return errs
}
func (mon *AbstractMonitor) GetMonitor() *AbstractMonitor {
return mon
}
func (mon *AbstractMonitor) Describe() []string {
features := []string{"Type: " + mon.Type}
if len(mon.Name) > 0 {
features = append(features, "Name: "+mon.Name)
}
return features
}
func (mon *AbstractMonitor) ClockStart(cfg *CachetMonitor, iface MonitorInterface, wg *sync.WaitGroup) {
wg.Add(1) wg.Add(1)
mon.config = cfg mon.config = cfg
mon.stopC = make(chan bool) mon.stopC = make(chan bool)
mon.Tick() if cfg.Immediate {
mon.tick(iface)
}
ticker := time.NewTicker(mon.CheckInterval * time.Second) ticker := time.NewTicker(mon.Interval * time.Second)
for { for {
select { select {
case <-ticker.C: case <-ticker.C:
mon.Tick() mon.tick(iface)
case <-mon.stopC: case <-mon.stopC:
wg.Done() wg.Done()
return return
@@ -62,174 +130,124 @@ func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) {
} }
} }
func (monitor *Monitor) Stop() { func (mon *AbstractMonitor) ClockStop() {
if monitor.Stopped() {
return
}
close(monitor.stopC)
}
func (monitor *Monitor) Stopped() bool {
select { select {
case <-monitor.stopC: case <-mon.stopC:
return true return
default: default:
return false close(mon.stopC)
} }
} }
func (monitor *Monitor) Tick() { func (mon *AbstractMonitor) test() bool { return false }
// TODO: test
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
reqStart := getMs() reqStart := getMs()
isUp := monitor.doRequest() up := iface.test()
lag := getMs() - reqStart lag := getMs() - reqStart
if len(monitor.history) == 9 { histSize := HistorySize
monitor.config.Logger.Printf("%v is now saturated\n", monitor.Name) if mon.ThresholdCount {
histSize = int(mon.Threshold)
} }
if len(monitor.history) >= 10 {
monitor.history = monitor.history[len(monitor.history)-9:]
}
monitor.history = append(monitor.history, isUp)
monitor.AnalyseData()
if isUp == true && monitor.MetricID > 0 { if len(mon.history) == histSize-1 {
monitor.SendMetric(lag) logrus.Warnf("%v is now saturated\n", mon.Name)
}
if len(mon.history) >= histSize {
mon.history = mon.history[len(mon.history)-(histSize-1):]
}
mon.history = append(mon.history, up)
mon.AnalyseData()
// report lag
if mon.MetricID > 0 {
go mon.config.API.SendMetric(mon.MetricID, lag)
} }
} }
func (monitor *Monitor) doRequest() bool { // TODO: test
client := &http.Client{
Timeout: HttpTimeout,
}
if monitor.StrictTLS == false {
client.Transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
}
resp, err := client.Get(monitor.URL)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
defer resp.Body.Close()
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
return false
}
if monitor.bodyRegexp != nil {
// check body
responseBody, err := ioutil.ReadAll(resp.Body)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
match := monitor.bodyRegexp.Match(responseBody)
if !match {
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody
}
return match
}
return true
}
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident // AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
func (monitor *Monitor) AnalyseData() { func (mon *AbstractMonitor) AnalyseData() {
// look at the past few incidents // look at the past few incidents
numDown := 0 numDown := 0
for _, wasUp := range monitor.history { for _, wasUp := range mon.history {
if wasUp == false { if wasUp == false {
numDown++ numDown++
} }
} }
t := (float32(numDown) / float32(len(monitor.history))) * 100 t := (float32(numDown) / float32(len(mon.history))) * 100
monitor.config.Logger.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second)) l := logrus.WithFields(logrus.Fields{
"monitor": mon.Name,
"time": time.Now().Format(mon.config.DateFormat),
})
if numDown == 0 {
l.Printf("monitor is up")
} else if mon.ThresholdCount {
l.Printf("monitor down %d/%d", numDown, int(mon.Threshold))
} else {
l.Printf("monitor down %.2f%%/%.2f%%", t, mon.Threshold)
}
if len(monitor.history) != 10 { histSize := HistorySize
if mon.ThresholdCount {
histSize = int(mon.Threshold)
}
if len(mon.history) != histSize {
// not saturated // not saturated
return return
} }
if t > monitor.Threshold && monitor.incident == nil { triggered := (mon.ThresholdCount && numDown == int(mon.Threshold)) || (!mon.ThresholdCount && t > mon.Threshold)
monitor.incident = &Incident{
Name: monitor.Name + " - " + monitor.config.SystemName, if triggered && mon.incident == nil {
ComponentID: monitor.ComponentID, // create incident
Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat), tplData := getTemplateData(mon)
tplData["FailReason"] = mon.lastFailReason
subject, message := mon.Template.Investigating.Exec(tplData)
mon.incident = &Incident{
Name: subject,
ComponentID: mon.ComponentID,
Message: message,
Notify: true, Notify: true,
} }
if len(monitor.lastFailReason) > 0 {
monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`"
}
// is down, create an incident // is down, create an incident
monitor.config.Logger.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason) l.Warnf("creating incident. Monitor is down: %v", mon.lastFailReason)
// set investigating status // set investigating status
monitor.incident.SetInvestigating() mon.incident.SetInvestigating()
// create/update incident // create/update incident
if err := monitor.incident.Send(monitor.config); err != nil { if err := mon.incident.Send(mon.config); err != nil {
monitor.config.Logger.Printf("Error sending incident: %v\n", err) l.Printf("Error sending incident: %v", err)
}
} else if t < monitor.Threshold && monitor.incident != nil {
// was down, created an incident, its now ok, make it resolved.
monitor.config.Logger.Printf("%v resolved downtime incident", monitor.Name)
// resolve incident
monitor.incident.Message = "\n**Resolved** - " + time.Now().Format(DefaultTimeFormat) + "\n\n - - - \n\n" + monitor.incident.Message
monitor.incident.SetFixed()
monitor.incident.Send(monitor.config)
monitor.lastFailReason = ""
monitor.incident = nil
}
}
func (monitor *Monitor) ValidateConfiguration() error {
if len(monitor.ExpectedBody) > 0 {
exp, err := regexp.Compile(monitor.ExpectedBody)
if err != nil {
return err
} }
monitor.bodyRegexp = exp return
} }
if len(monitor.ExpectedBody) == 0 && monitor.ExpectedStatusCode == 0 { // still triggered or no incident
return errors.New("Nothing to check, both 'expected_body' and 'expected_status_code' fields empty") if triggered || mon.incident == nil {
return
} }
if monitor.CheckInterval < 1 { // was down, created an incident, its now ok, make it resolved.
monitor.CheckInterval = DefaultInterval l.Warn("Resolving incident")
// resolve incident
tplData := getTemplateData(mon)
tplData["incident"] = mon.incident
subject, message := mon.Template.Fixed.Exec(tplData)
mon.incident.Name = subject
mon.incident.Message = message
mon.incident.SetFixed()
if err := mon.incident.Send(mon.config); err != nil {
l.Printf("Error sending incident: %v", err)
} }
monitor.Method = strings.ToUpper(monitor.Method) mon.lastFailReason = ""
switch monitor.Method { mon.incident = nil
case "GET", "POST", "DELETE", "OPTIONS", "HEAD":
break
case "":
monitor.Method = "GET"
default:
return fmt.Errorf("Unsupported check method: %v", monitor.Method)
}
if monitor.ComponentID == 0 && monitor.MetricID == 0 {
return errors.New("component_id & metric_id are unset")
}
if monitor.Threshold <= 0 {
monitor.Threshold = 100
}
return nil
} }

View File

@@ -4,6 +4,7 @@ Features
-------- --------
- [x] Creates & Resolves Incidents - [x] Creates & Resolves Incidents
- [x] Check URLs by response code and/or body contents
- [x] Posts monitor lag to cachet graphs - [x] Posts monitor lag to cachet graphs
- [x] Updates Component to Partial Outage - [x] Updates Component to Partial Outage
- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring) - [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring)
@@ -27,10 +28,17 @@ Configuration
"url": "Ping URL", "url": "Ping URL",
// optional, http method (defaults GET) // optional, http method (defaults GET)
"method": "get", "method": "get",
// optional, HTTP headers to add to the request (default: none)
"headers": [
// specify the name and value of each HTTP header, e.g. Authorization
{ "name": "Authorization", "value": "Basic <hash>" }
],
// self-signed ssl certificate // self-signed ssl certificate
"strict_tls": true, "strict_tls": true,
// seconds between checks // seconds between checks
"interval": 10, "interval": 10,
// HTTP request timeout, in seconds
"timeout": 5,
// post lag to cachet metric (graph) // post lag to cachet metric (graph)
// note either metric ID or component ID are required // note either metric ID or component ID are required
"metric_id": <metric id>, "metric_id": <metric id>,
@@ -89,3 +97,27 @@ Package usage
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside. When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor) [API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
## License
MIT License
Copyright (c) 2016 Castaway Labs LLC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

15
tcp.go Normal file
View File

@@ -0,0 +1,15 @@
package cachet
// TCPMonitor is the configuration of a TCP monitor. It embeds AbstractMonitor,
// whose fields are squashed into the same level when decoded via mapstructure.
//
// NOTE(review): the fields below (DNSServer, Domain, Type, Expect) read like
// DNS lookup settings rather than TCP ones — confirm whether they were copied
// from the DNS monitor and are intended here.
type TCPMonitor struct {
AbstractMonitor `mapstructure:",squash"`
// DNSServer is a "host:port" address, in the same form as the output of
// net.JoinHostPort.
// When empty it defaults to the configuration parsed from /etc/resolv.conf.
DNSServer string
// Domain is the name to look up; it will be converted to an FQDN.
Domain string
// Type is presumably the record type to query — TODO confirm against the
// code that consumes this struct.
Type string
// Expect lists the expected answers, each one a regular expression.
Expect []string
}

53
template.go Normal file
View File

@@ -0,0 +1,53 @@
package cachet
import (
"bytes"
"text/template"
)
// MessageTemplate holds the subject and message text of a notification
// together with their compiled text/template forms.
type MessageTemplate struct {
	// Subject is the text/template source of the subject line.
	Subject string `json:"subject"`
	// Message is the text/template source of the message body.
	Message string `json:"message"`

	// Compiled forms of Subject and Message. They are set by Compile and are
	// nil before that, or when the corresponding source text is empty.
	subjectTpl *template.Template
	messageTpl *template.Template
}

// SetDefault copies Subject and Message from d into t wherever t leaves them
// empty; values already set on t are kept. It does not touch the compiled
// templates, so Compile must be called afterwards for the defaults to render.
func (t *MessageTemplate) SetDefault(d MessageTemplate) {
	if len(t.Subject) == 0 {
		t.Subject = d.Subject
	}
	if len(t.Message) == 0 {
		t.Message = d.Message
	}
}

// Compile parses Subject and Message into templates for later use by Exec.
// An empty Subject or Message is not an error; it simply renders as "".
// The first parse error is returned and the failing template is left unset.
//
// TODO: test
func (t *MessageTemplate) Compile() error {
	// Drop anything compiled earlier so that a Subject or Message that has
	// since been emptied cannot keep rendering stale text.
	t.subjectTpl, t.messageTpl = nil, nil

	var err error
	if len(t.Subject) > 0 {
		if t.subjectTpl, err = compileTemplate(t.Subject); err != nil {
			return err
		}
	}
	if len(t.Message) > 0 {
		if t.messageTpl, err = compileTemplate(t.Message); err != nil {
			return err
		}
	}
	return nil
}

// Exec renders the subject and the message with data and returns them in
// that order. A template that has not been compiled renders as "".
func (t *MessageTemplate) Exec(data interface{}) (string, string) {
	return t.exec(t.subjectTpl, data), t.exec(t.messageTpl, data)
}

// exec renders a single template with data. A nil tpl yields "" instead of
// panicking inside text/template, which is what happens when Subject or
// Message is empty or Compile was never called / failed.
func (t *MessageTemplate) exec(tpl *template.Template, data interface{}) string {
	if tpl == nil {
		return ""
	}
	buf := new(bytes.Buffer)
	// Best effort: the signature has no error result, so when execution fails
	// part-way the output rendered up to that point is still returned.
	_ = tpl.Execute(buf, data)
	return buf.String()
}

// compileTemplate parses text as an unnamed text/template.
func compileTemplate(text string) (*template.Template, error) {
	return template.New("").Parse(text)
}