Compare commits
20 Commits
v2.0
...
v3-alpha-1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c04128ce36 | ||
|
|
1b93730121 | ||
|
|
85d92bcb07 | ||
|
|
0dc54e4e6e | ||
|
|
b3bc1d4405 | ||
|
|
b4fa33b8ad | ||
|
|
edfd4a51e6 | ||
|
|
a2d8128109 | ||
|
|
d43eca4b7d | ||
|
|
36bf228599 | ||
|
|
0cd6fa13a7 | ||
|
|
e910807973 | ||
|
|
9b29a0450c | ||
|
|
aaecc1669a | ||
|
|
48586eb0aa | ||
|
|
2c364f3d2f | ||
|
|
0de0baf5f9 | ||
|
|
3f4b9ced77 | ||
|
|
20e4dd1414 | ||
|
|
29b02fd164 |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
gin-bin
|
/config.yml
|
||||||
example.config.local.json
|
/config.json
|
||||||
|
examples/
|
||||||
79
api.go
Normal file
79
api.go
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CachetAPI carries the connection settings used for every call to the
// Cachet HTTP API.
type CachetAPI struct {
	// URL is the API base address; request paths are appended to it as-is.
	URL string `json:"url"`
	// Token is the API token sent with each request.
	Token string `json:"token"`
	// Insecure turns off TLS certificate verification when true.
	Insecure bool `json:"insecure"`
}
|
||||||
|
|
||||||
|
type CachetResponse struct {
|
||||||
|
Data json.RawMessage `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
|
func (api CachetAPI) Ping() error {
|
||||||
|
resp, _, err := api.NewRequest("GET", "/ping", nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
return errors.New("API Responded with non-200 status code")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendMetric adds a data point to a cachet monitor
|
||||||
|
func (api CachetAPI) SendMetric(id int, lag int64) {
|
||||||
|
logrus.Debugf("Sending lag metric ID:%d RTT %vms", id, lag)
|
||||||
|
|
||||||
|
jsonBytes, _ := json.Marshal(map[string]interface{}{
|
||||||
|
"value": lag,
|
||||||
|
"timestamp": time.Now().Unix(),
|
||||||
|
})
|
||||||
|
|
||||||
|
resp, _, err := api.NewRequest("POST", "/metrics/"+strconv.Itoa(id)+"/points", jsonBytes)
|
||||||
|
if err != nil || resp.StatusCode != 200 {
|
||||||
|
logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
|
// NewRequest wraps http.NewRequest
|
||||||
|
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
|
||||||
|
req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
|
||||||
|
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("X-Cachet-Token", api.Token)
|
||||||
|
|
||||||
|
transport := http.DefaultTransport.(*http.Transport)
|
||||||
|
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: api.Insecure}
|
||||||
|
client := &http.Client{
|
||||||
|
Transport: transport,
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, CachetResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var body struct {
|
||||||
|
Data json.RawMessage `json:"data"`
|
||||||
|
}
|
||||||
|
err = json.NewDecoder(res.Body).Decode(&body)
|
||||||
|
|
||||||
|
return res, body, err
|
||||||
|
}
|
||||||
189
cli/main.go
189
cli/main.go
@@ -3,105 +3,125 @@ package main
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"flag"
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
cachet "github.com/castawaylabs/cachet-monitor"
|
cachet "github.com/castawaylabs/cachet-monitor"
|
||||||
|
docopt "github.com/docopt/docopt-go"
|
||||||
|
"github.com/mitchellh/mapstructure"
|
||||||
|
"gopkg.in/yaml.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
var configPath string
|
const usage = `cachet-monitor
|
||||||
var systemName string
|
|
||||||
var logPath string
|
Usage:
|
||||||
|
cachet-monitor (-c PATH | --config PATH) [--log=LOGPATH] [--name=NAME] [--immediate]
|
||||||
|
cachet-monitor -h | --help | --version
|
||||||
|
cachet-monitor print-config
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
PATH path to config.json
|
||||||
|
LOGPATH path to log output (defaults to STDOUT)
|
||||||
|
NAME name of this logger
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
cachet-monitor -c /root/cachet-monitor.json
|
||||||
|
cachet-monitor -c /root/cachet-monitor.json --log=/var/log/cachet-monitor.log --name="development machine"
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-c PATH.json --config PATH Path to configuration file
|
||||||
|
-h --help Show this screen.
|
||||||
|
--version Show version
|
||||||
|
--immediate Tick immediately (by default waits for first defined interval)
|
||||||
|
print-config Print example configuration
|
||||||
|
|
||||||
|
Environment varaibles:
|
||||||
|
CACHET_API override API url from configuration
|
||||||
|
CACHET_TOKEN override API token from configuration
|
||||||
|
CACHET_DEV set to enable dev logging`
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path")
|
arguments, _ := docopt.Parse(usage, nil, true, "cachet-monitor", false)
|
||||||
flag.StringVar(&systemName, "name", "", "System Name")
|
|
||||||
flag.StringVar(&logPath, "log", "", "Log path")
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
cfg, err := getConfiguration(configPath)
|
cfg, err := getConfiguration(arguments["--config"].(string))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
logrus.Panicf("Unable to start (reading config): %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(systemName) > 0 {
|
if immediate, ok := arguments["--immediate"]; ok {
|
||||||
cfg.SystemName = systemName
|
cfg.Immediate = immediate.(bool)
|
||||||
}
|
}
|
||||||
if len(logPath) > 0 {
|
|
||||||
cfg.LogPath = logPath
|
if name := arguments["--name"]; name != nil {
|
||||||
|
cfg.SystemName = name.(string)
|
||||||
}
|
}
|
||||||
|
logrus.SetOutput(getLogger(arguments["--log"]))
|
||||||
|
|
||||||
if len(os.Getenv("CACHET_API")) > 0 {
|
if len(os.Getenv("CACHET_API")) > 0 {
|
||||||
cfg.APIUrl = os.Getenv("CACHET_API")
|
cfg.API.URL = os.Getenv("CACHET_API")
|
||||||
}
|
}
|
||||||
if len(os.Getenv("CACHET_TOKEN")) > 0 {
|
if len(os.Getenv("CACHET_TOKEN")) > 0 {
|
||||||
cfg.APIToken = os.Getenv("CACHET_TOKEN")
|
cfg.API.Token = os.Getenv("CACHET_TOKEN")
|
||||||
|
}
|
||||||
|
if len(os.Getenv("CACHET_DEV")) > 0 {
|
||||||
|
logrus.SetLevel(logrus.DebugLevel)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cfg.ValidateConfiguration(); err != nil {
|
if valid := cfg.Validate(); !valid {
|
||||||
panic(err)
|
logrus.Errorf("Invalid configuration")
|
||||||
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.Logger.Printf("System: %s\nAPI: %s\nMonitors: %d\n\n", cfg.SystemName, cfg.APIUrl, len(cfg.Monitors))
|
logrus.Debug("Configuration valid")
|
||||||
|
logrus.Infof("System: %s", cfg.SystemName)
|
||||||
|
logrus.Infof("API: %s", cfg.API.URL)
|
||||||
|
logrus.Infof("Monitors: %d\n", len(cfg.Monitors))
|
||||||
|
|
||||||
|
logrus.Infof("Pinging cachet")
|
||||||
|
if err := cfg.API.Ping(); err != nil {
|
||||||
|
logrus.Errorf("Cannot ping cachet!\n%v", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
logrus.Infof("Ping OK")
|
||||||
|
|
||||||
wg := &sync.WaitGroup{}
|
wg := &sync.WaitGroup{}
|
||||||
for _, mon := range cfg.Monitors {
|
for index, monitor := range cfg.Monitors {
|
||||||
cfg.Logger.Printf(" Starting %s: %d seconds check interval\n - %v %s", mon.Name, mon.CheckInterval, mon.Method, mon.URL)
|
logrus.Infof("Starting Monitor #%d: ", index)
|
||||||
|
logrus.Infof("Features: \n - %v", strings.Join(monitor.Describe(), "\n - "))
|
||||||
|
|
||||||
// print features
|
go monitor.ClockStart(cfg, monitor, wg)
|
||||||
if mon.ExpectedStatusCode > 0 {
|
|
||||||
cfg.Logger.Printf(" - Expect HTTP %d", mon.ExpectedStatusCode)
|
|
||||||
}
|
|
||||||
if len(mon.ExpectedBody) > 0 {
|
|
||||||
cfg.Logger.Printf(" - Expect Body to match \"%v\"", mon.ExpectedBody)
|
|
||||||
}
|
|
||||||
if mon.MetricID > 0 {
|
|
||||||
cfg.Logger.Printf(" - Log lag to metric id %d\n", mon.MetricID)
|
|
||||||
}
|
|
||||||
if mon.ComponentID > 0 {
|
|
||||||
cfg.Logger.Printf(" - Update component id %d\n\n", mon.ComponentID)
|
|
||||||
}
|
|
||||||
|
|
||||||
go mon.Start(cfg, wg)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
signals := make(chan os.Signal, 1)
|
signals := make(chan os.Signal, 1)
|
||||||
signal.Notify(signals, os.Interrupt, os.Kill)
|
signal.Notify(signals, os.Interrupt, os.Kill)
|
||||||
<-signals
|
<-signals
|
||||||
|
|
||||||
cfg.Logger.Println("Abort: Waiting monitors to finish")
|
logrus.Warnf("Abort: Waiting monitors to finish")
|
||||||
for _, mon := range cfg.Monitors {
|
for _, mon := range cfg.Monitors {
|
||||||
mon.Stop()
|
mon.GetMonitor().ClockStop()
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func getLogger(logPath string) *log.Logger {
|
func getLogger(logPath interface{}) *os.File {
|
||||||
var logWriter = os.Stdout
|
if logPath == nil || len(logPath.(string)) == 0 {
|
||||||
var err error
|
return os.Stdout
|
||||||
|
}
|
||||||
|
|
||||||
if len(logPath) > 0 {
|
file, err := os.Create(logPath.(string))
|
||||||
logWriter, err = os.Create(logPath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Unable to open file '%v' for logging\n", logPath)
|
logrus.Errorf("Unable to open file '%v' for logging: \n%v", logPath, err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
flags := log.Llongfile | log.Ldate | log.Ltime
|
return file
|
||||||
if len(os.Getenv("CACHET_DEV")) > 0 {
|
|
||||||
flags = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
return log.New(logWriter, "", flags)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getConfiguration(path string) (*cachet.CachetMonitor, error) {
|
func getConfiguration(path string) (*cachet.CachetMonitor, error) {
|
||||||
@@ -114,26 +134,73 @@ func getConfiguration(path string) (*cachet.CachetMonitor, error) {
|
|||||||
// download config
|
// download config
|
||||||
response, err := http.Get(path)
|
response, err := http.Get(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.New("Cannot download network config: " + err.Error())
|
logrus.Warn("Unable to download network configuration")
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer response.Body.Close()
|
defer response.Body.Close()
|
||||||
data, _ = ioutil.ReadAll(response.Body)
|
data, _ = ioutil.ReadAll(response.Body)
|
||||||
|
|
||||||
fmt.Println("Downloaded network configuration.")
|
logrus.Info("Downloaded network configuration.")
|
||||||
} else {
|
} else {
|
||||||
data, err = ioutil.ReadFile(path)
|
data, err = ioutil.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.New("Config file '" + path + "' missing!")
|
return nil, errors.New("Unable to open file: '" + path + "'")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") {
|
||||||
fmt.Println(err)
|
err = yaml.Unmarshal(data, &cfg)
|
||||||
return nil, errors.New("Cannot parse config!")
|
} else {
|
||||||
|
err = json.Unmarshal(data, &cfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.Logger = getLogger(cfg.LogPath)
|
if err != nil {
|
||||||
|
logrus.Warnf("Unable to parse configuration file")
|
||||||
|
}
|
||||||
|
|
||||||
return &cfg, nil
|
cfg.Monitors = make([]cachet.MonitorInterface, len(cfg.RawMonitors))
|
||||||
|
for index, rawMonitor := range cfg.RawMonitors {
|
||||||
|
var t cachet.MonitorInterface
|
||||||
|
var err error
|
||||||
|
|
||||||
|
// get default type
|
||||||
|
monType := cachet.GetMonitorType("")
|
||||||
|
if t, ok := rawMonitor["type"].(string); ok {
|
||||||
|
monType = cachet.GetMonitorType(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch monType {
|
||||||
|
case "http":
|
||||||
|
var s cachet.HTTPMonitor
|
||||||
|
err = mapstructure.Decode(rawMonitor, &s)
|
||||||
|
t = &s
|
||||||
|
case "dns":
|
||||||
|
var s cachet.DNSMonitor
|
||||||
|
err = mapstructure.Decode(rawMonitor, &s)
|
||||||
|
t = &s
|
||||||
|
case "icmp":
|
||||||
|
var s cachet.ICMPMonitor
|
||||||
|
err = mapstructure.Decode(rawMonitor, &s)
|
||||||
|
t = &s
|
||||||
|
case "tcp":
|
||||||
|
var s cachet.TCPMonitor
|
||||||
|
err = mapstructure.Decode(rawMonitor, &s)
|
||||||
|
t = &s
|
||||||
|
default:
|
||||||
|
logrus.Errorf("Invalid monitor type (index: %d) %v", index, monType)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
t.GetMonitor().Type = monType
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Unable to unmarshal monitor to type (index: %d): %v", index, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg.Monitors[index] = t
|
||||||
|
}
|
||||||
|
|
||||||
|
return &cfg, err
|
||||||
}
|
}
|
||||||
|
|||||||
80
config.go
80
config.go
@@ -1,65 +1,89 @@
|
|||||||
package cachet
|
package cachet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"log"
|
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CachetMonitor struct {
|
type CachetMonitor struct {
|
||||||
Logger *log.Logger `json:"-"`
|
SystemName string `json:"system_name" yaml:"system_name"`
|
||||||
|
DateFormat string `json:"date_format" yaml:"date_format"`
|
||||||
|
API CachetAPI `json:"api"`
|
||||||
|
RawMonitors []map[string]interface{} `json:"monitors" yaml:"monitors"`
|
||||||
|
|
||||||
APIUrl string `json:"api_url"`
|
Monitors []MonitorInterface `json:"-" yaml:"-"`
|
||||||
APIToken string `json:"api_token"`
|
Immediate bool `json:"-" yaml:"-"`
|
||||||
SystemName string `json:"system_name"`
|
|
||||||
LogPath string `json:"log_path"`
|
|
||||||
InsecureAPI bool `json:"insecure_api"`
|
|
||||||
|
|
||||||
Monitors []*Monitor `json:"monitors"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cfg *CachetMonitor) ValidateConfiguration() error {
|
// Validate configuration
|
||||||
if cfg.Logger == nil {
|
func (cfg *CachetMonitor) Validate() bool {
|
||||||
cfg.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime)
|
valid := true
|
||||||
}
|
|
||||||
|
|
||||||
if len(cfg.SystemName) == 0 {
|
if len(cfg.SystemName) == 0 {
|
||||||
// get hostname
|
// get hostname
|
||||||
cfg.SystemName = getHostname()
|
cfg.SystemName = getHostname()
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(cfg.APIToken) == 0 || len(cfg.APIUrl) == 0 {
|
if len(cfg.DateFormat) == 0 {
|
||||||
return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n")
|
cfg.DateFormat = DefaultTimeFormat
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cfg.API.Token) == 0 || len(cfg.API.URL) == 0 {
|
||||||
|
logrus.Warnf("API URL or API Token missing.\nGet help at https://github.com/castawaylabs/cachet-monitor")
|
||||||
|
valid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(cfg.Monitors) == 0 {
|
if len(cfg.Monitors) == 0 {
|
||||||
return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n")
|
logrus.Warnf("No monitors defined!\nSee help for example configuration")
|
||||||
|
valid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, monitor := range cfg.Monitors {
|
for index, monitor := range cfg.Monitors {
|
||||||
if err := monitor.ValidateConfiguration(); err != nil {
|
if errs := monitor.Validate(); len(errs) > 0 {
|
||||||
return err
|
logrus.Warnf("Monitor validation errors (index %d): %v", index, "\n - "+strings.Join(errs, "\n - "))
|
||||||
|
valid = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return valid
|
||||||
}
|
}
|
||||||
|
|
||||||
// getHostname returns id of the current system
|
// getHostname returns id of the current system
|
||||||
func getHostname() string {
|
func getHostname() string {
|
||||||
hostname, err := os.Hostname()
|
hostname, err := os.Hostname()
|
||||||
if err != nil || len(hostname) == 0 {
|
if err == nil && len(hostname) > 0 {
|
||||||
addrs, err := net.InterfaceAddrs()
|
return hostname
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
addrs, err := net.InterfaceAddrs()
|
||||||
|
if err != nil || len(addrs) == 0 {
|
||||||
return "unknown"
|
return "unknown"
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, addr := range addrs {
|
return addrs[0].String()
|
||||||
return addr.String()
|
}
|
||||||
}
|
|
||||||
|
// getMs returns the current wall-clock time as whole milliseconds since the
// Unix epoch.
func getMs() int64 {
	const nanosPerMilli = int64(time.Millisecond)

	nanos := time.Now().UnixNano()
	return nanos / nanosPerMilli
}
|
||||||
|
|
||||||
|
func GetMonitorType(t string) string {
|
||||||
|
if len(t) == 0 {
|
||||||
|
return "http"
|
||||||
}
|
}
|
||||||
|
|
||||||
return hostname
|
return strings.ToLower(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
|
||||||
|
return map[string]interface{}{
|
||||||
|
"SystemName": monitor.config.SystemName,
|
||||||
|
"API": monitor.config.API,
|
||||||
|
"Monitor": monitor,
|
||||||
|
"now": time.Now().Format(monitor.config.DateFormat),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
15
config_test.go
Normal file
15
config_test.go
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetMonitorType(t *testing.T) {
|
||||||
|
if monType := GetMonitorType(""); monType != "http" {
|
||||||
|
t.Error("monitor type `` should default to http")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mt := GetMonitorType("HTTP"); mt != "http" {
|
||||||
|
t.Error("does not return correct monitor type")
|
||||||
|
}
|
||||||
|
}
|
||||||
5
dns.go
Normal file
5
dns.go
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
type DNSMonitor struct {
|
||||||
|
AbstractMonitor `mapstructure:",squash"`
|
||||||
|
}
|
||||||
@@ -1,17 +1,22 @@
|
|||||||
{
|
{
|
||||||
"api_url": "https://demo.cachethq.io/api/v1",
|
"api": {
|
||||||
"api_token": "9yMHsdioQosnyVK4iCVR",
|
"url": "https://demo.cachethq.io/api/v1",
|
||||||
"interval": 5,
|
"token": "9yMHsdioQosnyVK4iCVR",
|
||||||
|
"insecure": true
|
||||||
|
},
|
||||||
"monitors": [
|
"monitors": [
|
||||||
{
|
{
|
||||||
"name": "nodegear frontend",
|
"name": "google",
|
||||||
"url": "https://nodegear.io/ping",
|
"url": "https://google.com",
|
||||||
"metric_id": 1,
|
|
||||||
"threshold": 80,
|
"threshold": 80,
|
||||||
"component_id": null,
|
"component_id": 1,
|
||||||
|
"interval": 10,
|
||||||
|
"timeout": 5,
|
||||||
|
"headers": {
|
||||||
|
"Authorization": "Basic <hash>"
|
||||||
|
},
|
||||||
"expected_status_code": 200,
|
"expected_status_code": 200,
|
||||||
"strict_tls": true
|
"strict_tls": true
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"insecure_api": false
|
|
||||||
}
|
}
|
||||||
14
example.config.yml
Normal file
14
example.config.yml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
api:
|
||||||
|
url: https://demo.cachethq.io/api/v1
|
||||||
|
token: 9yMHsdioQosnyVK4iCVR
|
||||||
|
monitors:
|
||||||
|
- name: google
|
||||||
|
target: https://google.com
|
||||||
|
threshold: 80
|
||||||
|
component_id: 1
|
||||||
|
interval: 10
|
||||||
|
timeout: 5
|
||||||
|
headers:
|
||||||
|
Authorization: Basic <hash>
|
||||||
|
expected_status_code: 200
|
||||||
|
strict: true
|
||||||
133
http.go
133
http.go
@@ -1,58 +1,125 @@
|
|||||||
package cachet
|
package cachet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) {
|
// Investigating template
|
||||||
req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody))
|
var defaultHTTPInvestigatingTpl = MessageTemplate{
|
||||||
|
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
|
||||||
|
Message: `{{ .Monitor.Name }} check **failed** (server time: {{ .now }})
|
||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
{{ .FailReason }}`,
|
||||||
req.Header.Set("X-Cachet-Token", monitor.APIToken)
|
}
|
||||||
|
|
||||||
client := &http.Client{}
|
// Fixed template
|
||||||
if monitor.InsecureAPI == true {
|
var defaultHTTPFixedTpl = MessageTemplate{
|
||||||
client.Transport = &http.Transport{
|
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
|
||||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
Message: `**Resolved** - {{ .now }}
|
||||||
}
|
|
||||||
|
- - -
|
||||||
|
|
||||||
|
{{ .incident.Message }}`,
|
||||||
|
}
|
||||||
|
|
||||||
|
type HTTPMonitor struct {
|
||||||
|
AbstractMonitor `mapstructure:",squash"`
|
||||||
|
|
||||||
|
Method string
|
||||||
|
ExpectedStatusCode int `mapstructure:"expected_status_code"`
|
||||||
|
Headers map[string]string
|
||||||
|
|
||||||
|
// compiled to Regexp
|
||||||
|
ExpectedBody string `mapstructure:"expected_body"`
|
||||||
|
bodyRegexp *regexp.Regexp
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
|
func (monitor *HTTPMonitor) test() bool {
|
||||||
|
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
|
||||||
|
for k, v := range monitor.Headers {
|
||||||
|
req.Header.Add(k, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := client.Do(req)
|
transport := http.DefaultTransport.(*http.Transport)
|
||||||
|
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: monitor.Strict == false}
|
||||||
|
client := &http.Client{
|
||||||
|
Timeout: time.Duration(monitor.Timeout * time.Second),
|
||||||
|
Transport: transport,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, []byte{}, err
|
monitor.lastFailReason = err.Error()
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
defer res.Body.Close()
|
defer resp.Body.Close()
|
||||||
body, _ := ioutil.ReadAll(res.Body)
|
|
||||||
|
|
||||||
return res, body, nil
|
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
|
||||||
}
|
monitor.lastFailReason = "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
|
||||||
|
return false
|
||||||
// SendMetric sends lag metric point
|
|
||||||
func (monitor *Monitor) SendMetric(delay int64) error {
|
|
||||||
if monitor.MetricID == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonBytes, _ := json.Marshal(&map[string]interface{}{
|
if monitor.bodyRegexp != nil {
|
||||||
"value": delay,
|
// check response body
|
||||||
})
|
responseBody, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
resp, _, err := monitor.config.makeRequest("POST", "/metrics/"+strconv.Itoa(monitor.MetricID)+"/points", jsonBytes)
|
monitor.lastFailReason = err.Error()
|
||||||
if err != nil || resp.StatusCode != 200 {
|
return false
|
||||||
return fmt.Errorf("Could not log data point!\n%v\n", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
if !monitor.bodyRegexp.Match(responseBody) {
|
||||||
|
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMs() int64 {
|
// TODO: test
|
||||||
return time.Now().UnixNano() / int64(time.Millisecond)
|
func (mon *HTTPMonitor) Validate() []string {
|
||||||
|
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
|
||||||
|
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
|
||||||
|
|
||||||
|
errs := mon.AbstractMonitor.Validate()
|
||||||
|
|
||||||
|
if len(mon.ExpectedBody) > 0 {
|
||||||
|
exp, err := regexp.Compile(mon.ExpectedBody)
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, "Regexp compilation failure: "+err.Error())
|
||||||
|
} else {
|
||||||
|
mon.bodyRegexp = exp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(mon.ExpectedBody) == 0 && mon.ExpectedStatusCode == 0 {
|
||||||
|
errs = append(errs, "Both 'expected_body' and 'expected_status_code' fields empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
mon.Method = strings.ToUpper(mon.Method)
|
||||||
|
switch mon.Method {
|
||||||
|
case "GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD":
|
||||||
|
break
|
||||||
|
case "":
|
||||||
|
mon.Method = "GET"
|
||||||
|
default:
|
||||||
|
errs = append(errs, "Unsupported HTTP method: "+mon.Method)
|
||||||
|
}
|
||||||
|
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mon *HTTPMonitor) Describe() []string {
|
||||||
|
features := mon.AbstractMonitor.Describe()
|
||||||
|
features = append(features, "Method: "+mon.Method)
|
||||||
|
|
||||||
|
return features
|
||||||
}
|
}
|
||||||
|
|||||||
5
icmp.go
Normal file
5
icmp.go
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
type ICMPMonitor struct {
|
||||||
|
AbstractMonitor `mapstructure:",squash"`
|
||||||
|
}
|
||||||
24
incident.go
24
incident.go
@@ -4,6 +4,8 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Incident Cachet data model
|
// Incident Cachet data model
|
||||||
@@ -33,7 +35,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cfg.Logger.Printf("cannot fetch component: %v", err)
|
logrus.Warnf("cannot fetch component: %v", err)
|
||||||
}
|
}
|
||||||
case 4:
|
case 4:
|
||||||
// fixed
|
// fixed
|
||||||
@@ -49,21 +51,19 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
|
|||||||
|
|
||||||
jsonBytes, _ := json.Marshal(incident)
|
jsonBytes, _ := json.Marshal(incident)
|
||||||
|
|
||||||
resp, body, err := cfg.makeRequest(requestType, requestURL, jsonBytes)
|
resp, body, err := cfg.API.NewRequest(requestType, requestURL, jsonBytes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var data struct {
|
var data struct {
|
||||||
Incident struct {
|
|
||||||
ID int `json:"id"`
|
ID int `json:"id"`
|
||||||
} `json:"data"`
|
|
||||||
}
|
}
|
||||||
if err := json.Unmarshal(body, &data); err != nil {
|
if err := json.Unmarshal(body.Data, &data); err != nil {
|
||||||
return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body))
|
return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body.Data))
|
||||||
}
|
}
|
||||||
|
|
||||||
incident.ID = data.Incident.ID
|
incident.ID = data.ID
|
||||||
if resp.StatusCode != 200 {
|
if resp.StatusCode != 200 {
|
||||||
return fmt.Errorf("Could not create/update incident!")
|
return fmt.Errorf("Could not create/update incident!")
|
||||||
}
|
}
|
||||||
@@ -72,7 +72,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
|
func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
|
||||||
resp, body, err := cfg.makeRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil)
|
resp, body, err := cfg.API.NewRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
@@ -82,15 +82,13 @@ func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var data struct {
|
var data struct {
|
||||||
Component struct {
|
|
||||||
Status int `json:"status"`
|
Status int `json:"status"`
|
||||||
} `json:"data"`
|
|
||||||
}
|
}
|
||||||
if err := json.Unmarshal(body, &data); err != nil {
|
if err := json.Unmarshal(body.Data, &data); err != nil {
|
||||||
return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body), err)
|
return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body.Data), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return data.Component.Status, nil
|
return data.Status, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetInvestigating sets status to Investigating
|
// SetInvestigating sets status to Investigating
|
||||||
|
|||||||
342
monitor.go
342
monitor.go
@@ -1,41 +1,59 @@
|
|||||||
package cachet
|
package cachet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/tls"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"net/http"
|
|
||||||
"regexp"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/Sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
const HttpTimeout = time.Duration(time.Second)
|
const DefaultInterval = time.Second * 60
|
||||||
const DefaultInterval = 60
|
const DefaultTimeout = time.Second
|
||||||
const DefaultTimeFormat = "15:04:05 Jan 2 MST"
|
const DefaultTimeFormat = "15:04:05 Jan 2 MST"
|
||||||
|
const HistorySize = 10
|
||||||
|
|
||||||
// Monitor data model
|
type MonitorInterface interface {
|
||||||
type Monitor struct {
|
ClockStart(*CachetMonitor, MonitorInterface, *sync.WaitGroup)
|
||||||
Name string `json:"name"`
|
ClockStop()
|
||||||
URL string `json:"url"`
|
tick(MonitorInterface)
|
||||||
Method string `json:"method"`
|
test() bool
|
||||||
StrictTLS bool `json:"strict_tls"`
|
|
||||||
CheckInterval time.Duration `json:"interval"`
|
|
||||||
|
|
||||||
MetricID int `json:"metric_id"`
|
Validate() []string
|
||||||
ComponentID int `json:"component_id"`
|
GetMonitor() *AbstractMonitor
|
||||||
|
Describe() []string
|
||||||
|
}
|
||||||
|
|
||||||
// Threshold = percentage
|
// AbstractMonitor data model
|
||||||
Threshold float32 `json:"threshold"`
|
type AbstractMonitor struct {
|
||||||
ExpectedStatusCode int `json:"expected_status_code"`
|
Name string
|
||||||
// compiled to Regexp
|
Target string
|
||||||
ExpectedBody string `json:"expected_body"`
|
|
||||||
bodyRegexp *regexp.Regexp
|
// (default)http, tcp, dns, icmp
|
||||||
|
Type string
|
||||||
|
Strict bool
|
||||||
|
|
||||||
|
Interval time.Duration
|
||||||
|
Timeout time.Duration
|
||||||
|
|
||||||
|
MetricID int `mapstructure:"metric_id"`
|
||||||
|
ComponentID int `mapstructure:"component_id"`
|
||||||
|
|
||||||
|
// Templating stuff
|
||||||
|
Template struct {
|
||||||
|
Investigating MessageTemplate
|
||||||
|
Fixed MessageTemplate
|
||||||
|
}
|
||||||
|
|
||||||
|
// Threshold = percentage / number of down incidents
|
||||||
|
Threshold float32
|
||||||
|
ThresholdCount bool `mapstructure:"threshold_count"`
|
||||||
|
|
||||||
|
// lag / average(lagHistory) * 100 = percentage above average lag
|
||||||
|
// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
|
||||||
|
PerformanceThreshold float32
|
||||||
|
|
||||||
history []bool
|
history []bool
|
||||||
|
lagHistory []float32
|
||||||
lastFailReason string
|
lastFailReason string
|
||||||
incident *Incident
|
incident *Incident
|
||||||
config *CachetMonitor
|
config *CachetMonitor
|
||||||
@@ -44,17 +62,67 @@ type Monitor struct {
|
|||||||
stopC chan bool
|
stopC chan bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) {
|
func (mon *AbstractMonitor) Validate() []string {
|
||||||
|
errs := []string{}
|
||||||
|
|
||||||
|
if len(mon.Name) == 0 {
|
||||||
|
errs = append(errs, "Name is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mon.Interval < 1 {
|
||||||
|
mon.Interval = DefaultInterval
|
||||||
|
}
|
||||||
|
if mon.Timeout < 1 {
|
||||||
|
mon.Timeout = DefaultTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
if mon.Timeout > mon.Interval {
|
||||||
|
errs = append(errs, "Timeout greater than interval")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mon.ComponentID == 0 && mon.MetricID == 0 {
|
||||||
|
errs = append(errs, "component_id & metric_id are unset")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mon.Threshold <= 0 {
|
||||||
|
mon.Threshold = 100
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := mon.Template.Fixed.Compile(); err != nil {
|
||||||
|
errs = append(errs, "Could not compile \"fixed\" template: "+err.Error())
|
||||||
|
}
|
||||||
|
if err := mon.Template.Investigating.Compile(); err != nil {
|
||||||
|
errs = append(errs, "Could not compile \"investigating\" template: "+err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
func (mon *AbstractMonitor) GetMonitor() *AbstractMonitor {
|
||||||
|
return mon
|
||||||
|
}
|
||||||
|
func (mon *AbstractMonitor) Describe() []string {
|
||||||
|
features := []string{"Type: " + mon.Type}
|
||||||
|
|
||||||
|
if len(mon.Name) > 0 {
|
||||||
|
features = append(features, "Name: "+mon.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
return features
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mon *AbstractMonitor) ClockStart(cfg *CachetMonitor, iface MonitorInterface, wg *sync.WaitGroup) {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
mon.config = cfg
|
mon.config = cfg
|
||||||
mon.stopC = make(chan bool)
|
mon.stopC = make(chan bool)
|
||||||
mon.Tick()
|
if cfg.Immediate {
|
||||||
|
mon.tick(iface)
|
||||||
|
}
|
||||||
|
|
||||||
ticker := time.NewTicker(mon.CheckInterval * time.Second)
|
ticker := time.NewTicker(mon.Interval * time.Second)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
mon.Tick()
|
mon.tick(iface)
|
||||||
case <-mon.stopC:
|
case <-mon.stopC:
|
||||||
wg.Done()
|
wg.Done()
|
||||||
return
|
return
|
||||||
@@ -62,174 +130,124 @@ func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) Stop() {
|
func (mon *AbstractMonitor) ClockStop() {
|
||||||
if monitor.Stopped() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
close(monitor.stopC)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (monitor *Monitor) Stopped() bool {
|
|
||||||
select {
|
select {
|
||||||
case <-monitor.stopC:
|
case <-mon.stopC:
|
||||||
return true
|
return
|
||||||
default:
|
default:
|
||||||
return false
|
close(mon.stopC)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) Tick() {
|
func (mon *AbstractMonitor) test() bool { return false }
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
|
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
|
||||||
reqStart := getMs()
|
reqStart := getMs()
|
||||||
isUp := monitor.doRequest()
|
up := iface.test()
|
||||||
lag := getMs() - reqStart
|
lag := getMs() - reqStart
|
||||||
|
|
||||||
if len(monitor.history) == 9 {
|
histSize := HistorySize
|
||||||
monitor.config.Logger.Printf("%v is now saturated\n", monitor.Name)
|
if mon.ThresholdCount {
|
||||||
|
histSize = int(mon.Threshold)
|
||||||
}
|
}
|
||||||
if len(monitor.history) >= 10 {
|
|
||||||
monitor.history = monitor.history[len(monitor.history)-9:]
|
|
||||||
}
|
|
||||||
monitor.history = append(monitor.history, isUp)
|
|
||||||
monitor.AnalyseData()
|
|
||||||
|
|
||||||
if isUp == true && monitor.MetricID > 0 {
|
if len(mon.history) == histSize-1 {
|
||||||
monitor.SendMetric(lag)
|
logrus.Warnf("%v is now saturated\n", mon.Name)
|
||||||
|
}
|
||||||
|
if len(mon.history) >= histSize {
|
||||||
|
mon.history = mon.history[len(mon.history)-(histSize-1):]
|
||||||
|
}
|
||||||
|
mon.history = append(mon.history, up)
|
||||||
|
mon.AnalyseData()
|
||||||
|
|
||||||
|
// report lag
|
||||||
|
if mon.MetricID > 0 {
|
||||||
|
go mon.config.API.SendMetric(mon.MetricID, lag)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) doRequest() bool {
|
// TODO: test
|
||||||
client := &http.Client{
|
|
||||||
Timeout: HttpTimeout,
|
|
||||||
}
|
|
||||||
if monitor.StrictTLS == false {
|
|
||||||
client.Transport = &http.Transport{
|
|
||||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resp, err := client.Get(monitor.URL)
|
|
||||||
if err != nil {
|
|
||||||
monitor.lastFailReason = err.Error()
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
|
|
||||||
monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if monitor.bodyRegexp != nil {
|
|
||||||
// check body
|
|
||||||
responseBody, err := ioutil.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
monitor.lastFailReason = err.Error()
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
match := monitor.bodyRegexp.Match(responseBody)
|
|
||||||
if !match {
|
|
||||||
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody
|
|
||||||
}
|
|
||||||
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
||||||
func (monitor *Monitor) AnalyseData() {
|
func (mon *AbstractMonitor) AnalyseData() {
|
||||||
// look at the past few incidents
|
// look at the past few incidents
|
||||||
numDown := 0
|
numDown := 0
|
||||||
for _, wasUp := range monitor.history {
|
for _, wasUp := range mon.history {
|
||||||
if wasUp == false {
|
if wasUp == false {
|
||||||
numDown++
|
numDown++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
t := (float32(numDown) / float32(len(monitor.history))) * 100
|
t := (float32(numDown) / float32(len(mon.history))) * 100
|
||||||
monitor.config.Logger.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second))
|
l := logrus.WithFields(logrus.Fields{
|
||||||
|
"monitor": mon.Name,
|
||||||
|
"time": time.Now().Format(mon.config.DateFormat),
|
||||||
|
})
|
||||||
|
if numDown == 0 {
|
||||||
|
l.Printf("monitor is up")
|
||||||
|
} else if mon.ThresholdCount {
|
||||||
|
l.Printf("monitor down %d/%d", numDown, int(mon.Threshold))
|
||||||
|
} else {
|
||||||
|
l.Printf("monitor down %.2f%%/%.2f%%", t, mon.Threshold)
|
||||||
|
}
|
||||||
|
|
||||||
if len(monitor.history) != 10 {
|
histSize := HistorySize
|
||||||
|
if mon.ThresholdCount {
|
||||||
|
histSize = int(mon.Threshold)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(mon.history) != histSize {
|
||||||
// not saturated
|
// not saturated
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if t > monitor.Threshold && monitor.incident == nil {
|
triggered := (mon.ThresholdCount && numDown == int(mon.Threshold)) || (!mon.ThresholdCount && t > mon.Threshold)
|
||||||
monitor.incident = &Incident{
|
|
||||||
Name: monitor.Name + " - " + monitor.config.SystemName,
|
if triggered && mon.incident == nil {
|
||||||
ComponentID: monitor.ComponentID,
|
// create incident
|
||||||
Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat),
|
tplData := getTemplateData(mon)
|
||||||
|
tplData["FailReason"] = mon.lastFailReason
|
||||||
|
|
||||||
|
subject, message := mon.Template.Investigating.Exec(tplData)
|
||||||
|
mon.incident = &Incident{
|
||||||
|
Name: subject,
|
||||||
|
ComponentID: mon.ComponentID,
|
||||||
|
Message: message,
|
||||||
Notify: true,
|
Notify: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(monitor.lastFailReason) > 0 {
|
// is down, create an incident
|
||||||
monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`"
|
l.Warnf("creating incident. Monitor is down: %v", mon.lastFailReason)
|
||||||
|
// set investigating status
|
||||||
|
mon.incident.SetInvestigating()
|
||||||
|
// create/update incident
|
||||||
|
if err := mon.incident.Send(mon.config); err != nil {
|
||||||
|
l.Printf("Error sending incident: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// is down, create an incident
|
return
|
||||||
monitor.config.Logger.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason)
|
|
||||||
// set investigating status
|
|
||||||
monitor.incident.SetInvestigating()
|
|
||||||
// create/update incident
|
|
||||||
if err := monitor.incident.Send(monitor.config); err != nil {
|
|
||||||
monitor.config.Logger.Printf("Error sending incident: %v\n", err)
|
|
||||||
}
|
}
|
||||||
} else if t < monitor.Threshold && monitor.incident != nil {
|
|
||||||
|
// still triggered or no incident
|
||||||
|
if triggered || mon.incident == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// was down, created an incident, its now ok, make it resolved.
|
// was down, created an incident, its now ok, make it resolved.
|
||||||
monitor.config.Logger.Printf("%v resolved downtime incident", monitor.Name)
|
l.Warn("Resolving incident")
|
||||||
|
|
||||||
// resolve incident
|
// resolve incident
|
||||||
monitor.incident.Message = "\n**Resolved** - " + time.Now().Format(DefaultTimeFormat) + "\n\n - - - \n\n" + monitor.incident.Message
|
tplData := getTemplateData(mon)
|
||||||
monitor.incident.SetFixed()
|
tplData["incident"] = mon.incident
|
||||||
monitor.incident.Send(monitor.config)
|
|
||||||
|
|
||||||
monitor.lastFailReason = ""
|
subject, message := mon.Template.Fixed.Exec(tplData)
|
||||||
monitor.incident = nil
|
mon.incident.Name = subject
|
||||||
|
mon.incident.Message = message
|
||||||
|
mon.incident.SetFixed()
|
||||||
|
if err := mon.incident.Send(mon.config); err != nil {
|
||||||
|
l.Printf("Error sending incident: %v", err)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
mon.lastFailReason = ""
|
||||||
func (monitor *Monitor) ValidateConfiguration() error {
|
mon.incident = nil
|
||||||
if len(monitor.ExpectedBody) > 0 {
|
|
||||||
exp, err := regexp.Compile(monitor.ExpectedBody)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
monitor.bodyRegexp = exp
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(monitor.ExpectedBody) == 0 && monitor.ExpectedStatusCode == 0 {
|
|
||||||
return errors.New("Nothing to check, both 'expected_body' and 'expected_status_code' fields empty")
|
|
||||||
}
|
|
||||||
|
|
||||||
if monitor.CheckInterval < 1 {
|
|
||||||
monitor.CheckInterval = DefaultInterval
|
|
||||||
}
|
|
||||||
|
|
||||||
monitor.Method = strings.ToUpper(monitor.Method)
|
|
||||||
switch monitor.Method {
|
|
||||||
case "GET", "POST", "DELETE", "OPTIONS", "HEAD":
|
|
||||||
break
|
|
||||||
case "":
|
|
||||||
monitor.Method = "GET"
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("Unsupported check method: %v", monitor.Method)
|
|
||||||
}
|
|
||||||
|
|
||||||
if monitor.ComponentID == 0 && monitor.MetricID == 0 {
|
|
||||||
return errors.New("component_id & metric_id are unset")
|
|
||||||
}
|
|
||||||
|
|
||||||
if monitor.Threshold <= 0 {
|
|
||||||
monitor.Threshold = 100
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
32
readme.md
32
readme.md
@@ -4,6 +4,7 @@ Features
|
|||||||
--------
|
--------
|
||||||
|
|
||||||
- [x] Creates & Resolves Incidents
|
- [x] Creates & Resolves Incidents
|
||||||
|
- [x] Check URLs by response code and/or body contents
|
||||||
- [x] Posts monitor lag to cachet graphs
|
- [x] Posts monitor lag to cachet graphs
|
||||||
- [x] Updates Component to Partial Outage
|
- [x] Updates Component to Partial Outage
|
||||||
- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring)
|
- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring)
|
||||||
@@ -27,10 +28,17 @@ Configuration
|
|||||||
"url": "Ping URL",
|
"url": "Ping URL",
|
||||||
// optional, http method (defaults GET)
|
// optional, http method (defaults GET)
|
||||||
"method": "get",
|
"method": "get",
|
||||||
|
// optional, http Headers to add (default none)
|
||||||
|
"headers": [
|
||||||
|
// specify Name and Value of Http-Header, eg. Authorization
|
||||||
|
{ "name": "Authorization", "value": "Basic <hash>" }
|
||||||
|
],
|
||||||
// self-signed ssl certificate
|
// self-signed ssl certificate
|
||||||
"strict_tls": true,
|
"strict_tls": true,
|
||||||
// seconds between checks
|
// seconds between checks
|
||||||
"interval": 10,
|
"interval": 10,
|
||||||
|
// seconds for http timeout
|
||||||
|
"timeout": 5,
|
||||||
// post lag to cachet metric (graph)
|
// post lag to cachet metric (graph)
|
||||||
// note either metric ID or component ID are required
|
// note either metric ID or component ID are required
|
||||||
"metric_id": <metric id>,
|
"metric_id": <metric id>,
|
||||||
@@ -89,3 +97,27 @@ Package usage
|
|||||||
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
|
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
|
||||||
|
|
||||||
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
|
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2016 Castaway Labs LLC
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
15
tcp.go
Normal file
15
tcp.go
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
type TCPMonitor struct {
|
||||||
|
AbstractMonitor `mapstructure:",squash"`
|
||||||
|
|
||||||
|
// same as output from net.JoinHostPort
|
||||||
|
// defaults to parsed config from /etc/resolv.conf when empty
|
||||||
|
DNSServer string
|
||||||
|
|
||||||
|
// Will be converted to FQDN
|
||||||
|
Domain string
|
||||||
|
Type string
|
||||||
|
// expected answers (regex)
|
||||||
|
Expect []string
|
||||||
|
}
|
||||||
53
template.go
Normal file
53
template.go
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"text/template"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MessageTemplate holds the text/template sources for an incident's subject
// and message, together with their compiled forms.
type MessageTemplate struct {
	Subject string `json:"subject"`
	Message string `json:"message"`

	// Compiled by Compile; nil until then, and nil for an empty source.
	subjectTpl *template.Template
	messageTpl *template.Template
}

// SetDefault copies Subject and/or Message from d for whichever of the two
// is still empty on t. Values already set on t are left untouched.
func (t *MessageTemplate) SetDefault(d MessageTemplate) {
	if len(t.Subject) == 0 {
		t.Subject = d.Subject
	}
	if len(t.Message) == 0 {
		t.Message = d.Message
	}
}

// Compile parses the non-empty Subject and Message sources and stores the
// resulting templates for later use by Exec.
//
// The first parse error is returned; when the subject fails to parse the
// message is not attempted. Empty sources are skipped and are not an error.
//
// TODO: test
func (t *MessageTemplate) Compile() error {
	var err error

	if len(t.Subject) > 0 {
		t.subjectTpl, err = compileTemplate(t.Subject)
	}

	if err == nil && len(t.Message) > 0 {
		t.messageTpl, err = compileTemplate(t.Message)
	}

	return err
}

// Exec renders the subject and message templates with data and returns them
// in that order. A template that was never compiled (empty source, or Compile
// not called or failed) renders as the empty string.
func (t *MessageTemplate) Exec(data interface{}) (string, string) {
	return t.exec(t.subjectTpl, data), t.exec(t.messageTpl, data)
}

// exec renders a single template into a string. It returns "" for a nil
// template instead of dereferencing it, because Compile leaves the compiled
// template nil whenever the corresponding source is empty.
func (t *MessageTemplate) exec(tpl *template.Template, data interface{}) string {
	if tpl == nil {
		return ""
	}

	buf := new(bytes.Buffer)

	// Exec has no error return, so an execution error is deliberately
	// dropped; whatever was rendered before the failure is still returned.
	_ = tpl.Execute(buf, data)

	return buf.String()
}

// compileTemplate parses text as an unnamed text/template.
func compileTemplate(text string) (*template.Template, error) {
	return template.New("").Parse(text)
}
|
||||||
Reference in New Issue
Block a user