21 Commits

Author SHA1 Message Date
Matej Kramny
c04128ce36 compile message for Fixed status
- better logging
2017-02-06 23:05:39 -08:00
Matej Kramny
1b93730121 compile template message 2017-02-06 21:56:08 -08:00
Matej Kramny
85d92bcb07 rename monitor -> mon 2017-02-06 10:57:02 -08:00
Matej Kramny
0dc54e4e6e - customisable time format
- custom messages
- configure threshold count instead of uptime %
2017-02-05 23:43:38 -08:00
Matej Kramny
b3bc1d4405 - compile message templates
- send metrics to cachet
- fix http default configuration
2017-02-05 19:27:01 -08:00
Matej Kramny
b4fa33b8ad - immediate tick flag
- reword Start -> ClockStart etc
2017-02-04 22:40:31 -08:00
Matej Kramny
edfd4a51e6 Print monitor features 2017-02-04 22:15:24 -08:00
Matej Kramny
a2d8128109 huuman friendly config! 2017-02-04 21:49:13 -08:00
Matej Kramny
d43eca4b7d - yaml & json supported 2017-02-04 21:48:27 -08:00
Matej Kramny
36bf228599 a compiling proof of concept
- abstract type
- http, tcp, icmp & dns monitor types
- unmarshal from json into any monitor type
2017-02-04 18:23:53 -08:00
Matej Kramny
0cd6fa13a7 Merge branch 'master' into v3
# Conflicts:
#	cli/main.go
2017-02-04 16:12:42 -08:00
Matej Kramny
e910807973 basic refactor + new prototype 2017-02-04 16:02:22 -08:00
Matej Kramny
9b29a0450c Merge pull request #40 from to-kn/add_http_header_support
Add http header support
2017-02-04 14:45:21 -08:00
Matej Kramny
aaecc1669a Merge pull request #41 from yacloud-io/master
Support making request with proxy
2016-07-27 16:30:15 +01:00
Yi Tao Jiang
48586eb0aa Support making request with proxy 2016-07-27 23:24:54 +08:00
Tobias Knipping
2c364f3d2f add support for specifying http-headers and really use Method spezified 2016-07-24 16:34:30 +02:00
Matej Kramny
0de0baf5f9 Merge pull request #31 from faizshukri/fix/timeout-exceed
Timeout exceeded while awaiting headers
2016-06-22 15:59:54 +01:00
Faiz Shukri
3f4b9ced77 Add timeout customization 2016-06-16 13:38:51 +08:00
Matej Kramny
20e4dd1414 Add to readme 2016-05-19 19:43:01 +01:00
Matej Kramny
29b02fd164 Update example JSON 2016-05-19 19:41:19 +01:00
Matej Kramny
5c4f0c2e69 Print descriptions from cli 2016-05-19 19:34:36 +01:00
15 changed files with 712 additions and 315 deletions

5
.gitignore vendored
View File

@@ -1,2 +1,3 @@
gin-bin
example.config.local.json
/config.yml
/config.json
examples/

79
api.go Normal file
View File

@@ -0,0 +1,79 @@
package cachet
import (
"bytes"
"crypto/tls"
"encoding/json"
"errors"
"net/http"
"strconv"
"time"
"github.com/Sirupsen/logrus"
)
// CachetAPI holds the connection settings used for every call to the Cachet
// HTTP API.
type CachetAPI struct {
// URL is the base address; NewRequest prepends it to each request path.
URL string `json:"url"`
// Token is sent in the X-Cachet-Token header of each request.
Token string `json:"token"`
// Insecure, when true, disables TLS certificate verification for API calls.
Insecure bool `json:"insecure"`
}
// CachetResponse is the envelope decoded from an API response body.
type CachetResponse struct {
// Data is kept as raw JSON so each caller can unmarshal it into the shape it
// expects.
Data json.RawMessage `json:"data"`
}
// TODO: test
// Ping performs a GET on the API's /ping path. It returns the request error
// if the call fails, a generic error if the response status is not 200, and
// nil otherwise.
func (api CachetAPI) Ping() error {
	resp, _, err := api.NewRequest("GET", "/ping", nil)
	switch {
	case err != nil:
		return err
	case resp.StatusCode != 200:
		return errors.New("API Responded with non-200 status code")
	default:
		return nil
	}
}
// SendMetric adds a data point to a cachet monitor.
//
// id is the Cachet metric ID and lag the value to record; the point is stamped
// with the current Unix time. Failures are logged as warnings and otherwise
// ignored, so the method is safe to run in its own goroutine.
func (api CachetAPI) SendMetric(id int, lag int64) {
	logrus.Debugf("Sending lag metric ID:%d RTT %vms", id, lag)

	jsonBytes, err := json.Marshal(map[string]interface{}{
		"value":     lag,
		"timestamp": time.Now().Unix(),
	})
	if err != nil {
		logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
		return
	}

	resp, _, err := api.NewRequest("POST", "/metrics/"+strconv.Itoa(id)+"/points", jsonBytes)
	if err != nil {
		logrus.Warnf("Could not log metric! ID: %d, err: %v", id, err)
		return
	}

	// Report the status separately: with a nil error the old combined message
	// printed "err: <nil>" and hid why the point was rejected.
	if resp.StatusCode != 200 {
		logrus.Warnf("Could not log metric! ID: %d, status: %d", id, resp.StatusCode)
	}
}
// TODO: test
// NewRequest wraps http.NewRequest: it builds a request for api.URL+url with
// the JSON content type and the Cachet token header, executes it, and decodes
// the JSON response envelope.
//
// requestType is the HTTP method, url the path appended to the API base URL,
// and reqBody the (possibly nil) request payload.
//
// It returns a nil response and an error when the request cannot be built or
// sent. When the response body cannot be decoded it returns the response
// together with the decode error. The response body is always consumed and
// closed here; callers should only rely on metadata such as StatusCode.
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
	req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
	if err != nil {
		// req is nil here (invalid method or URL); touching it would panic.
		return nil, CachetResponse{}, err
	}

	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-Cachet-Token", api.Token)

	// Use a private transport instead of mutating http.DefaultTransport: the
	// default transport is shared by the whole process, so overwriting its
	// TLS configuration races with, and leaks this setting into, every other
	// HTTP user. Keep-alives are disabled so the short-lived transport does
	// not leave idle connections behind.
	client := &http.Client{
		Transport: &http.Transport{
			Proxy:             http.ProxyFromEnvironment,
			TLSClientConfig:   &tls.Config{InsecureSkipVerify: api.Insecure},
			DisableKeepAlives: true,
		},
	}

	res, err := client.Do(req)
	if err != nil {
		return nil, CachetResponse{}, err
	}
	defer res.Body.Close()

	var body CachetResponse
	err = json.NewDecoder(res.Body).Decode(&body)

	return res, body, err
}

View File

@@ -3,89 +3,125 @@ package main
import (
"encoding/json"
"errors"
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"os/signal"
"strings"
"sync"
"github.com/Sirupsen/logrus"
cachet "github.com/castawaylabs/cachet-monitor"
docopt "github.com/docopt/docopt-go"
"github.com/mitchellh/mapstructure"
"gopkg.in/yaml.v2"
)
var configPath string
var systemName string
var logPath string
// usage is the command-line help text; main passes it to docopt.Parse to
// derive the accepted arguments.
const usage = `cachet-monitor
Usage:
cachet-monitor (-c PATH | --config PATH) [--log=LOGPATH] [--name=NAME] [--immediate]
cachet-monitor -h | --help | --version
cachet-monitor print-config
Arguments:
PATH path to config.json
LOGPATH path to log output (defaults to STDOUT)
NAME name of this logger
Examples:
cachet-monitor -c /root/cachet-monitor.json
cachet-monitor -c /root/cachet-monitor.json --log=/var/log/cachet-monitor.log --name="development machine"
Options:
-c PATH.json --config PATH Path to configuration file
-h --help Show this screen.
--version Show version
--immediate Tick immediately (by default waits for first defined interval)
print-config Print example configuration
Environment variables:
CACHET_API override API url from configuration
CACHET_TOKEN override API token from configuration
CACHET_DEV set to enable dev logging`
func main() {
flag.StringVar(&configPath, "c", "/etc/cachet-monitor.config.json", "Config path")
flag.StringVar(&systemName, "name", "", "System Name")
flag.StringVar(&logPath, "log", "", "Log path")
flag.Parse()
arguments, _ := docopt.Parse(usage, nil, true, "cachet-monitor", false)
cfg, err := getConfiguration(configPath)
cfg, err := getConfiguration(arguments["--config"].(string))
if err != nil {
panic(err)
logrus.Panicf("Unable to start (reading config): %v", err)
}
if len(systemName) > 0 {
cfg.SystemName = systemName
if immediate, ok := arguments["--immediate"]; ok {
cfg.Immediate = immediate.(bool)
}
if len(logPath) > 0 {
cfg.LogPath = logPath
if name := arguments["--name"]; name != nil {
cfg.SystemName = name.(string)
}
logrus.SetOutput(getLogger(arguments["--log"]))
if len(os.Getenv("CACHET_API")) > 0 {
cfg.APIUrl = os.Getenv("CACHET_API")
cfg.API.URL = os.Getenv("CACHET_API")
}
if len(os.Getenv("CACHET_TOKEN")) > 0 {
cfg.APIToken = os.Getenv("CACHET_TOKEN")
cfg.API.Token = os.Getenv("CACHET_TOKEN")
}
if len(os.Getenv("CACHET_DEV")) > 0 {
logrus.SetLevel(logrus.DebugLevel)
}
if err := cfg.ValidateConfiguration(); err != nil {
panic(err)
if valid := cfg.Validate(); !valid {
logrus.Errorf("Invalid configuration")
os.Exit(1)
}
cfg.Logger.Printf("System: %s\nAPI: %s\nMonitors: %d\n\n", cfg.SystemName, cfg.APIUrl, len(cfg.Monitors))
logrus.Debug("Configuration valid")
logrus.Infof("System: %s", cfg.SystemName)
logrus.Infof("API: %s", cfg.API.URL)
logrus.Infof("Monitors: %d\n", len(cfg.Monitors))
logrus.Infof("Pinging cachet")
if err := cfg.API.Ping(); err != nil {
logrus.Errorf("Cannot ping cachet!\n%v", err)
os.Exit(1)
}
logrus.Infof("Ping OK")
wg := &sync.WaitGroup{}
for _, mon := range cfg.Monitors {
go mon.Start(cfg, wg)
for index, monitor := range cfg.Monitors {
logrus.Infof("Starting Monitor #%d: ", index)
logrus.Infof("Features: \n - %v", strings.Join(monitor.Describe(), "\n - "))
go monitor.ClockStart(cfg, monitor, wg)
}
signals := make(chan os.Signal, 1)
signal.Notify(signals, os.Interrupt, os.Kill)
<-signals
cfg.Logger.Println("Abort: Waiting monitors to finish")
logrus.Warnf("Abort: Waiting monitors to finish")
for _, mon := range cfg.Monitors {
mon.Stop()
mon.GetMonitor().ClockStop()
}
wg.Wait()
}
func getLogger(logPath string) *log.Logger {
var logWriter = os.Stdout
var err error
if len(logPath) > 0 {
logWriter, err = os.Create(logPath)
if err != nil {
fmt.Printf("Unable to open file '%v' for logging\n", logPath)
os.Exit(1)
}
func getLogger(logPath interface{}) *os.File {
if logPath == nil || len(logPath.(string)) == 0 {
return os.Stdout
}
flags := log.Llongfile | log.Ldate | log.Ltime
if len(os.Getenv("CACHET_DEV")) > 0 {
flags = 0
file, err := os.Create(logPath.(string))
if err != nil {
logrus.Errorf("Unable to open file '%v' for logging: \n%v", logPath, err)
os.Exit(1)
}
return log.New(logWriter, "", flags)
return file
}
func getConfiguration(path string) (*cachet.CachetMonitor, error) {
@@ -98,26 +134,73 @@ func getConfiguration(path string) (*cachet.CachetMonitor, error) {
// download config
response, err := http.Get(path)
if err != nil {
return nil, errors.New("Cannot download network config: " + err.Error())
logrus.Warn("Unable to download network configuration")
return nil, err
}
defer response.Body.Close()
data, _ = ioutil.ReadAll(response.Body)
fmt.Println("Downloaded network configuration.")
logrus.Info("Downloaded network configuration.")
} else {
data, err = ioutil.ReadFile(path)
if err != nil {
return nil, errors.New("Config file '" + path + "' missing!")
return nil, errors.New("Unable to open file: '" + path + "'")
}
}
if err := json.Unmarshal(data, &cfg); err != nil {
fmt.Println(err)
return nil, errors.New("Cannot parse config!")
if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") {
err = yaml.Unmarshal(data, &cfg)
} else {
err = json.Unmarshal(data, &cfg)
}
cfg.Logger = getLogger(cfg.LogPath)
if err != nil {
logrus.Warnf("Unable to parse configuration file")
}
return &cfg, nil
cfg.Monitors = make([]cachet.MonitorInterface, len(cfg.RawMonitors))
for index, rawMonitor := range cfg.RawMonitors {
var t cachet.MonitorInterface
var err error
// get default type
monType := cachet.GetMonitorType("")
if t, ok := rawMonitor["type"].(string); ok {
monType = cachet.GetMonitorType(t)
}
switch monType {
case "http":
var s cachet.HTTPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "dns":
var s cachet.DNSMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "icmp":
var s cachet.ICMPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
case "tcp":
var s cachet.TCPMonitor
err = mapstructure.Decode(rawMonitor, &s)
t = &s
default:
logrus.Errorf("Invalid monitor type (index: %d) %v", index, monType)
continue
}
t.GetMonitor().Type = monType
if err != nil {
logrus.Errorf("Unable to unmarshal monitor to type (index: %d): %v", index, err)
continue
}
cfg.Monitors[index] = t
}
return &cfg, err
}

View File

@@ -1,65 +1,89 @@
package cachet
import (
"errors"
"log"
"net"
"os"
"strings"
"time"
"github.com/Sirupsen/logrus"
)
type CachetMonitor struct {
Logger *log.Logger `json:"-"`
SystemName string `json:"system_name" yaml:"system_name"`
DateFormat string `json:"date_format" yaml:"date_format"`
API CachetAPI `json:"api"`
RawMonitors []map[string]interface{} `json:"monitors" yaml:"monitors"`
APIUrl string `json:"api_url"`
APIToken string `json:"api_token"`
SystemName string `json:"system_name"`
LogPath string `json:"log_path"`
InsecureAPI bool `json:"insecure_api"`
Monitors []*Monitor `json:"monitors"`
Monitors []MonitorInterface `json:"-" yaml:"-"`
Immediate bool `json:"-" yaml:"-"`
}
func (cfg *CachetMonitor) ValidateConfiguration() error {
if cfg.Logger == nil {
cfg.Logger = log.New(os.Stdout, "", log.Llongfile|log.Ldate|log.Ltime)
}
// Validate configuration
func (cfg *CachetMonitor) Validate() bool {
valid := true
if len(cfg.SystemName) == 0 {
// get hostname
cfg.SystemName = getHostname()
}
if len(cfg.APIToken) == 0 || len(cfg.APIUrl) == 0 {
return errors.New("API URL or API Token not set. cachet-monitor won't be able to report incidents.\n\nPlease set:\n CACHET_API and CACHET_TOKEN environment variable to override settings.\n\nGet help at https://github.com/castawaylabs/cachet-monitor\n")
if len(cfg.DateFormat) == 0 {
cfg.DateFormat = DefaultTimeFormat
}
if len(cfg.API.Token) == 0 || len(cfg.API.URL) == 0 {
logrus.Warnf("API URL or API Token missing.\nGet help at https://github.com/castawaylabs/cachet-monitor")
valid = false
}
if len(cfg.Monitors) == 0 {
return errors.New("No monitors defined!\nSee sample configuration: https://github.com/castawaylabs/cachet-monitor/blob/master/example.config.json\n")
logrus.Warnf("No monitors defined!\nSee help for example configuration")
valid = false
}
for _, monitor := range cfg.Monitors {
if err := monitor.ValidateConfiguration(); err != nil {
return err
for index, monitor := range cfg.Monitors {
if errs := monitor.Validate(); len(errs) > 0 {
logrus.Warnf("Monitor validation errors (index %d): %v", index, "\n - "+strings.Join(errs, "\n - "))
valid = false
}
}
return nil
return valid
}
// getHostname returns id of the current system
func getHostname() string {
hostname, err := os.Hostname()
if err != nil || len(hostname) == 0 {
addrs, err := net.InterfaceAddrs()
if err != nil {
return "unknown"
}
for _, addr := range addrs {
return addr.String()
}
if err == nil && len(hostname) > 0 {
return hostname
}
return hostname
addrs, err := net.InterfaceAddrs()
if err != nil || len(addrs) == 0 {
return "unknown"
}
return addrs[0].String()
}
// getMs returns the current wall-clock time as milliseconds since the Unix
// epoch.
func getMs() int64 {
	nanos := time.Now().UnixNano()
	return nanos / int64(time.Millisecond)
}
// GetMonitorType normalises a configured monitor type name: the name is
// lower-cased, and an empty name selects the default type "http".
func GetMonitorType(t string) string {
	kind := strings.ToLower(t)
	if kind == "" {
		kind = "http"
	}
	return kind
}
// getTemplateData builds the base variable set handed to message templates:
// the configured system name, the API settings, the monitor itself and the
// current time formatted with the configured date format.
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
	cfg := monitor.config

	data := make(map[string]interface{}, 4)
	data["SystemName"] = cfg.SystemName
	data["API"] = cfg.API
	data["Monitor"] = monitor
	data["now"] = time.Now().Format(cfg.DateFormat)

	return data
}

15
config_test.go Normal file
View File

@@ -0,0 +1,15 @@
package cachet
import (
"testing"
)
// TestGetMonitorType checks that GetMonitorType defaults an empty name to
// "http" and lower-cases an upper-case name.
func TestGetMonitorType(t *testing.T) {
	cases := []struct {
		input   string
		failMsg string
	}{
		{"", "monitor type `` should default to http"},
		{"HTTP", "does not return correct monitor type"},
	}

	for _, c := range cases {
		if got := GetMonitorType(c.input); got != "http" {
			t.Error(c.failMsg)
		}
	}
}

5
dns.go Normal file
View File

@@ -0,0 +1,5 @@
package cachet
// DNSMonitor is the monitor type built for configuration entries whose type is
// "dns". It currently declares no fields of its own.
type DNSMonitor struct {
// Embedded base monitor; the squash tag makes mapstructure decode its fields
// from the same flat map as the monitor entry.
AbstractMonitor `mapstructure:",squash"`
}

View File

@@ -1,17 +1,22 @@
{
"api_url": "https://demo.cachethq.io/api/v1",
"api_token": "9yMHsdioQosnyVK4iCVR",
"interval": 5,
"api": {
"url": "https://demo.cachethq.io/api/v1",
"token": "9yMHsdioQosnyVK4iCVR",
"insecure": true
},
"monitors": [
{
"name": "nodegear frontend",
"url": "https://nodegear.io/ping",
"metric_id": 1,
"name": "google",
"url": "https://google.com",
"threshold": 80,
"component_id": null,
"component_id": 1,
"interval": 10,
"timeout": 5,
"headers": {
"Authorization": "Basic <hash>"
},
"expected_status_code": 200,
"strict_tls": true
}
],
"insecure_api": false
}
]
}

14
example.config.yml Normal file
View File

@@ -0,0 +1,14 @@
api:
url: https://demo.cachethq.io/api/v1
token: 9yMHsdioQosnyVK4iCVR
monitors:
- name: google
target: https://google.com
threshold: 80
component_id: 1
interval: 10
timeout: 5
headers:
Authorization: Basic <hash>
expected_status_code: 200
strict: true

135
http.go
View File

@@ -1,58 +1,125 @@
package cachet
import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"time"
)
func (monitor *CachetMonitor) makeRequest(requestType string, url string, reqBody []byte) (*http.Response, []byte, error) {
req, err := http.NewRequest(requestType, monitor.APIUrl+url, bytes.NewBuffer(reqBody))
// Investigating template
var defaultHTTPInvestigatingTpl = MessageTemplate{
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
Message: `{{ .Monitor.Name }} check **failed** (server time: {{ .now }})
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Cachet-Token", monitor.APIToken)
{{ .FailReason }}`,
}
client := &http.Client{}
if monitor.InsecureAPI == true {
client.Transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
// Fixed template
var defaultHTTPFixedTpl = MessageTemplate{
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
Message: `**Resolved** - {{ .now }}
- - -
{{ .incident.Message }}`,
}
// HTTPMonitor checks a target over HTTP. It extends AbstractMonitor with the
// request method, extra request headers and the expectations applied to the
// response.
type HTTPMonitor struct {
// Embedded base monitor, decoded from the same flat map (squash).
AbstractMonitor `mapstructure:",squash"`
// Method is the HTTP method; Validate upper-cases it and defaults it to GET.
Method string
// ExpectedStatusCode, when greater than zero, must equal the response status.
ExpectedStatusCode int `mapstructure:"expected_status_code"`
// Headers are added to every check request.
Headers map[string]string
// compiled to Regexp
ExpectedBody string `mapstructure:"expected_body"`
// bodyRegexp is ExpectedBody compiled by Validate; nil means no body check.
bodyRegexp *regexp.Regexp
}
// TODO: test
func (monitor *HTTPMonitor) test() bool {
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
for k, v := range monitor.Headers {
req.Header.Add(k, v)
}
transport := http.DefaultTransport.(*http.Transport)
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: monitor.Strict == false}
client := &http.Client{
Timeout: time.Duration(monitor.Timeout * time.Second),
Transport: transport,
}
resp, err := client.Do(req)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
defer resp.Body.Close()
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
monitor.lastFailReason = "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
return false
}
if monitor.bodyRegexp != nil {
// check response body
responseBody, err := ioutil.ReadAll(resp.Body)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
if !monitor.bodyRegexp.Match(responseBody) {
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
return false
}
}
res, err := client.Do(req)
if err != nil {
return nil, []byte{}, err
}
defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)
return res, body, nil
return true
}
// SendMetric sends lag metric point
func (monitor *Monitor) SendMetric(delay int64) error {
if monitor.MetricID == 0 {
return nil
// TODO: test
func (mon *HTTPMonitor) Validate() []string {
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
errs := mon.AbstractMonitor.Validate()
if len(mon.ExpectedBody) > 0 {
exp, err := regexp.Compile(mon.ExpectedBody)
if err != nil {
errs = append(errs, "Regexp compilation failure: "+err.Error())
} else {
mon.bodyRegexp = exp
}
}
jsonBytes, _ := json.Marshal(&map[string]interface{}{
"value": delay,
})
resp, _, err := monitor.config.makeRequest("POST", "/metrics/"+strconv.Itoa(monitor.MetricID)+"/points", jsonBytes)
if err != nil || resp.StatusCode != 200 {
return fmt.Errorf("Could not log data point!\n%v\n", err)
if len(mon.ExpectedBody) == 0 && mon.ExpectedStatusCode == 0 {
errs = append(errs, "Both 'expected_body' and 'expected_status_code' fields empty")
}
return nil
mon.Method = strings.ToUpper(mon.Method)
switch mon.Method {
case "GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD":
break
case "":
mon.Method = "GET"
default:
errs = append(errs, "Unsupported HTTP method: "+mon.Method)
}
return errs
}
func getMs() int64 {
return time.Now().UnixNano() / int64(time.Millisecond)
func (mon *HTTPMonitor) Describe() []string {
features := mon.AbstractMonitor.Describe()
features = append(features, "Method: "+mon.Method)
return features
}

5
icmp.go Normal file
View File

@@ -0,0 +1,5 @@
package cachet
// ICMPMonitor is the monitor type built for configuration entries whose type
// is "icmp". It currently declares no fields of its own.
type ICMPMonitor struct {
// Embedded base monitor; the squash tag makes mapstructure decode its fields
// from the same flat map as the monitor entry.
AbstractMonitor `mapstructure:",squash"`
}

View File

@@ -4,6 +4,8 @@ import (
"encoding/json"
"fmt"
"strconv"
"github.com/Sirupsen/logrus"
)
// Incident Cachet data model
@@ -33,7 +35,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
}
if err != nil {
cfg.Logger.Printf("cannot fetch component: %v", err)
logrus.Warnf("cannot fetch component: %v", err)
}
case 4:
// fixed
@@ -49,21 +51,19 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
jsonBytes, _ := json.Marshal(incident)
resp, body, err := cfg.makeRequest(requestType, requestURL, jsonBytes)
resp, body, err := cfg.API.NewRequest(requestType, requestURL, jsonBytes)
if err != nil {
return err
}
var data struct {
Incident struct {
ID int `json:"id"`
} `json:"data"`
ID int `json:"id"`
}
if err := json.Unmarshal(body, &data); err != nil {
return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body))
if err := json.Unmarshal(body.Data, &data); err != nil {
return fmt.Errorf("Cannot parse incident body: %v, %v", err, string(body.Data))
}
incident.ID = data.Incident.ID
incident.ID = data.ID
if resp.StatusCode != 200 {
return fmt.Errorf("Could not create/update incident!")
}
@@ -72,7 +72,7 @@ func (incident *Incident) Send(cfg *CachetMonitor) error {
}
func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
resp, body, err := cfg.makeRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil)
resp, body, err := cfg.API.NewRequest("GET", "/components/"+strconv.Itoa(incident.ComponentID), nil)
if err != nil {
return 0, err
}
@@ -82,15 +82,13 @@ func (incident *Incident) GetComponentStatus(cfg *CachetMonitor) (int, error) {
}
var data struct {
Component struct {
Status int `json:"status"`
} `json:"data"`
Status int `json:"status"`
}
if err := json.Unmarshal(body, &data); err != nil {
return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body), err)
if err := json.Unmarshal(body.Data, &data); err != nil {
return 0, fmt.Errorf("Cannot parse component body: %v. Err = %v", string(body.Data), err)
}
return data.Component.Status, nil
return data.Status, nil
}
// SetInvestigating sets status to Investigating

View File

@@ -1,41 +1,59 @@
package cachet
import (
"crypto/tls"
"errors"
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/Sirupsen/logrus"
)
const HttpTimeout = time.Duration(time.Second)
const DefaultInterval = 60
const DefaultInterval = time.Second * 60
const DefaultTimeout = time.Second
const DefaultTimeFormat = "15:04:05 Jan 2 MST"
const HistorySize = 10
// Monitor data model
type Monitor struct {
Name string `json:"name"`
URL string `json:"url"`
Method string `json:"method"`
StrictTLS bool `json:"strict_tls"`
CheckInterval time.Duration `json:"interval"`
type MonitorInterface interface {
ClockStart(*CachetMonitor, MonitorInterface, *sync.WaitGroup)
ClockStop()
tick(MonitorInterface)
test() bool
MetricID int `json:"metric_id"`
ComponentID int `json:"component_id"`
Validate() []string
GetMonitor() *AbstractMonitor
Describe() []string
}
// Threshold = percentage
Threshold float32 `json:"threshold"`
ExpectedStatusCode int `json:"expected_status_code"`
// compiled to Regexp
ExpectedBody string `json:"expected_body"`
bodyRegexp *regexp.Regexp
// AbstractMonitor data model
type AbstractMonitor struct {
Name string
Target string
// (default)http, tcp, dns, icmp
Type string
Strict bool
Interval time.Duration
Timeout time.Duration
MetricID int `mapstructure:"metric_id"`
ComponentID int `mapstructure:"component_id"`
// Templating stuff
Template struct {
Investigating MessageTemplate
Fixed MessageTemplate
}
// Threshold = percentage / number of down incidents
Threshold float32
ThresholdCount bool `mapstructure:"threshold_count"`
// lag / average(lagHistory) * 100 = percentage above average lag
// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
PerformanceThreshold float32
history []bool
lagHistory []float32
lastFailReason string
incident *Incident
config *CachetMonitor
@@ -44,34 +62,67 @@ type Monitor struct {
stopC chan bool
}
func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) {
// Validate applies defaults to unset fields, compiles the message templates
// and returns a human-readable description of every problem found. An empty
// result means the monitor is usable.
//
// Defaults applied: Interval and Timeout fall back to DefaultInterval and
// DefaultTimeout, and a non-positive Threshold becomes 100.
func (mon *AbstractMonitor) Validate() []string {
	errs := []string{}

	if len(mon.Name) == 0 {
		errs = append(errs, "Name is required")
	}

	// Interval and Timeout hold a number of seconds, not a real duration:
	// they are multiplied by time.Second where the ticker and the HTTP client
	// are created. The default constants are real durations, so convert them
	// to second counts; otherwise the default timeout (1e9) always exceeds a
	// configured interval and the later multiplication yields absurd values.
	if mon.Interval < 1 {
		mon.Interval = DefaultInterval / time.Second
	}
	if mon.Timeout < 1 {
		mon.Timeout = DefaultTimeout / time.Second
	}

	if mon.Timeout > mon.Interval {
		errs = append(errs, "Timeout greater than interval")
	}

	// A monitor must report somewhere: a component, a metric, or both.
	if mon.ComponentID == 0 && mon.MetricID == 0 {
		errs = append(errs, "component_id & metric_id are unset")
	}

	if mon.Threshold <= 0 {
		mon.Threshold = 100
	}

	if err := mon.Template.Fixed.Compile(); err != nil {
		errs = append(errs, "Could not compile \"fixed\" template: "+err.Error())
	}
	if err := mon.Template.Investigating.Compile(); err != nil {
		errs = append(errs, "Could not compile \"investigating\" template: "+err.Error())
	}

	return errs
}
// GetMonitor returns the receiver itself. Concrete monitor types embed
// AbstractMonitor, so this gives code holding a MonitorInterface access to the
// shared base fields.
func (mon *AbstractMonitor) GetMonitor() *AbstractMonitor {
return mon
}
// Describe lists the monitor's features as printable lines: always its type,
// followed by its name when one is set.
func (mon *AbstractMonitor) Describe() []string {
	typeLine := "Type: " + mon.Type
	if mon.Name == "" {
		return []string{typeLine}
	}
	return []string{typeLine, "Name: " + mon.Name}
}
func (mon *AbstractMonitor) ClockStart(cfg *CachetMonitor, iface MonitorInterface, wg *sync.WaitGroup) {
wg.Add(1)
mon.config = cfg
mon.stopC = make(chan bool)
mon.config.Logger.Printf(" Starting %s: %d seconds check interval\n - %v %s", mon.Name, mon.CheckInterval, mon.Method, mon.URL)
// print features
if mon.ExpectedStatusCode > 0 {
mon.config.Logger.Printf(" - Expect HTTP %d", mon.ExpectedStatusCode)
}
if len(mon.ExpectedBody) > 0 {
mon.config.Logger.Printf(" - Expect Body to match \"%v\"", mon.ExpectedBody)
}
if mon.MetricID > 0 {
mon.config.Logger.Printf(" - Log lag to metric id %d\n", mon.MetricID)
}
if mon.ComponentID > 0 {
mon.config.Logger.Printf(" - Update component id %d\n\n", mon.ComponentID)
if cfg.Immediate {
mon.tick(iface)
}
mon.Tick()
ticker := time.NewTicker(mon.CheckInterval * time.Second)
ticker := time.NewTicker(mon.Interval * time.Second)
for {
select {
case <-ticker.C:
mon.Tick()
mon.tick(iface)
case <-mon.stopC:
wg.Done()
return
@@ -79,174 +130,124 @@ func (mon *Monitor) Start(cfg *CachetMonitor, wg *sync.WaitGroup) {
}
}
func (monitor *Monitor) Stop() {
if monitor.Stopped() {
return
}
close(monitor.stopC)
}
func (monitor *Monitor) Stopped() bool {
func (mon *AbstractMonitor) ClockStop() {
select {
case <-monitor.stopC:
return true
case <-mon.stopC:
return
default:
return false
close(mon.stopC)
}
}
func (monitor *Monitor) Tick() {
func (mon *AbstractMonitor) test() bool { return false }
// TODO: test
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
reqStart := getMs()
isUp := monitor.doRequest()
up := iface.test()
lag := getMs() - reqStart
if len(monitor.history) == 9 {
monitor.config.Logger.Printf("%v is now saturated\n", monitor.Name)
histSize := HistorySize
if mon.ThresholdCount {
histSize = int(mon.Threshold)
}
if len(monitor.history) >= 10 {
monitor.history = monitor.history[len(monitor.history)-9:]
}
monitor.history = append(monitor.history, isUp)
monitor.AnalyseData()
if isUp == true && monitor.MetricID > 0 {
monitor.SendMetric(lag)
if len(mon.history) == histSize-1 {
logrus.Warnf("%v is now saturated\n", mon.Name)
}
if len(mon.history) >= histSize {
mon.history = mon.history[len(mon.history)-(histSize-1):]
}
mon.history = append(mon.history, up)
mon.AnalyseData()
// report lag
if mon.MetricID > 0 {
go mon.config.API.SendMetric(mon.MetricID, lag)
}
}
func (monitor *Monitor) doRequest() bool {
client := &http.Client{
Timeout: HttpTimeout,
}
if monitor.StrictTLS == false {
client.Transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
}
resp, err := client.Get(monitor.URL)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
defer resp.Body.Close()
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
return false
}
if monitor.bodyRegexp != nil {
// check body
responseBody, err := ioutil.ReadAll(resp.Body)
if err != nil {
monitor.lastFailReason = err.Error()
return false
}
match := monitor.bodyRegexp.Match(responseBody)
if !match {
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody
}
return match
}
return true
}
// TODO: test
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
func (monitor *Monitor) AnalyseData() {
func (mon *AbstractMonitor) AnalyseData() {
// look at the past few incidents
numDown := 0
for _, wasUp := range monitor.history {
for _, wasUp := range mon.history {
if wasUp == false {
numDown++
}
}
t := (float32(numDown) / float32(len(monitor.history))) * 100
monitor.config.Logger.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second))
t := (float32(numDown) / float32(len(mon.history))) * 100
l := logrus.WithFields(logrus.Fields{
"monitor": mon.Name,
"time": time.Now().Format(mon.config.DateFormat),
})
if numDown == 0 {
l.Printf("monitor is up")
} else if mon.ThresholdCount {
l.Printf("monitor down %d/%d", numDown, int(mon.Threshold))
} else {
l.Printf("monitor down %.2f%%/%.2f%%", t, mon.Threshold)
}
if len(monitor.history) != 10 {
histSize := HistorySize
if mon.ThresholdCount {
histSize = int(mon.Threshold)
}
if len(mon.history) != histSize {
// not saturated
return
}
if t > monitor.Threshold && monitor.incident == nil {
monitor.incident = &Incident{
Name: monitor.Name + " - " + monitor.config.SystemName,
ComponentID: monitor.ComponentID,
Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat),
triggered := (mon.ThresholdCount && numDown == int(mon.Threshold)) || (!mon.ThresholdCount && t > mon.Threshold)
if triggered && mon.incident == nil {
// create incident
tplData := getTemplateData(mon)
tplData["FailReason"] = mon.lastFailReason
subject, message := mon.Template.Investigating.Exec(tplData)
mon.incident = &Incident{
Name: subject,
ComponentID: mon.ComponentID,
Message: message,
Notify: true,
}
if len(monitor.lastFailReason) > 0 {
monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`"
}
// is down, create an incident
monitor.config.Logger.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason)
l.Warnf("creating incident. Monitor is down: %v", mon.lastFailReason)
// set investigating status
monitor.incident.SetInvestigating()
mon.incident.SetInvestigating()
// create/update incident
if err := monitor.incident.Send(monitor.config); err != nil {
monitor.config.Logger.Printf("Error sending incident: %v\n", err)
}
} else if t < monitor.Threshold && monitor.incident != nil {
// was down, created an incident, its now ok, make it resolved.
monitor.config.Logger.Printf("%v resolved downtime incident", monitor.Name)
// resolve incident
monitor.incident.Message = "\n**Resolved** - " + time.Now().Format(DefaultTimeFormat) + "\n\n - - - \n\n" + monitor.incident.Message
monitor.incident.SetFixed()
monitor.incident.Send(monitor.config)
monitor.lastFailReason = ""
monitor.incident = nil
}
}
func (monitor *Monitor) ValidateConfiguration() error {
if len(monitor.ExpectedBody) > 0 {
exp, err := regexp.Compile(monitor.ExpectedBody)
if err != nil {
return err
if err := mon.incident.Send(mon.config); err != nil {
l.Printf("Error sending incident: %v", err)
}
monitor.bodyRegexp = exp
return
}
if len(monitor.ExpectedBody) == 0 && monitor.ExpectedStatusCode == 0 {
return errors.New("Nothing to check, both 'expected_body' and 'expected_status_code' fields empty")
// still triggered or no incident
if triggered || mon.incident == nil {
return
}
if monitor.CheckInterval < 1 {
monitor.CheckInterval = DefaultInterval
// was down, created an incident, its now ok, make it resolved.
l.Warn("Resolving incident")
// resolve incident
tplData := getTemplateData(mon)
tplData["incident"] = mon.incident
subject, message := mon.Template.Fixed.Exec(tplData)
mon.incident.Name = subject
mon.incident.Message = message
mon.incident.SetFixed()
if err := mon.incident.Send(mon.config); err != nil {
l.Printf("Error sending incident: %v", err)
}
monitor.Method = strings.ToUpper(monitor.Method)
switch monitor.Method {
case "GET", "POST", "DELETE", "OPTIONS", "HEAD":
break
case "":
monitor.Method = "GET"
default:
return fmt.Errorf("Unsupported check method: %v", monitor.Method)
}
if monitor.ComponentID == 0 && monitor.MetricID == 0 {
return errors.New("component_id & metric_id are unset")
}
if monitor.Threshold <= 0 {
monitor.Threshold = 100
}
return nil
mon.lastFailReason = ""
mon.incident = nil
}

View File

@@ -4,6 +4,7 @@ Features
--------
- [x] Creates & Resolves Incidents
- [x] Check URLs by response code and/or body contents
- [x] Posts monitor lag to cachet graphs
- [x] Updates Component to Partial Outage
- [x] Updates Component to Major Outage if already in Partial Outage (works well with distributed monitoring)
@@ -27,10 +28,17 @@ Configuration
"url": "Ping URL",
// optional, http method (defaults GET)
"method": "get",
// optional, http Headers to add (default none)
"headers": [
// specify Name and Value of Http-Header, eg. Authorization
{ "name": "Authorization", "value": "Basic <hash>" }
],
// self-signed ssl certificate
"strict_tls": true,
// seconds between checks
"interval": 10,
// seconds for http timeout
"timeout": 5,
// post lag to cachet metric (graph)
// note either metric ID or component ID are required
"metric_id": <metric id>,
@@ -89,3 +97,27 @@ Package usage
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
## License
MIT License
Copyright (c) 2016 Castaway Labs LLC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

15
tcp.go Normal file
View File

@@ -0,0 +1,15 @@
package cachet
// TCPMonitor is the monitor type built for configuration entries whose type is
// "tcp".
// NOTE(review): the fields below (DNSServer, Domain, Expect) read like DNS
// lookup settings rather than TCP settings — confirm they belong on this type.
type TCPMonitor struct {
// Embedded base monitor, decoded from the same flat map (squash).
AbstractMonitor `mapstructure:",squash"`
// same as output from net.JoinHostPort
// defaults to parsed config from /etc/resolv.conf when empty
DNSServer string
// Will be converted to FQDN
Domain string
// NOTE(review): this shadows the Type field promoted from AbstractMonitor —
// confirm which one configuration decoding is meant to fill.
Type string
// expected answers (regex)
Expect []string
}

53
template.go Normal file
View File

@@ -0,0 +1,53 @@
package cachet
import (
"bytes"
"text/template"
)
// MessageTemplate is a pair of text/template sources (subject and message)
// together with their compiled forms.
type MessageTemplate struct {
	// Subject is the template source for the subject line.
	Subject string `json:"subject"`
	// Message is the template source for the message body.
	Message string `json:"message"`

	// Compiled forms, set by Compile. Either may be nil when the matching
	// source is empty or Compile has not succeeded.
	subjectTpl *template.Template
	messageTpl *template.Template
}

// SetDefault copies Subject and/or Message from d for whichever of the two is
// still empty on t. Values already set on t are left untouched.
func (t *MessageTemplate) SetDefault(d MessageTemplate) {
	if len(t.Subject) == 0 {
		t.Subject = d.Subject
	}
	if len(t.Message) == 0 {
		t.Message = d.Message
	}
}

// TODO: test
// Compile parses the non-empty Subject and Message sources. It stops at the
// first parse error and returns it; the message is not parsed if the subject
// failed.
func (t *MessageTemplate) Compile() error {
	var err error

	if len(t.Subject) > 0 {
		t.subjectTpl, err = compileTemplate(t.Subject)
	}

	if err == nil && len(t.Message) > 0 {
		t.messageTpl, err = compileTemplate(t.Message)
	}

	return err
}

// Exec renders the subject and message templates with data and returns them in
// that order. A part with no compiled template renders as an empty string.
func (t *MessageTemplate) Exec(data interface{}) (string, string) {
	return t.exec(t.subjectTpl, data), t.exec(t.messageTpl, data)
}

// exec renders a single template into a string. A nil template yields "";
// calling Execute on it would panic with a nil pointer dereference.
func (t *MessageTemplate) exec(tpl *template.Template, data interface{}) string {
	if tpl == nil {
		return ""
	}

	buf := new(bytes.Buffer)
	// Best effort: the signature has no error result, so on an execution
	// error whatever was rendered before the failure is returned.
	_ = tpl.Execute(buf, data)

	return buf.String()
}

// compileTemplate parses text as an unnamed text/template.
func compileTemplate(text string) (*template.Template, error) {
	return template.New("").Parse(text)
}