huge refactor
- extendable backends - better project structure - better cli interface
This commit is contained in:
143
monitors/dns.go
Normal file
143
monitors/dns.go
Normal file
@@ -0,0 +1,143 @@
|
||||
package monitors
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Investigating template
//
// defaultDNSInvestigatingTpl supplies the subject and message that
// DNSMonitor.Validate installs (through SetDefault) for whichever part of
// the monitor's "investigating" template was left empty.
var defaultDNSInvestigatingTpl = MessageTemplate{
	Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
	Message: `{{ .Monitor.Name }} DNS check **failed** (server time: {{ .now }})

{{ .FailReason }}`,
}
|
||||
|
||||
// Fixed template
//
// defaultDNSFixedTpl supplies the subject and message that
// DNSMonitor.Validate installs (through SetDefault) for whichever part of
// the monitor's "fixed" template was left empty.
var defaultDNSFixedTpl = MessageTemplate{
	Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
	Message: `**Resolved** - {{ .now }}

- - -

{{ .incident.Message }}`,
}
|
||||
|
||||
// DNSAnswer describes one record that a DNS response is expected to contain.
// matchAnswer compares it against the space-joined rdata fields of a record.
type DNSAnswer struct {
	// Regex, when non-empty, is compiled into regexp by DNSMonitor.Validate.
	Regex string
	// regexp is the compiled form of Regex; when non-nil it takes precedence
	// over Exact in matchAnswer.
	regexp *regexp.Regexp
	// Exact is compared for string equality when no compiled regexp is set.
	Exact string
}
|
||||
|
||||
// DNSMonitor is a monitor that queries a DNS server for Target and checks
// the response against a list of expected answers.
type DNSMonitor struct {
	AbstractMonitor `mapstructure:",squash"`

	// IP:port format or blank to use system defined DNS
	DNS string

	// A(default), AAAA, MX, ...
	Question string
	// question is the numeric record type resolved from Question by Validate
	// (0 when the name could not be resolved).
	question uint16

	// Answers lists the records expected in the response; test requires each
	// entry to match at least one returned record.
	Answers []DNSAnswer
}
|
||||
|
||||
func (monitor *DNSMonitor) Validate(validate backendValidateFunc) []string {
|
||||
monitor.Template.Investigating.SetDefault(defaultDNSInvestigatingTpl)
|
||||
monitor.Template.Fixed.SetDefault(defaultDNSFixedTpl)
|
||||
|
||||
errs := monitor.AbstractMonitor.Validate(validate)
|
||||
|
||||
if len(monitor.DNS) == 0 {
|
||||
config, _ := dns.ClientConfigFromFile("/etc/resolv.conf")
|
||||
if len(config.Servers) > 0 {
|
||||
monitor.DNS = net.JoinHostPort(config.Servers[0], config.Port)
|
||||
}
|
||||
}
|
||||
|
||||
if len(monitor.DNS) == 0 {
|
||||
monitor.DNS = "8.8.8.8:53"
|
||||
}
|
||||
|
||||
if len(monitor.Question) == 0 {
|
||||
monitor.Question = "A"
|
||||
}
|
||||
monitor.Question = strings.ToUpper(monitor.Question)
|
||||
|
||||
monitor.question = findDNSType(monitor.Question)
|
||||
if monitor.question == 0 {
|
||||
errs = append(errs, "Could not look up DNS question type")
|
||||
}
|
||||
|
||||
for i, a := range monitor.Answers {
|
||||
if len(a.Regex) > 0 {
|
||||
monitor.Answers[i].regexp, _ = regexp.Compile(a.Regex)
|
||||
}
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
func (monitor *DNSMonitor) test() (bool, []error) {
|
||||
m := new(dns.Msg)
|
||||
m.SetQuestion(dns.Fqdn(monitor.Target), monitor.question)
|
||||
m.RecursionDesired = true
|
||||
|
||||
c := new(dns.Client)
|
||||
r, _, err := c.Exchange(m, monitor.DNS)
|
||||
if err != nil {
|
||||
logrus.Warnf("DNS error: %v", err)
|
||||
return false, []error{err}
|
||||
}
|
||||
|
||||
if r.Rcode != dns.RcodeSuccess {
|
||||
return false, []error{errors.New("Invalid status code returned")}
|
||||
}
|
||||
|
||||
for _, check := range monitor.Answers {
|
||||
found := false
|
||||
for _, answer := range r.Answer {
|
||||
found = matchAnswer(answer, check)
|
||||
if found {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
logrus.Warnf("DNS check failed: %v. Not found in any of %v", check, r.Answer)
|
||||
return false, []error{errors.New("Record not found")}
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func findDNSType(t string) uint16 {
|
||||
for rr, strType := range dns.TypeToString {
|
||||
if t == strType {
|
||||
return rr
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func matchAnswer(answer dns.RR, check DNSAnswer) bool {
|
||||
fields := []string{}
|
||||
for i := 0; i < dns.NumField(answer); i++ {
|
||||
fields = append(fields, dns.Field(answer, i+1))
|
||||
}
|
||||
|
||||
str := strings.Join(fields, " ")
|
||||
|
||||
if check.regexp != nil {
|
||||
return check.regexp.Match([]byte(str))
|
||||
}
|
||||
|
||||
return str == check.Exact
|
||||
}
|
||||
124
monitors/http.go
Normal file
124
monitors/http.go
Normal file
@@ -0,0 +1,124 @@
|
||||
package monitors
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Investigating template
//
// defaultHTTPInvestigatingTpl supplies the subject and message that
// HTTPMonitor.Validate installs (through SetDefault) for whichever part of
// the monitor's "investigating" template was left empty.
var defaultHTTPInvestigatingTpl = MessageTemplate{
	Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
	Message: `{{ .Monitor.Name }} HTTP check **failed** (server time: {{ .now }})

{{ .FailReason }}`,
}
|
||||
|
||||
// Fixed template
//
// defaultHTTPFixedTpl supplies the subject and message that
// HTTPMonitor.Validate installs (through SetDefault) for whichever part of
// the monitor's "fixed" template was left empty.
var defaultHTTPFixedTpl = MessageTemplate{
	Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
	Message: `**Resolved** - {{ .now }}

- - -

{{ .incident.Message }}`,
}
|
||||
|
||||
// HTTPMonitor is a monitor that issues an HTTP request against Target and
// checks the response status and/or body.
type HTTPMonitor struct {
	AbstractMonitor `mapstructure:",squash"`

	// Method is the HTTP method; Validate upper-cases it and defaults it to GET.
	Method string
	// ExpectedStatusCode, when > 0, must equal the response status code.
	ExpectedStatusCode int `mapstructure:"expected_status_code"`
	// Headers are added to every request.
	Headers map[string]string

	// compiled to Regexp
	ExpectedBody string `mapstructure:"expected_body"`
	// bodyRegexp is the compiled ExpectedBody, set by Validate; when non-nil
	// the response body must match it.
	bodyRegexp *regexp.Regexp
}
|
||||
|
||||
// TODO: test
|
||||
func (monitor *HTTPMonitor) test() (bool, []error) {
|
||||
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
|
||||
for k, v := range monitor.Headers {
|
||||
req.Header.Add(k, v)
|
||||
}
|
||||
|
||||
transport := http.DefaultTransport.(*http.Transport)
|
||||
transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: monitor.Strict == false}
|
||||
client := &http.Client{
|
||||
Timeout: time.Duration(monitor.Timeout * time.Second),
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return false, []error{err}
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
|
||||
fail := "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
|
||||
return false, []error{errors.New(fail)}
|
||||
}
|
||||
|
||||
if monitor.bodyRegexp != nil {
|
||||
// check response body
|
||||
responseBody, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return false, []error{err}
|
||||
}
|
||||
|
||||
if !monitor.bodyRegexp.Match(responseBody) {
|
||||
fail := "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
|
||||
return false, []error{errors.New(fail)}
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
func (mon *HTTPMonitor) Validate(validate backendValidateFunc) []string {
|
||||
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
|
||||
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
|
||||
|
||||
errs := mon.AbstractMonitor.Validate(validate)
|
||||
|
||||
if len(mon.ExpectedBody) > 0 {
|
||||
exp, err := regexp.Compile(mon.ExpectedBody)
|
||||
if err != nil {
|
||||
errs = append(errs, "Regexp compilation failure: "+err.Error())
|
||||
} else {
|
||||
mon.bodyRegexp = exp
|
||||
}
|
||||
}
|
||||
|
||||
if len(mon.ExpectedBody) == 0 && mon.ExpectedStatusCode == 0 {
|
||||
errs = append(errs, "Both 'expected_body' and 'expected_status_code' fields empty")
|
||||
}
|
||||
|
||||
mon.Method = strings.ToUpper(mon.Method)
|
||||
switch mon.Method {
|
||||
case "GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD":
|
||||
break
|
||||
case "":
|
||||
mon.Method = "GET"
|
||||
default:
|
||||
errs = append(errs, "Unsupported HTTP method: "+mon.Method)
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
func (mon *HTTPMonitor) Describe() []string {
|
||||
features := mon.AbstractMonitor.Describe()
|
||||
features = append(features, "Method: "+mon.Method)
|
||||
|
||||
return features
|
||||
}
|
||||
257
monitors/monitor.go
Normal file
257
monitors/monitor.go
Normal file
@@ -0,0 +1,257 @@
|
||||
package monitors
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// DefaultInterval is what AbstractMonitor.Validate assigns to Interval when
// the configured value is below 1.
// NOTE(review): consumers in this package multiply Interval by time.Second;
// confirm the unit this default is meant to be expressed in.
const DefaultInterval = time.Second * 60

// DefaultTimeout is what AbstractMonitor.Validate assigns to Timeout when the
// configured value is below 1.
const DefaultTimeout = time.Second

// HistorySize is the number of most recent check results kept, and the number
// required before a status other than "unsaturated" is reported, unless
// ThresholdCount is set (then Threshold is used as the size).
const HistorySize = 10
|
||||
|
||||
// MonitorStatus is the state of a monitor as computed from its check history.
type MonitorStatus string

// Status values returned by GetStatus. They are declared as untyped string
// constants, so they convert implicitly to MonitorStatus.
const (
	// MonitorStatusUp: the down threshold has not been reached.
	MonitorStatusUp = "up"
	// MonitorStatusDown: the down threshold has been reached.
	MonitorStatusDown = "down"
	// MonitorStatusNotSaturated: the history does not yet hold the required
	// number of results.
	MonitorStatusNotSaturated = "unsaturated"
)
|
||||
|
||||
// backendValidateFunc is the callback through which a backend validates a
// monitor; the strings it returns are appended to the validation errors.
type backendValidateFunc = func(monitor *AbstractMonitor) []string

// MonitorTestFunc performs a single check and reports whether the target is
// up, together with any errors encountered.
type MonitorTestFunc func() (up bool, errs []error)

// MonitorTickFunc is invoked after every check with the resulting status, the
// check's errors and its duration (lag) in milliseconds.
type MonitorTickFunc func(monitor MonitorInterface, status MonitorStatus, errs []error, lag int64)
|
||||
|
||||
// MonitorInterface is the behavior shared by all monitor types. Because it
// contains unexported methods, it can only be implemented inside this package
// (typically by embedding AbstractMonitor).
type MonitorInterface interface {
	// Start runs the check loop until Stop is called.
	Start(MonitorTestFunc, *sync.WaitGroup, MonitorTickFunc, bool)
	// Stop ends a loop started by Start.
	Stop()

	// tick runs one check and records its result.
	tick(MonitorTestFunc) (status MonitorStatus, errors []error, lag int64)
	// test performs the type-specific check.
	test() (bool, []error)

	// Validate applies defaults and returns configuration errors.
	Validate(validate backendValidateFunc) []string
	// Describe returns human-readable feature lines.
	Describe() []string

	// GetMonitor exposes the shared AbstractMonitor data.
	GetMonitor() *AbstractMonitor
	// GetTestFunc returns the function used to run a check.
	GetTestFunc() MonitorTestFunc
	// GetLastStatus returns the status stored by UpdateLastStatus.
	GetLastStatus() MonitorStatus
	// UpdateLastStatus stores a new status and returns the previous one.
	UpdateLastStatus(status MonitorStatus) (old MonitorStatus)
}
|
||||
|
||||
// AbstractMonitor data model
//
// AbstractMonitor holds the configuration and runtime state common to every
// monitor type; concrete monitors embed it.
type AbstractMonitor struct {
	// Name identifies the monitor; Validate requires it to be non-empty.
	Name string
	// Target is what gets checked (the request URL for HTTP, the queried name
	// for DNS).
	Target string

	// (default)http / dns
	Type string
	// Strict enables TLS certificate verification in the HTTP monitor.
	Strict bool

	// Interval and Timeout are multiplied by time.Second where they are
	// consumed in this package, i.e. they are treated as a number of seconds.
	Interval time.Duration
	Timeout  time.Duration
	// Params is not read anywhere in this file.
	// NOTE(review): presumably free-form backend options; confirm with the
	// backend code.
	Params map[string]interface{}

	// Templating stuff
	Template MonitorTemplates

	// Threshold = percentage / number of down incidents
	// Validate resets values <= 0 to 100.
	Threshold float32
	// ThresholdCount switches Threshold from a percentage of failed checks to
	// an absolute number, which is then also used as the history size.
	ThresholdCount bool `mapstructure:"threshold_count"`

	// lag / average(lagHistory) * 100 = percentage above average lag
	// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
	// PerformanceThreshold float32

	// history holds the most recent check results, oldest first (true = up).
	history []bool
	// lastStatus is the value last stored through UpdateLastStatus.
	lastStatus MonitorStatus

	// Closed when mon.Stop() is called
	stopC chan bool
}
|
||||
|
||||
func (mon *AbstractMonitor) Validate(validate backendValidateFunc) []string {
|
||||
errs := []string{}
|
||||
|
||||
if len(mon.Name) == 0 {
|
||||
errs = append(errs, "Name is required")
|
||||
}
|
||||
|
||||
if mon.Interval < 1 {
|
||||
mon.Interval = DefaultInterval
|
||||
}
|
||||
if mon.Timeout < 1 {
|
||||
mon.Timeout = DefaultTimeout
|
||||
}
|
||||
|
||||
if mon.Timeout > mon.Interval {
|
||||
errs = append(errs, "Timeout greater than interval")
|
||||
}
|
||||
|
||||
// get the backend to validate the monitor
|
||||
errs = append(errs, validate(mon)...)
|
||||
|
||||
if mon.Threshold <= 0 {
|
||||
mon.Threshold = 100
|
||||
}
|
||||
|
||||
// if len(mon.Template.Fixed.Message) == 0 || len(mon.Template.Fixed.Subject) == 0 {
|
||||
// errs = append(errs, "\"fixed\" template empty/missing")
|
||||
// }
|
||||
// if len(mon.Template.Investigating.Message) == 0 || len(mon.Template.Investigating.Subject) == 0 {
|
||||
// errs = append(errs, "\"investigating\" template empty/missing")
|
||||
// }
|
||||
if err := mon.Template.Fixed.Compile(); err != nil {
|
||||
errs = append(errs, "Could not compile \"fixed\" template: "+err.Error())
|
||||
}
|
||||
if err := mon.Template.Investigating.Compile(); err != nil {
|
||||
errs = append(errs, "Could not compile \"investigating\" template: "+err.Error())
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) GetMonitor() *AbstractMonitor {
|
||||
return mon
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) Describe() []string {
|
||||
features := []string{"Type: " + mon.Type}
|
||||
|
||||
if len(mon.Name) > 0 {
|
||||
features = append(features, "Name: "+mon.Name)
|
||||
}
|
||||
|
||||
return features
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) Start(testFunc MonitorTestFunc, wg *sync.WaitGroup, tickFunc MonitorTickFunc, immediate bool) {
|
||||
wg.Add(1)
|
||||
|
||||
mon.stopC = make(chan bool)
|
||||
if immediate {
|
||||
status, errs, lag := mon.tick(testFunc)
|
||||
tickFunc(mon, status, errs, lag)
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(mon.Interval * time.Second)
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
status, errs, lag := mon.tick(testFunc)
|
||||
tickFunc(mon, status, errs, lag)
|
||||
case <-mon.stopC:
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) Stop() {
|
||||
select {
|
||||
case <-mon.stopC:
|
||||
return
|
||||
default:
|
||||
close(mon.stopC)
|
||||
}
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) tick(testFunc MonitorTestFunc) (status MonitorStatus, errors []error, lag int64) {
|
||||
reqStart := getMs()
|
||||
up, errs := testFunc()
|
||||
lag = getMs() - reqStart
|
||||
|
||||
histSize := HistorySize
|
||||
if mon.ThresholdCount {
|
||||
histSize = int(mon.Threshold)
|
||||
}
|
||||
|
||||
if len(mon.history) == histSize-1 {
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"monitor": mon.Name,
|
||||
}).Warn("monitor saturated")
|
||||
}
|
||||
if len(mon.history) >= histSize {
|
||||
mon.history = mon.history[len(mon.history)-(histSize-1):]
|
||||
}
|
||||
mon.history = append(mon.history, up)
|
||||
status = mon.GetStatus()
|
||||
errors = errs
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
||||
func (mon *AbstractMonitor) GetStatus() MonitorStatus {
|
||||
numDown := 0
|
||||
for _, wasUp := range mon.history {
|
||||
if wasUp == false {
|
||||
numDown++
|
||||
}
|
||||
}
|
||||
|
||||
t := (float32(numDown) / float32(len(mon.history))) * 100
|
||||
logFields := logrus.Fields{"monitor": mon.Name}
|
||||
// stop reporting time for jsonformatter, it's there by default
|
||||
if _, ok := logrus.StandardLogger().Formatter.(*logrus.JSONFormatter); !ok {
|
||||
logFields["t"] = time.Now()
|
||||
}
|
||||
l := logrus.WithFields(logFields)
|
||||
|
||||
symbol := "⚠️"
|
||||
if t == 100 {
|
||||
symbol = "❌"
|
||||
}
|
||||
if numDown == 0 {
|
||||
l.Printf("👍 up")
|
||||
} else if mon.ThresholdCount {
|
||||
l.Printf("%v down (%d/%d)", symbol, numDown, int(mon.Threshold))
|
||||
} else {
|
||||
l.Printf("%v down %.0f%%/%.0f%%", symbol, t, mon.Threshold)
|
||||
}
|
||||
|
||||
histSize := HistorySize
|
||||
if mon.ThresholdCount {
|
||||
histSize = int(mon.Threshold)
|
||||
}
|
||||
|
||||
if len(mon.history) != histSize {
|
||||
// not saturated
|
||||
return MonitorStatusNotSaturated
|
||||
}
|
||||
|
||||
var down bool
|
||||
if mon.ThresholdCount {
|
||||
down = numDown >= int(mon.Threshold)
|
||||
} else {
|
||||
down = t >= mon.Threshold
|
||||
}
|
||||
|
||||
if !down {
|
||||
return MonitorStatusUp
|
||||
}
|
||||
|
||||
return MonitorStatusDown
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) GetTestFunc() MonitorTestFunc {
|
||||
return mon.test
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) GetLastStatus() MonitorStatus {
|
||||
return mon.lastStatus
|
||||
}
|
||||
|
||||
func (mon *AbstractMonitor) UpdateLastStatus(status MonitorStatus) (old MonitorStatus) {
|
||||
old = mon.lastStatus
|
||||
mon.lastStatus = status
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// test is the placeholder check of the abstract monitor: it always reports
// "not up" with no errors. Concrete monitor types define their own test.
func (mon *AbstractMonitor) test() (bool, []error) {
	return false, nil
}
|
||||
|
||||
// getMs returns the current wall-clock time as milliseconds since the Unix
// epoch.
func getMs() int64 {
	const nanosPerMilli = int64(time.Millisecond / time.Nanosecond)

	nanos := time.Now().UnixNano()
	return nanos / nanosPerMilli
}
|
||||
58
monitors/template.go
Normal file
58
monitors/template.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package monitors
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
// MonitorTemplates groups the two message templates of a monitor.
type MonitorTemplates struct {
	// Investigating is the template for a failing check.
	Investigating MessageTemplate
	// Fixed is the template for a resolved failure.
	Fixed MessageTemplate
}
|
||||
|
||||
// MessageTemplate holds the text/template sources for a subject and a message
// together with their compiled forms.
type MessageTemplate struct {
	// Subject and Message are the template sources.
	Subject string `json:"subject"`
	Message string `json:"message"`

	// subjectTpl and messageTpl are set by Compile and used by Exec.
	subjectTpl *template.Template
	messageTpl *template.Template
}
|
||||
|
||||
func (t *MessageTemplate) SetDefault(d MessageTemplate) {
|
||||
if len(t.Subject) == 0 {
|
||||
t.Subject = d.Subject
|
||||
}
|
||||
if len(t.Message) == 0 {
|
||||
t.Message = d.Message
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
func (t *MessageTemplate) Compile() error {
|
||||
var err error
|
||||
|
||||
if len(t.Subject) > 0 {
|
||||
t.subjectTpl, err = compileTemplate(t.Subject)
|
||||
}
|
||||
|
||||
if err == nil && len(t.Message) > 0 {
|
||||
t.messageTpl, err = compileTemplate(t.Message)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *MessageTemplate) Exec(data interface{}) (string, string) {
|
||||
return t.exec(t.subjectTpl, data), t.exec(t.messageTpl, data)
|
||||
}
|
||||
|
||||
func (t *MessageTemplate) exec(tpl *template.Template, data interface{}) string {
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
tpl.Execute(buf, data)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// compileTemplate parses text as an unnamed text/template and returns the
// parsed template or the parse error.
func compileTemplate(text string) (*template.Template, error) {
	tpl := template.New("")
	return tpl.Parse(text)
}
|
||||
Reference in New Issue
Block a user