compile message for Fixed status
- better logging
This commit is contained in:
2
api.go
2
api.go
@@ -22,6 +22,7 @@ type CachetResponse struct {
|
|||||||
Data json.RawMessage `json:"data"`
|
Data json.RawMessage `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
func (api CachetAPI) Ping() error {
|
func (api CachetAPI) Ping() error {
|
||||||
resp, _, err := api.NewRequest("GET", "/ping", nil)
|
resp, _, err := api.NewRequest("GET", "/ping", nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -50,6 +51,7 @@ func (api CachetAPI) SendMetric(id int, lag int64) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
// NewRequest wraps http.NewRequest
|
// NewRequest wraps http.NewRequest
|
||||||
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
|
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
|
||||||
req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
|
req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ func GetMonitorType(t string) string {
|
|||||||
return "http"
|
return "http"
|
||||||
}
|
}
|
||||||
|
|
||||||
return t
|
return strings.ToLower(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
|
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
|
||||||
|
|||||||
15
config_test.go
Normal file
15
config_test.go
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package cachet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetMonitorType(t *testing.T) {
|
||||||
|
if monType := GetMonitorType(""); monType != "http" {
|
||||||
|
t.Error("monitor type `` should default to http")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mt := GetMonitorType("HTTP"); mt != "http" {
|
||||||
|
t.Error("does not return correct monitor type")
|
||||||
|
}
|
||||||
|
}
|
||||||
19
http.go
19
http.go
@@ -8,14 +8,12 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/Sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Investigating template
|
// Investigating template
|
||||||
var defaultHTTPInvestigatingTpl = MessageTemplate{
|
var defaultHTTPInvestigatingTpl = MessageTemplate{
|
||||||
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
|
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
|
||||||
Message: `{{ .Monitor.Name }} check **failed** - {{ .now }}
|
Message: `{{ .Monitor.Name }} check **failed** (server time: {{ .now }})
|
||||||
|
|
||||||
{{ .FailReason }}`,
|
{{ .FailReason }}`,
|
||||||
}
|
}
|
||||||
@@ -42,6 +40,7 @@ type HTTPMonitor struct {
|
|||||||
bodyRegexp *regexp.Regexp
|
bodyRegexp *regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
func (monitor *HTTPMonitor) test() bool {
|
func (monitor *HTTPMonitor) test() bool {
|
||||||
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
|
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
|
||||||
for k, v := range monitor.Headers {
|
for k, v := range monitor.Headers {
|
||||||
@@ -64,35 +63,33 @@ func (monitor *HTTPMonitor) test() bool {
|
|||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
|
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
|
||||||
monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
|
monitor.lastFailReason = "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if monitor.bodyRegexp != nil {
|
if monitor.bodyRegexp != nil {
|
||||||
// check body
|
// check response body
|
||||||
responseBody, err := ioutil.ReadAll(resp.Body)
|
responseBody, err := ioutil.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
monitor.lastFailReason = err.Error()
|
monitor.lastFailReason = err.Error()
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
match := monitor.bodyRegexp.Match(responseBody)
|
if !monitor.bodyRegexp.Match(responseBody) {
|
||||||
if !match {
|
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
|
||||||
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
return match
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
func (mon *HTTPMonitor) Validate() []string {
|
func (mon *HTTPMonitor) Validate() []string {
|
||||||
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
|
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
|
||||||
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
|
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
|
||||||
|
|
||||||
errs := mon.AbstractMonitor.Validate()
|
errs := mon.AbstractMonitor.Validate()
|
||||||
logrus.Warnf("%#v", mon.Template.Investigating)
|
|
||||||
|
|
||||||
if len(mon.ExpectedBody) > 0 {
|
if len(mon.ExpectedBody) > 0 {
|
||||||
exp, err := regexp.Compile(mon.ExpectedBody)
|
exp, err := regexp.Compile(mon.ExpectedBody)
|
||||||
|
|||||||
38
monitor.go
38
monitor.go
@@ -48,7 +48,12 @@ type AbstractMonitor struct {
|
|||||||
Threshold float32
|
Threshold float32
|
||||||
ThresholdCount bool `mapstructure:"threshold_count"`
|
ThresholdCount bool `mapstructure:"threshold_count"`
|
||||||
|
|
||||||
|
// lag / average(lagHistory) * 100 = percentage above average lag
|
||||||
|
// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
|
||||||
|
PerformanceThreshold float32
|
||||||
|
|
||||||
history []bool
|
history []bool
|
||||||
|
lagHistory []float32
|
||||||
lastFailReason string
|
lastFailReason string
|
||||||
incident *Incident
|
incident *Incident
|
||||||
config *CachetMonitor
|
config *CachetMonitor
|
||||||
@@ -136,6 +141,7 @@ func (mon *AbstractMonitor) ClockStop() {
|
|||||||
|
|
||||||
func (mon *AbstractMonitor) test() bool { return false }
|
func (mon *AbstractMonitor) test() bool { return false }
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
|
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
|
||||||
reqStart := getMs()
|
reqStart := getMs()
|
||||||
up := iface.test()
|
up := iface.test()
|
||||||
@@ -161,6 +167,7 @@ func (mon *AbstractMonitor) tick(iface MonitorInterface) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
||||||
func (mon *AbstractMonitor) AnalyseData() {
|
func (mon *AbstractMonitor) AnalyseData() {
|
||||||
// look at the past few incidents
|
// look at the past few incidents
|
||||||
@@ -172,10 +179,16 @@ func (mon *AbstractMonitor) AnalyseData() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
t := (float32(numDown) / float32(len(mon.history))) * 100
|
t := (float32(numDown) / float32(len(mon.history))) * 100
|
||||||
if mon.ThresholdCount {
|
l := logrus.WithFields(logrus.Fields{
|
||||||
logrus.Printf("%s %d/%d down at %v", mon.Name, numDown, int(mon.Threshold), time.Now().Format(mon.config.DateFormat))
|
"monitor": mon.Name,
|
||||||
|
"time": time.Now().Format(mon.config.DateFormat),
|
||||||
|
})
|
||||||
|
if numDown == 0 {
|
||||||
|
l.Printf("monitor is up")
|
||||||
|
} else if mon.ThresholdCount {
|
||||||
|
l.Printf("monitor down %d/%d", numDown, int(mon.Threshold))
|
||||||
} else {
|
} else {
|
||||||
logrus.Printf("%s %.2f%%/%.2f%% down at %v", mon.Name, t, mon.Threshold, time.Now().Format(mon.config.DateFormat))
|
l.Printf("monitor down %.2f%%/%.2f%%", t, mon.Threshold)
|
||||||
}
|
}
|
||||||
|
|
||||||
histSize := HistorySize
|
histSize := HistorySize
|
||||||
@@ -204,12 +217,12 @@ func (mon *AbstractMonitor) AnalyseData() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// is down, create an incident
|
// is down, create an incident
|
||||||
logrus.Warnf("%v: creating incident. Monitor is down: %v", mon.Name, mon.lastFailReason)
|
l.Warnf("creating incident. Monitor is down: %v", mon.lastFailReason)
|
||||||
// set investigating status
|
// set investigating status
|
||||||
mon.incident.SetInvestigating()
|
mon.incident.SetInvestigating()
|
||||||
// create/update incident
|
// create/update incident
|
||||||
if err := mon.incident.Send(mon.config); err != nil {
|
if err := mon.incident.Send(mon.config); err != nil {
|
||||||
logrus.Printf("Error sending incident: %v\n", err)
|
l.Printf("Error sending incident: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
@@ -220,15 +233,20 @@ func (mon *AbstractMonitor) AnalyseData() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
logrus.Warnf("Resolving incident")
|
|
||||||
|
|
||||||
// was down, created an incident, its now ok, make it resolved.
|
// was down, created an incident, its now ok, make it resolved.
|
||||||
logrus.Printf("%v resolved downtime incident", mon.Name)
|
l.Warn("Resolving incident")
|
||||||
|
|
||||||
// resolve incident
|
// resolve incident
|
||||||
mon.incident.Message = "\n**Resolved** - " + time.Now().Format(mon.config.DateFormat) + "\n\n - - - \n\n" + mon.incident.Message
|
tplData := getTemplateData(mon)
|
||||||
|
tplData["incident"] = mon.incident
|
||||||
|
|
||||||
|
subject, message := mon.Template.Fixed.Exec(tplData)
|
||||||
|
mon.incident.Name = subject
|
||||||
|
mon.incident.Message = message
|
||||||
mon.incident.SetFixed()
|
mon.incident.SetFixed()
|
||||||
mon.incident.Send(mon.config)
|
if err := mon.incident.Send(mon.config); err != nil {
|
||||||
|
l.Printf("Error sending incident: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
mon.lastFailReason = ""
|
mon.lastFailReason = ""
|
||||||
mon.incident = nil
|
mon.incident = nil
|
||||||
|
|||||||
24
readme.md
24
readme.md
@@ -97,3 +97,27 @@ Package usage
|
|||||||
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
|
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
|
||||||
|
|
||||||
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
|
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2016 Castaway Labs LLC
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
@@ -22,6 +22,7 @@ func (t *MessageTemplate) SetDefault(d MessageTemplate) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: test
|
||||||
func (t *MessageTemplate) Compile() error {
|
func (t *MessageTemplate) Compile() error {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
@@ -29,7 +30,7 @@ func (t *MessageTemplate) Compile() error {
|
|||||||
t.subjectTpl, err = compileTemplate(t.Subject)
|
t.subjectTpl, err = compileTemplate(t.Subject)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil && len(t.Message) > 0 {
|
if err == nil && len(t.Message) > 0 {
|
||||||
t.messageTpl, err = compileTemplate(t.Message)
|
t.messageTpl, err = compileTemplate(t.Message)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user