compile message for Fixed status
- better logging
This commit is contained in:
2
api.go
2
api.go
@@ -22,6 +22,7 @@ type CachetResponse struct {
|
||||
Data json.RawMessage `json:"data"`
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
func (api CachetAPI) Ping() error {
|
||||
resp, _, err := api.NewRequest("GET", "/ping", nil)
|
||||
if err != nil {
|
||||
@@ -50,6 +51,7 @@ func (api CachetAPI) SendMetric(id int, lag int64) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
// NewRequest wraps http.NewRequest
|
||||
func (api CachetAPI) NewRequest(requestType, url string, reqBody []byte) (*http.Response, CachetResponse, error) {
|
||||
req, err := http.NewRequest(requestType, api.URL+url, bytes.NewBuffer(reqBody))
|
||||
|
||||
@@ -76,7 +76,7 @@ func GetMonitorType(t string) string {
|
||||
return "http"
|
||||
}
|
||||
|
||||
return t
|
||||
return strings.ToLower(t)
|
||||
}
|
||||
|
||||
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
|
||||
|
||||
15
config_test.go
Normal file
15
config_test.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package cachet
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGetMonitorType(t *testing.T) {
|
||||
if monType := GetMonitorType(""); monType != "http" {
|
||||
t.Error("monitor type `` should default to http")
|
||||
}
|
||||
|
||||
if mt := GetMonitorType("HTTP"); mt != "http" {
|
||||
t.Error("does not return correct monitor type")
|
||||
}
|
||||
}
|
||||
19
http.go
19
http.go
@@ -8,14 +8,12 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Investigating template
|
||||
var defaultHTTPInvestigatingTpl = MessageTemplate{
|
||||
Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
|
||||
Message: `{{ .Monitor.Name }} check **failed** - {{ .now }}
|
||||
Message: `{{ .Monitor.Name }} check **failed** (server time: {{ .now }})
|
||||
|
||||
{{ .FailReason }}`,
|
||||
}
|
||||
@@ -42,6 +40,7 @@ type HTTPMonitor struct {
|
||||
bodyRegexp *regexp.Regexp
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
func (monitor *HTTPMonitor) test() bool {
|
||||
req, err := http.NewRequest(monitor.Method, monitor.Target, nil)
|
||||
for k, v := range monitor.Headers {
|
||||
@@ -64,35 +63,33 @@ func (monitor *HTTPMonitor) test() bool {
|
||||
defer resp.Body.Close()
|
||||
|
||||
if monitor.ExpectedStatusCode > 0 && resp.StatusCode != monitor.ExpectedStatusCode {
|
||||
monitor.lastFailReason = "Unexpected response code: " + strconv.Itoa(resp.StatusCode) + ". Expected " + strconv.Itoa(monitor.ExpectedStatusCode)
|
||||
monitor.lastFailReason = "Expected HTTP response status: " + strconv.Itoa(monitor.ExpectedStatusCode) + ", got: " + strconv.Itoa(resp.StatusCode)
|
||||
return false
|
||||
}
|
||||
|
||||
if monitor.bodyRegexp != nil {
|
||||
// check body
|
||||
// check response body
|
||||
responseBody, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
monitor.lastFailReason = err.Error()
|
||||
return false
|
||||
}
|
||||
|
||||
match := monitor.bodyRegexp.Match(responseBody)
|
||||
if !match {
|
||||
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ". Expected to match " + monitor.ExpectedBody
|
||||
if !monitor.bodyRegexp.Match(responseBody) {
|
||||
monitor.lastFailReason = "Unexpected body: " + string(responseBody) + ".\nExpected to match: " + monitor.ExpectedBody
|
||||
return false
|
||||
}
|
||||
|
||||
return match
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
func (mon *HTTPMonitor) Validate() []string {
|
||||
mon.Template.Investigating.SetDefault(defaultHTTPInvestigatingTpl)
|
||||
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
|
||||
|
||||
errs := mon.AbstractMonitor.Validate()
|
||||
logrus.Warnf("%#v", mon.Template.Investigating)
|
||||
|
||||
if len(mon.ExpectedBody) > 0 {
|
||||
exp, err := regexp.Compile(mon.ExpectedBody)
|
||||
|
||||
38
monitor.go
38
monitor.go
@@ -48,7 +48,12 @@ type AbstractMonitor struct {
|
||||
Threshold float32
|
||||
ThresholdCount bool `mapstructure:"threshold_count"`
|
||||
|
||||
// lag / average(lagHistory) * 100 = lag expressed as a percentage of the average lag
|
||||
// PerformanceThreshold sets the % limit above which this monitor will trigger degraded-performance
|
||||
PerformanceThreshold float32
|
||||
|
||||
history []bool
|
||||
lagHistory []float32
|
||||
lastFailReason string
|
||||
incident *Incident
|
||||
config *CachetMonitor
|
||||
@@ -136,6 +141,7 @@ func (mon *AbstractMonitor) ClockStop() {
|
||||
|
||||
func (mon *AbstractMonitor) test() bool { return false }
|
||||
|
||||
// TODO: test
|
||||
func (mon *AbstractMonitor) tick(iface MonitorInterface) {
|
||||
reqStart := getMs()
|
||||
up := iface.test()
|
||||
@@ -161,6 +167,7 @@ func (mon *AbstractMonitor) tick(iface MonitorInterface) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
// AnalyseData decides if the monitor is statistically up or down and creates / resolves an incident
|
||||
func (mon *AbstractMonitor) AnalyseData() {
|
||||
// look at the past few incidents
|
||||
@@ -172,10 +179,16 @@ func (mon *AbstractMonitor) AnalyseData() {
|
||||
}
|
||||
|
||||
t := (float32(numDown) / float32(len(mon.history))) * 100
|
||||
if mon.ThresholdCount {
|
||||
logrus.Printf("%s %d/%d down at %v", mon.Name, numDown, int(mon.Threshold), time.Now().Format(mon.config.DateFormat))
|
||||
l := logrus.WithFields(logrus.Fields{
|
||||
"monitor": mon.Name,
|
||||
"time": time.Now().Format(mon.config.DateFormat),
|
||||
})
|
||||
if numDown == 0 {
|
||||
l.Printf("monitor is up")
|
||||
} else if mon.ThresholdCount {
|
||||
l.Printf("monitor down %d/%d", numDown, int(mon.Threshold))
|
||||
} else {
|
||||
logrus.Printf("%s %.2f%%/%.2f%% down at %v", mon.Name, t, mon.Threshold, time.Now().Format(mon.config.DateFormat))
|
||||
l.Printf("monitor down %.2f%%/%.2f%%", t, mon.Threshold)
|
||||
}
|
||||
|
||||
histSize := HistorySize
|
||||
@@ -204,12 +217,12 @@ func (mon *AbstractMonitor) AnalyseData() {
|
||||
}
|
||||
|
||||
// is down, create an incident
|
||||
logrus.Warnf("%v: creating incident. Monitor is down: %v", mon.Name, mon.lastFailReason)
|
||||
l.Warnf("creating incident. Monitor is down: %v", mon.lastFailReason)
|
||||
// set investigating status
|
||||
mon.incident.SetInvestigating()
|
||||
// create/update incident
|
||||
if err := mon.incident.Send(mon.config); err != nil {
|
||||
logrus.Printf("Error sending incident: %v\n", err)
|
||||
l.Printf("Error sending incident: %v", err)
|
||||
}
|
||||
|
||||
return
|
||||
@@ -220,15 +233,20 @@ func (mon *AbstractMonitor) AnalyseData() {
|
||||
return
|
||||
}
|
||||
|
||||
logrus.Warnf("Resolving incident")
|
||||
|
||||
// was down and created an incident; it's now OK, so mark the incident as resolved.
|
||||
logrus.Printf("%v resolved downtime incident", mon.Name)
|
||||
l.Warn("Resolving incident")
|
||||
|
||||
// resolve incident
|
||||
mon.incident.Message = "\n**Resolved** - " + time.Now().Format(mon.config.DateFormat) + "\n\n - - - \n\n" + mon.incident.Message
|
||||
tplData := getTemplateData(mon)
|
||||
tplData["incident"] = mon.incident
|
||||
|
||||
subject, message := mon.Template.Fixed.Exec(tplData)
|
||||
mon.incident.Name = subject
|
||||
mon.incident.Message = message
|
||||
mon.incident.SetFixed()
|
||||
mon.incident.Send(mon.config)
|
||||
if err := mon.incident.Send(mon.config); err != nil {
|
||||
l.Printf("Error sending incident: %v", err)
|
||||
}
|
||||
|
||||
mon.lastFailReason = ""
|
||||
mon.incident = nil
|
||||
|
||||
24
readme.md
24
readme.md
@@ -97,3 +97,27 @@ Package usage
|
||||
When using `cachet-monitor` as a package in another program, you should follow what `cli/main.go` does. It is important to call `ValidateConfiguration` on `CachetMonitor` and all the monitors inside.
|
||||
|
||||
[API Documentation](https://godoc.org/github.com/CastawayLabs/cachet-monitor)
|
||||
|
||||
## License
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2016 Castaway Labs LLC
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -22,6 +22,7 @@ func (t *MessageTemplate) SetDefault(d MessageTemplate) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: test
|
||||
func (t *MessageTemplate) Compile() error {
|
||||
var err error
|
||||
|
||||
@@ -29,7 +30,7 @@ func (t *MessageTemplate) Compile() error {
|
||||
t.subjectTpl, err = compileTemplate(t.Subject)
|
||||
}
|
||||
|
||||
if err != nil && len(t.Message) > 0 {
|
||||
if err == nil && len(t.Message) > 0 {
|
||||
t.messageTpl, err = compileTemplate(t.Message)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user