Merge branch 'monitor_interval' of https://github.com/Soulou/cachet-monitor into Soulou-monitor_interval
* 'monitor_interval' of https://github.com/Soulou/cachet-monitor: update readme; implement 'interval' config parameter -> number of seconds between checks # Conflicts: # cachet/config.go # main.go # monitor.go # readme.md
This commit is contained in:
85
monitor.go
85
monitor.go
@@ -3,8 +3,12 @@ package cachet
|
|||||||
import (
|
import (
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -12,18 +16,22 @@ const timeout = time.Duration(time.Second)
|
|||||||
|
|
||||||
// Monitor data model
|
// Monitor data model
|
||||||
type Monitor struct {
|
type Monitor struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
MetricID int `json:"metric_id"`
|
MetricID int `json:"metric_id"`
|
||||||
Threshold float32 `json:"threshold"`
|
Threshold float32 `json:"threshold"`
|
||||||
ComponentID *int `json:"component_id"`
|
ComponentID *int `json:"component_id"`
|
||||||
ExpectedStatusCode int `json:"expected_status_code"`
|
ExpectedStatusCode int `json:"expected_status_code"`
|
||||||
StrictTLS *bool `json:"strict_tls"`
|
StrictTLS *bool `json:"strict_tls"`
|
||||||
|
Interval time.Duration `json:"interval"`
|
||||||
|
|
||||||
History []bool `json:"-"`
|
History []bool `json:"-"`
|
||||||
LastFailReason *string `json:"-"`
|
LastFailReason *string `json:"-"`
|
||||||
Incident *Incident `json:"-"`
|
Incident *Incident `json:"-"`
|
||||||
config *CachetMonitor
|
config *CachetMonitor
|
||||||
|
|
||||||
|
// Closed when mon.Stop() is called
|
||||||
|
stopC chan bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cfg *CachetMonitor) Run() {
|
func (cfg *CachetMonitor) Run() {
|
||||||
@@ -34,17 +42,47 @@ func (cfg *CachetMonitor) Run() {
|
|||||||
if mon.MetricID > 0 {
|
if mon.MetricID > 0 {
|
||||||
cfg.Logger.Printf(" - Logs lag to metric id: %d\n", mon.MetricID)
|
cfg.Logger.Printf(" - Logs lag to metric id: %d\n", mon.MetricID)
|
||||||
}
|
}
|
||||||
|
if mon.ComponentID != nil && *mon.ComponentID > 0 {
|
||||||
|
cfg.Logger.Printf(" - Updates component id: %d\n", *mon.ComponentID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.Logger.Println()
|
cfg.Logger.Println()
|
||||||
|
wg := &sync.WaitGroup{}
|
||||||
|
|
||||||
ticker := time.NewTicker(time.Duration(cfg.Interval) * time.Second)
|
for _, mon := range cfg.Monitors {
|
||||||
for range ticker.C {
|
wg.Add(1)
|
||||||
for _, mon := range cfg.Monitors {
|
mon.config = cfg
|
||||||
mon.config = cfg
|
mon.stopC = make(chan bool)
|
||||||
go mon.Run()
|
|
||||||
}
|
go func(mon *Monitor) {
|
||||||
|
if mon.Interval < 1 {
|
||||||
|
mon.Interval = time.Duration(cfg.Interval)
|
||||||
|
}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(mon.Interval * time.Second)
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
mon.Run()
|
||||||
|
case <-mon.StopC():
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(mon)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
signals := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(signals, os.Interrupt, os.Kill)
|
||||||
|
<-signals
|
||||||
|
|
||||||
|
log.Println("Waiting monitors to end current operation")
|
||||||
|
for _, mon := range cfg.Monitors {
|
||||||
|
mon.Stop()
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run loop
|
// Run loop
|
||||||
@@ -64,6 +102,27 @@ func (monitor *Monitor) Run() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (monitor *Monitor) Stop() {
|
||||||
|
if monitor.Stopped() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
close(monitor.stopC)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (monitor *Monitor) StopC() <-chan bool {
|
||||||
|
return monitor.stopC
|
||||||
|
}
|
||||||
|
|
||||||
|
func (monitor *Monitor) Stopped() bool {
|
||||||
|
select {
|
||||||
|
case <-monitor.stopC:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) doRequest() bool {
|
func (monitor *Monitor) doRequest() bool {
|
||||||
client := &http.Client{
|
client := &http.Client{
|
||||||
Timeout: timeout,
|
Timeout: timeout,
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ Features
|
|||||||
--------
|
--------
|
||||||
|
|
||||||
- [x] Creates & Resolves Incidents
|
- [x] Creates & Resolves Incidents
|
||||||
- [x] Posts monitor lag every second * config.Interval
|
- [x] Posts monitor lag (interval configurable)
|
||||||
- [x] Updates Component to Partial Outage
|
- [x] Updates Component to Partial Outage
|
||||||
- [x] Updates Component to Major Outage if in Partial Outage
|
- [x] Updates Component to Major Outage if in Partial Outage
|
||||||
- [x] Can be run on multiple servers and geo regions
|
- [x] Can be run on multiple servers and geo regions
|
||||||
@@ -30,7 +30,8 @@ Configuration
|
|||||||
"component_id": <component id from cachet>,
|
"component_id": <component id from cachet>,
|
||||||
"threshold": 80,
|
"threshold": 80,
|
||||||
"expected_status_code": 200,
|
"expected_status_code": 200,
|
||||||
"strict_tls": true
|
"strict_tls": true,
|
||||||
|
"interval": 5
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"insecure_api": false
|
"insecure_api": false
|
||||||
@@ -45,6 +46,7 @@ Configuration
|
|||||||
- `component_id` is optional
|
- `component_id` is optional
|
||||||
- `threshold` is a percentage
|
- `threshold` is a percentage
|
||||||
- `expected_status_code` is a http response code
|
- `expected_status_code` is a http response code
|
||||||
|
- `interval` is the duration in seconds between two checks.
|
||||||
- GET request will be performed on the `url`
|
- GET request will be performed on the `url`
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
|
|||||||
Reference in New Issue
Block a user