PKG_NAME:=watchcat
PKG_VERSION:=1
-PKG_RELEASE:=22
+PKG_RELEASE:=23
PKG_MAINTAINER:=Roger D <rogerdammit@gmail.com>
PKG_LICENSE:=GPL-2.0
--- /dev/null
+# watchcat timing notes
+
+This file documents the intended timing for the `restart_iface` and
+`run_script` paths in `watchcat.sh`, especially around the optional
+`reset_failure_timer` flag.
+
+The main point is that, in the default behavior, repeated recovery attempts
+during a sustained outage are intentional, not an accidental side effect.
+
+## Terms
+
+- `failure_period` (`period` in the init script): how long reachability checks
+ must keep failing before the recovery action is triggered
+- `ping_frequency_interval` (`pingperiod` in the init script): how often
+ reachability is checked
+- recovery action: restarting the interface or running the configured script
+
+## Default behavior
+
+By default, watchcat is meant to keep retrying during a sustained outage.
+
+This is useful for cases like WireGuard or OpenVPN, where the upstream
+internet may recover before the monitored path through the tunnel becomes
+usable again. Since watchcat is probing through the monitored interface, it
+may continue to see failed checks until that interface is restarted again.
+
+In this mode, if failed checks continue and the outage lasts long enough to
+cross multiple trigger windows, multiple recovery attempts are expected.
+
+Example:
+
+- `failure_period=60`
+- failed checks continue throughout the outage
+- the recovery action itself takes 15 seconds
+
+```text
+t=0 outage starts
+t=60 restart #1 starts
+t=75 restart #1 finishes
+t=120 restart #2 starts (the 15 s spent in restart #1 still counts)
+```
+
+If connectivity has recovered by the next check, there should be no further
+restart. If failed checks continue and another trigger window is crossed,
+another restart is expected.
+
+## `reset_failure_timer=1`
+
+This mode is more conservative.
+
+Once the recovery action finishes, a fresh failure window starts from that
+point. Time spent inside the recovery action no longer counts toward the next
+trigger.
+
+Example:
+
+- `failure_period=60`
+- failed checks continue throughout the outage
+- the recovery action itself takes 15 seconds
+
+```text
+t=0 outage starts
+t=60 restart #1 starts
+t=75 restart #1 finishes
+t=135 earliest restart #2 (a full 60 s after restart #1 finished)
+```
+
+This mode is useful when repeated or closely spaced recovery actions are less
+desirable and a fresh failure window is preferred after each completed action.
option mode 'ping_reboot'
option pinghosts '8.8.8.8'
option forcedelay '30'
+ # For restart_iface and run_script, start a fresh failure window once each
+ # recovery action finishes, delaying the next one by a full failure period.
+ # option reset_failure_timer '1'
config_get interface "$1" interface
config_get mmifacename "$1" mmifacename "null"
config_get_bool unlockbands "$1" unlockbands "0"
+ config_get_bool reset_failure_timer "$1" reset_failure_timer "0"
config_get addressfamily "$1" addressfamily "any"
config_get script "$1" script
;;
restart_iface)
procd_open_instance "watchcat_${1}"
- procd_set_param command /usr/bin/watchcat.sh "restart_iface" "$period" "$pinghosts" "$pingperiod" "$pingsize" "$interface" "$mmifacename" "$unlockbands" "$addressfamily"
+ procd_set_param command /usr/bin/watchcat.sh \
+ "restart_iface" "$period" "$pinghosts" "$pingperiod" \
+ "$pingsize" "$interface" "$mmifacename" "$unlockbands" \
+ "$addressfamily" "" "$reset_failure_timer"
procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}"
procd_close_instance
;;
run_script)
procd_open_instance "watchcat_${1}"
- procd_set_param command /usr/bin/watchcat.sh "run_script" "$period" "$pinghosts" "$pingperiod" "$pingsize" "$interface" "$addressfamily" "$script"
+ procd_set_param command /usr/bin/watchcat.sh \
+ "run_script" "$period" "$pinghosts" "$pingperiod" \
+ "$pingsize" "$interface" "$addressfamily" "$script" \
+ "$reset_failure_timer"
procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}"
procd_close_instance
;;
mm_iface_unlock_bands="$7"
address_family="$8"
script="$9"
+ reset_failure_timer=""
+ if [ "$#" -gt 9 ]; then
+ shift 9
+ reset_failure_timer="$1"
+ fi
time_now="$(cat /proc/uptime)"
time_now="${time_now%%.*}"
fi
fi
/etc/init.d/watchcat start
- # Restart timer cycle.
+ # Optionally start a fresh failure window after the recovery action
+ # finishes instead of continuing to count the original outage.
+ if [ "$reset_failure_timer" = "1" ]; then
+ time_now="$(cat /proc/uptime)"
+ time_now="${time_now%%.*}"
+ time_lastcheck="$time_now"
+ fi
time_lastcheck_withinternet="$time_now"
}
watchcat_ping "$2" "$3" "$4" "$5" "$6" "$7" "$8"
;;
restart_iface)
- # args from init script: period pinghosts pingperiod pingsize interface mmifacename unlockbands addressfamily
- watchcat_monitor_network "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$9" ""
+ shift
+ # args from init script: period pinghosts pingperiod pingsize interface
+ # mmifacename unlockbands addressfamily script reset_failure_timer
+ failure_period="$1"
+ ping_hosts="$2"
+ ping_frequency_interval="$3"
+ ping_size="$4"
+ iface="$5"
+ mm_iface_name="$6"
+ mm_iface_unlock_bands="$7"
+ address_family="$8"
+ script="$9"
+ reset_failure_timer=""
+ if [ "$#" -gt 9 ]; then
+ shift 9
+ reset_failure_timer="$1"
+ fi
+ watchcat_monitor_network "$failure_period" "$ping_hosts" \
+ "$ping_frequency_interval" "$ping_size" "$iface" \
+ "$mm_iface_name" "$mm_iface_unlock_bands" \
+ "$address_family" "$script" "$reset_failure_timer"
;;
run_script)
- # args from init script: period pinghosts pingperiod pingsize interface addressfamily script
- watchcat_monitor_network "$2" "$3" "$4" "$5" "$6" "" "" "$7" "$8"
+ shift
+ # args from init script: period pinghosts pingperiod pingsize interface
+ # addressfamily script reset_failure_timer
+ failure_period="$1"
+ ping_hosts="$2"
+ ping_frequency_interval="$3"
+ ping_size="$4"
+ iface="$5"
+ address_family="$6"
+ script="$7"
+ reset_failure_timer=""
+ if [ "$#" -gt 7 ]; then
+ shift 7
+ reset_failure_timer="$1"
+ fi
+ watchcat_monitor_network "$failure_period" "$ping_hosts" \
+ "$ping_frequency_interval" "$ping_size" "$iface" "" "" \
+ "$address_family" "$script" "$reset_failure_timer"
;;
*)
echo "Error: invalid mode selected: $mode"
--- /dev/null
+#!/bin/sh
+
+exit 0