From: Dharmik Parmar Date: Mon, 11 May 2026 18:04:08 +0000 (+0530) Subject: watchcat: add optional failure timer reset X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=94502932205c017112e616701ec93bd3887e7d2f;p=openwrt-packages.git watchcat: add optional failure timer reset Add an opt-in reset_failure_timer option for restart_iface and run_script modes. When enabled, watchcat starts a fresh failure window after the recovery action finishes before allowing another recovery action. The existing behavior remains the default. Document the intended default and reset_failure_timer timing behavior in TIMINGS.md and use a safer string comparison for the reset_failure_timer check. Signed-off-by: Dharmik Parmar --- diff --git a/utils/watchcat/Makefile b/utils/watchcat/Makefile index b66a237ac..88fb0bfc7 100644 --- a/utils/watchcat/Makefile +++ b/utils/watchcat/Makefile @@ -9,7 +9,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=watchcat PKG_VERSION:=1 -PKG_RELEASE:=22 +PKG_RELEASE:=23 PKG_MAINTAINER:=Roger D PKG_LICENSE:=GPL-2.0 diff --git a/utils/watchcat/TIMINGS.md b/utils/watchcat/TIMINGS.md new file mode 100644 index 000000000..960648b85 --- /dev/null +++ b/utils/watchcat/TIMINGS.md @@ -0,0 +1,69 @@ +# watchcat timing notes + +This file documents the intended timing for the `restart_iface` and +`run_script` paths in `watchcat.sh`, especially around the optional +`reset_failure_timer` flag. + +The main point is that the repeated restart window in the default behavior is +intentional. It is not an accidental side effect. + +## Terms + +- `failure_period`: how long failed reachability must continue before the + recovery action is triggered +- `ping_frequency_interval`: how often reachability is checked +- `recovery action`: either restarting the interface or running the configured + script + +## Default behavior + +By default, watchcat is meant to keep retrying during a sustained outage. + +This is useful for cases like WireGuard or OpenVPN, where the upstream +internet may recover before the monitored path through the tunnel becomes +usable again. Since watchcat is probing through the monitored interface, it +may continue to see failed checks until that interface is restarted again. + +In this mode, if failed checks continue and the outage lasts long enough to +cross multiple trigger windows, multiple recovery attempts are expected. + +Example: + +- `failure_period=60` +- failed checks continue throughout the outage +- the recovery action itself takes 15 seconds + +```text +t=0 outage starts +t=60 restart #1 starts +t=75 restart #1 finishes +t=120 restart #2 +``` + +If connectivity has recovered by the next check, there should be no further +restart. If failed checks continue and another trigger window is crossed, +another restart is expected. + +## `reset_failure_timer=1` + +This mode is more conservative. + +Once the recovery action finishes, a fresh failure window starts from that +point. Time spent inside the recovery action no longer counts toward the next +trigger. + +Example: + +- `failure_period=60` +- failed checks continue throughout the outage +- the recovery action itself takes 15 seconds + +```text +t=0 outage starts +t=60 restart #1 starts +t=75 restart #1 finishes +t=135 restart #2 would be the earliest next retry +``` + +This mode is useful when repeated or closely spaced recovery actions are less +desirable and a fresh failure window is preferred after each completed action. diff --git a/utils/watchcat/files/watchcat.config b/utils/watchcat/files/watchcat.config index ed6544cc3..0513d4119 100644 --- a/utils/watchcat/files/watchcat.config +++ b/utils/watchcat/files/watchcat.config @@ -3,3 +3,6 @@ config watchcat option mode 'ping_reboot' option pinghosts '8.8.8.8' option forcedelay '30' + # For restart_iface and run_script, start a fresh failure window after + # each recovery action finishes before allowing another restart. + # option reset_failure_timer '1' diff --git a/utils/watchcat/files/watchcat.init b/utils/watchcat/files/watchcat.init index 9bfc68d0e..75a8f0124 100644 --- a/utils/watchcat/files/watchcat.init +++ b/utils/watchcat/files/watchcat.init @@ -41,6 +41,7 @@ config_watchcat() { config_get interface "$1" interface config_get mmifacename "$1" mmifacename "null" config_get_bool unlockbands "$1" unlockbands "0" + config_get_bool reset_failure_timer "$1" reset_failure_timer "0" config_get addressfamily "$1" addressfamily "any" config_get script "$1" script @@ -110,13 +111,19 @@ config_watchcat() { ;; restart_iface) procd_open_instance "watchcat_${1}" - procd_set_param command /usr/bin/watchcat.sh "restart_iface" "$period" "$pinghosts" "$pingperiod" "$pingsize" "$interface" "$mmifacename" "$unlockbands" "$addressfamily" + procd_set_param command /usr/bin/watchcat.sh \ + "restart_iface" "$period" "$pinghosts" "$pingperiod" \ + "$pingsize" "$interface" "$mmifacename" "$unlockbands" \ + "$addressfamily" "" "$reset_failure_timer" procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}" procd_close_instance ;; run_script) procd_open_instance "watchcat_${1}" - procd_set_param command /usr/bin/watchcat.sh "run_script" "$period" "$pinghosts" "$pingperiod" "$pingsize" "$interface" "$addressfamily" "$script" + procd_set_param command /usr/bin/watchcat.sh \ + "run_script" "$period" "$pinghosts" "$pingperiod" \ + "$pingsize" "$interface" "$addressfamily" "$script" \ + "$reset_failure_timer" procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}" procd_close_instance ;; diff --git a/utils/watchcat/files/watchcat.sh b/utils/watchcat/files/watchcat.sh index 7770c4381..a5f406283 100644 --- a/utils/watchcat/files/watchcat.sh +++ b/utils/watchcat/files/watchcat.sh @@ -110,6 +110,11 @@ watchcat_monitor_network() { mm_iface_unlock_bands="$7" address_family="$8" script="$9" + reset_failure_timer="" + if [ "$#" -gt 9 ]; then + shift 9 + reset_failure_timer="$1" + fi time_now="$(cat /proc/uptime)" time_now="${time_now%%.*}" @@ -177,7 +182,13 @@ watchcat_monitor_network() { fi fi /etc/init.d/watchcat start - # Restart timer cycle. + # Optionally start a fresh failure window after the recovery action + # finishes instead of continuing to count the original outage. + if [ "$reset_failure_timer" = "1" ]; then + time_now="$(cat /proc/uptime)" + time_now="${time_now%%.*}" + time_lastcheck="$time_now" + fi time_lastcheck_withinternet="$time_now" } @@ -260,12 +271,47 @@ ping_reboot) watchcat_ping "$2" "$3" "$4" "$5" "$6" "$7" "$8" ;; restart_iface) - # args from init script: period pinghosts pingperiod pingsize interface mmifacename unlockbands addressfamily - watchcat_monitor_network "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$9" "" + shift + # args from init script: period pinghosts pingperiod pingsize interface + # mmifacename unlockbands addressfamily script reset_failure_timer + failure_period="$1" + ping_hosts="$2" + ping_frequency_interval="$3" + ping_size="$4" + iface="$5" + mm_iface_name="$6" + mm_iface_unlock_bands="$7" + address_family="$8" + script="$9" + reset_failure_timer="" + if [ "$#" -gt 9 ]; then + shift 9 + reset_failure_timer="$1" + fi + watchcat_monitor_network "$failure_period" "$ping_hosts" \ + "$ping_frequency_interval" "$ping_size" "$iface" \ + "$mm_iface_name" "$mm_iface_unlock_bands" \ + "$address_family" "$script" "$reset_failure_timer" ;; run_script) - # args from init script: period pinghosts pingperiod pingsize interface addressfamily script - watchcat_monitor_network "$2" "$3" "$4" "$5" "$6" "" "" "$7" "$8" + shift + # args from init script: period pinghosts pingperiod pingsize interface + # addressfamily script reset_failure_timer + failure_period="$1" + ping_hosts="$2" + ping_frequency_interval="$3" + ping_size="$4" + iface="$5" + address_family="$6" + script="$7" + reset_failure_timer="" + if [ "$#" -gt 7 ]; then + shift 7 + reset_failure_timer="$1" + fi + watchcat_monitor_network "$failure_period" "$ping_hosts" \ + "$ping_frequency_interval" "$ping_size" "$iface" "" "" \ + "$address_family" "$script" "$reset_failure_timer" ;; *) echo "Error: invalid mode selected: $mode" diff --git a/utils/watchcat/test-version.sh b/utils/watchcat/test-version.sh new file mode 100755 index 000000000..c52d3c26b --- /dev/null +++ b/utils/watchcat/test-version.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +exit 0