diff --git a/man/mosquitto.conf.5.xml b/man/mosquitto.conf.5.xml index d29d2581..6d2338eb 100644 --- a/man/mosquitto.conf.5.xml +++ b/man/mosquitto.conf.5.xml @@ -2058,7 +2058,7 @@ openssl dhparam -out dhparam.pem 2048 - base cap + base cap [stable] constant Set the amount of time a bridge using the automatic @@ -2067,14 +2067,22 @@ openssl dhparam -out dhparam.pem 2048 time in seconds, or to use a backoff mechanism based on "Decorrelated Jitter", which adds a degree of randomness to when the restart occurs, starting at the - base and increasing up to the cap. Set a constant - timeout of 20 seconds: + base and increasing up to the cap. The backoff time will be reset to + base after a successful connection. + When stable is specified, the backoff time + will be reset only if the connection remains open for at least + stable seconds. + Set a constant timeout of 20 seconds: restart_timeout 20 Set backoff with a base (start value) of 10 seconds and a cap (upper limit) of 60 seconds: -restart_timeout 10 30 +restart_timeout 10 60 + Same as previous example, but wait for the connection to be stable for + at least 30 seconds before resetting the backoff: + +restart_timeout 10 60 30 Defaults to jitter with a base of 5 seconds and cap of 30 seconds. diff --git a/mosquitto.conf b/mosquitto.conf index a24a84ec..2b9bee30 100644 --- a/mosquitto.conf +++ b/mosquitto.conf @@ -857,7 +857,11 @@ # # Set backoff with a base (start value) of 10 seconds and a cap (upper limit) of # 60 seconds: -# restart_timeout 10 30 +# restart_timeout 10 60 +# +# Same as previous example, but wait for the connection to be stable for +# at least 30 seconds before resetting the backoff: +# restart_timeout 10 60 30 # # Defaults to jitter with a base of 5 and cap of 30 #restart_timeout 5 30 diff --git a/src/bridge.c b/src/bridge.c index 9f02b95d..a30c15a7 100644 --- a/src/bridge.c +++ b/src/bridge.c @@ -57,8 +57,7 @@ Contributors: #ifdef WITH_BRIDGE -static void bridge__backoff_step(struct mosquitto *context); -static void bridge__backoff_reset(struct mosquitto *context); +static void bridge__update_backoff(struct mosquitto__bridge *bridge); #if defined(__GLIBC__) && defined(WITH_ADNS) static int bridge__connect_step1(struct mosquitto *context); static int bridge__connect_step2(struct mosquitto *context); @@ -275,9 +274,6 @@ static int bridge__connect_step1(struct mosquitto *context) } } - /* prepare backoff for a possible failure. Restart timeout will be reset if connection gets established */ - bridge__backoff_step(context); - if(context->bridge->notifications){ if(context->max_qos == 0){ qos = 0; @@ -500,9 +496,6 @@ int bridge__connect(struct mosquitto *context) } } - /* prepare backoff for a possible failure. Restart timeout will be reset if connection gets established */ - bridge__backoff_step(context); - if(context->bridge->notifications){ if(context->max_qos == 0){ qos = 0; @@ -707,7 +700,7 @@ int bridge__on_connect(struct mosquitto *context) } } - bridge__backoff_reset(context); + context->bridge->connected_at = db.now_s; return MOSQ_ERR_SUCCESS; } @@ -863,38 +856,40 @@ static int rand_between(int low, int high) return (abs(r) % (high - low)) + low; } -static void bridge__backoff_step(struct mosquitto *context) +static void bridge__backoff_step(struct mosquitto__bridge *bridge) { - struct mosquitto__bridge *bridge; - if(!context || !context->bridge) return; - - bridge = context->bridge; - - /* skip if not using backoff */ - if(bridge->backoff_cap){ - /* “Decorrelated Jitter” calculation, according to: - * https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ - */ - bridge->restart_timeout = rand_between(bridge->backoff_base, bridge->restart_timeout * 3); - if(bridge->restart_timeout > bridge->backoff_cap){ - bridge->restart_timeout = bridge->backoff_cap; - } + /* + “Decorrelated Jitter” calculation, according to: + https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ + */ + + bridge->restart_timeout = rand_between(bridge->backoff_base, bridge->restart_timeout * 3); + if(bridge->restart_timeout > bridge->backoff_cap){ + bridge->restart_timeout = bridge->backoff_cap; } } -static void bridge__backoff_reset(struct mosquitto *context) +static void bridge__backoff_reset(struct mosquitto__bridge *bridge) { - struct mosquitto__bridge *bridge; - if(!context || !context->bridge) return; - - bridge = context->bridge; + bridge->restart_timeout = bridge->backoff_base; +} - /* skip if not using backoff */ - if(bridge->backoff_cap){ - bridge->restart_timeout = bridge->backoff_base; +static void bridge__update_backoff(struct mosquitto__bridge *bridge) +{ + if(!bridge) return; + if(!bridge->backoff_cap) return; /* skip if not using jitter */ + + if (bridge->connected_at && db.now_s - bridge->connected_at >= bridge->stable_connection_period) { + log__printf(NULL, MOSQ_LOG_INFO, "Bridge %s connection was stable enough, resetting backoff", bridge->name); + bridge__backoff_reset(bridge); + } else { + bridge__backoff_step(bridge); } -} + bridge->connected_at = 0; + + log__printf(NULL, MOSQ_LOG_INFO, "Bridge %s next backoff will be %d", bridge->name, bridge->restart_timeout); +} static void bridge_check_pending(struct mosquitto *context) { @@ -1004,22 +999,21 @@ void bridge_check(void) } } } - } - - if(!net__is_connected(context)){ + }else{ if(reload_if_needed(context)) continue; /* Want to try to restart the bridge connection */ if(!context->bridge->restart_t){ - context->bridge->restart_t = db.now_s+context->bridge->restart_timeout; + bridge__update_backoff(context->bridge); + context->bridge->restart_t = 1000*db.now_s+context->bridge->restart_timeout; context->bridge->cur_address++; if(context->bridge->cur_address == context->bridge->address_count){ context->bridge->cur_address = 0; } - loop__update_next_event(context->bridge->restart_timeout*1000); + loop__update_next_event(context->bridge->restart_timeout); }else{ if((context->bridge->start_type == bst_lazy && context->bridge->lazy_reconnect) - || (context->bridge->start_type == bst_automatic && db.now_s >= context->bridge->restart_t)){ + || (context->bridge->start_type == bst_automatic && 1000*db.now_s >= context->bridge->restart_t)){ #if defined(__GLIBC__) && defined(WITH_ADNS) if(context->adns){ diff --git a/src/conf.c b/src/conf.c index d6427e41..742c0918 100644 --- a/src/conf.c +++ b/src/conf.c @@ -1501,6 +1501,7 @@ static int config__read_file_core(struct mosquitto__config *config, bool reload, cur_bridge->restart_timeout = 0; cur_bridge->backoff_base = 5; cur_bridge->backoff_cap = 30; + cur_bridge->stable_connection_period = 0; cur_bridge->threshold = 10; cur_bridge->try_private = true; cur_bridge->attempt_unsubscribe = true; @@ -2142,6 +2143,7 @@ static int config__read_file_core(struct mosquitto__config *config, bool reload, log__printf(NULL, MOSQ_LOG_ERR, "Error: Invalid bridge configuration."); return MOSQ_ERR_INVAL; } + cur_bridge->backoff_cap = 0; /* set backoff to constant mode, unless cap is specified further down */ token = strtok_r(NULL, " ", &saveptr); if(!token){ log__printf(NULL, MOSQ_LOG_ERR, "Error: Empty restart_timeout value in configuration."); @@ -2160,7 +2162,19 @@ static int config__read_file_core(struct mosquitto__config *config, bool reload, log__printf(NULL, MOSQ_LOG_ERR, "Error: backoff cap is lower than the base in restart_timeout."); return MOSQ_ERR_INVAL; } + + token = strtok_r(NULL, " ", &saveptr); + if(token){ + cur_bridge->stable_connection_period = atoi(token); + if(cur_bridge->stable_connection_period < 0){ + log__printf(NULL, MOSQ_LOG_ERR, "Error: stable connection period cannot be negative."); + return MOSQ_ERR_INVAL; + } + } } + cur_bridge->restart_timeout *= 1000; /* backoff is tracked in ms */ + cur_bridge->backoff_base *= 1000; + cur_bridge->backoff_cap *= 1000; #else log__printf(NULL, MOSQ_LOG_WARNING, "Warning: Bridge support not available."); #endif diff --git a/src/mosquitto_broker_internal.h b/src/mosquitto_broker_internal.h index 88a5524c..23878778 100644 --- a/src/mosquitto_broker_internal.h +++ b/src/mosquitto_broker_internal.h @@ -584,6 +584,7 @@ struct mosquitto__bridge{ bool topic_remapping; enum mosquitto__protocol protocol_version; time_t restart_t; + time_t connected_at; char *remote_clientid; char *remote_username; char *remote_password; @@ -599,6 +600,7 @@ struct mosquitto__bridge{ int restart_timeout; int backoff_base; int backoff_cap; + int stable_connection_period; int threshold; uint32_t maximum_packet_size; uint32_t session_expiry_interval;