Keepalive fix, auto-reconnect on disconnect, tab icon fix, video playback guard

Keepalive: tokio::time::sleep inside select! was re-created on every loop
iteration, so the timer kept resetting and keepalives never fired. Switched to
tokio::time::interval, which ticks reliably. The missing keepalives had caused
connections to be zombie-reaped (10min timeout with no pings).

Auto-reconnect: unexpected disconnects (stream error, not SocialDisconnectNotice)
now attempt a direct reconnect after a 3s delay, using the last known address
from the peers table or social route. Falls back to notify_growth() if the
direct reconnect fails or no address is known.

Tab icons: updateTabBadge was using textContent which destroyed the icon and
label spans inside tab buttons. Now updates only the .tab-label span and manages
a separate .tab-badge element.

Video playback: feed re-render is skipped while any video or audio is actively
playing, preventing the echo caused by DOM destruction and media element
recreation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Scott Reimers 2026-03-22 23:27:41 -04:00
parent 68afc40b16
commit 6320a82852
4 changed files with 118 additions and 12 deletions

View file

@ -3197,6 +3197,13 @@ impl ConnectionManager {
self.log_activity(ActivityLevel::Warn, ActivityCategory::Connection, "Mesh empty".into(), None);
self.notify_recovery();
}
// Signal growth loop to fill the empty slot (don't wait 10min for rebalance)
let total_slots = self.preferred_slots + self.local_slots + self.wide_slots;
if remaining < total_slots {
self.notify_growth();
}
}
/// Notify watchers that a previously disconnected peer has reconnected.
@ -4495,7 +4502,10 @@ impl ConnectionManager {
last_activity: Arc<AtomicU64>,
) {
let our_stable_id = conn.stable_id();
let keepalive_interval = std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS);
// Use interval (not sleep) so the timer ticks reliably even when other select branches fire.
// tokio::time::sleep inside select! restarts on every loop iteration — keepalive would never fire.
let mut keepalive_tick = tokio::time::interval(std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS));
keepalive_tick.tick().await; // consume the immediate first tick
loop {
tokio::select! {
uni_result = conn.accept_uni() => {
@ -4534,7 +4544,7 @@ impl ConnectionManager {
}
}
}
_ = tokio::time::sleep(keepalive_interval) => {
_ = keepalive_tick.tick() => {
// Send lightweight keepalive ping — keeps NAT mapping alive
// and prevents zombie detection on the remote side
if let Ok(mut send) = conn.open_uni().await {
@ -4551,15 +4561,72 @@ impl ConnectionManager {
}
}
// Connection ended — only clean up if this is still the active connection
// (a reconnect may have already replaced our entry with a newer connection)
let mut cm = conn_mgr.lock().await;
let is_current = cm.connections.get(&remote_node_id)
.map_or(false, |pc| pc.connection.stable_id() == our_stable_id);
// Connection ended unexpectedly — clean up and attempt reconnect
let (is_current, peer_addr, has_social_route) = {
let mut cm = conn_mgr.lock().await;
let is_current = cm.connections.get(&remote_node_id)
.map_or(false, |pc| pc.connection.stable_id() == our_stable_id);
if is_current {
// Gather reconnect info before disconnect clears it
let storage = cm.storage.get().await;
let addr = storage.get_peer_record(&remote_node_id).ok().flatten()
.and_then(|r| r.addresses.first().cloned())
.or_else(|| storage.get_social_route(&remote_node_id).ok().flatten()
.and_then(|r| r.addresses.first().cloned()));
let has_route = storage.has_social_route(&remote_node_id).unwrap_or(false);
drop(storage);
cm.disconnect_peer(&remote_node_id).await;
(true, addr, has_route)
} else {
debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect");
(false, None, false)
}
};
// Attempt reconnect for unexpected disconnects (not intentional SocialDisconnectNotice)
if is_current {
cm.disconnect_peer(&remote_node_id).await;
} else {
debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect");
if let Some(addr) = peer_addr {
let cm_arc = Arc::clone(&conn_mgr);
tokio::spawn(async move {
// Brief delay to let the disconnect settle and avoid reconnect storms
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
// Check if already reconnected (by the other side or growth loop)
{
let cm = cm_arc.lock().await;
if cm.connections.contains_key(&remote_node_id) || cm.sessions.contains_key(&remote_node_id) {
return; // Already reconnected
}
}
if let Ok(eid) = iroh::EndpointId::from_bytes(&remote_node_id) {
let ep_addr = iroh::EndpointAddr::from(eid).with_ip_addr(addr);
let endpoint = {
let cm = cm_arc.lock().await;
cm.endpoint.clone()
};
match ConnectionManager::connect_to_unlocked(&endpoint, ep_addr).await {
Ok(conn) => {
let mut cm = cm_arc.lock().await;
if !cm.connections.contains_key(&remote_node_id) {
cm.register_new_connection(remote_node_id, conn, &[addr], PeerSlotKind::Local).await;
info!(peer = hex::encode(remote_node_id), "Auto-reconnected after unexpected disconnect");
cm.log_activity(ActivityLevel::Info, ActivityCategory::Connection,
format!("Auto-reconnected to {}", &hex::encode(remote_node_id)[..8]), Some(remote_node_id));
}
}
Err(e) => {
debug!(peer = hex::encode(remote_node_id), error = %e, "Auto-reconnect failed");
// Signal growth loop as fallback
let cm = cm_arc.lock().await;
cm.notify_growth();
}
}
}
});
} else {
// No known address — signal growth loop to find new peers
let cm = conn_mgr.lock().await;
cm.notify_growth();
}
}
}