Keepalive fix, auto-reconnect on disconnect, tab icon fix, video playback guard
Keepalive: tokio::time::sleep inside select! was re-created on every loop iteration, so the timer reset whenever another branch fired. Keepalives never went out, and idle connections were zombie-reaped (10min timeout with no pings). Switched to tokio::time::interval, which ticks reliably.

Auto-reconnect: unexpected disconnects (stream error, not SocialDisconnectNotice) now attempt a direct reconnect after a 3s delay, using the last known address from the peers table or social route. Falls back to notify_growth() if the direct reconnect fails.

Tab icons: updateTabBadge was using textContent, which destroyed the icon and label spans inside tab buttons. It now updates only the .tab-label span and manages a separate .tab-badge element.

Video playback: feed re-render is skipped while any video or audio is actively playing, preventing echo from DOM destruction and media element recreation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Parent: 68afc40b16
Commit: 6320a82852
4 changed files with 118 additions and 12 deletions
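The timer-reset pitfall is easy to reproduce in isolation. Below is a minimal, self-contained sketch (not ItsGoin code; all names are illustrative) contrasting the broken and fixed approaches:

use std::time::Duration;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = tokio::sync::mpsc::channel::<u32>(8);
    tokio::spawn(async move {
        // Simulated traffic arriving every second.
        for i in 0.. {
            tokio::time::sleep(Duration::from_secs(1)).await;
            if tx.send(i).await.is_err() { break; }
        }
    });

    // Fix: an Interval is created once and keeps ticking no matter which
    // select! branch fired last.
    let mut keepalive = tokio::time::interval(Duration::from_secs(5));
    keepalive.tick().await; // consume the immediate first tick

    loop {
        tokio::select! {
            Some(msg) = rx.recv() => {
                println!("got message {msg}");
                // Bug variant: with `_ = tokio::time::sleep(Duration::from_secs(5))`
                // as the other branch, this arm winning would drop and re-create
                // the sleep future, restarting the 5s timer. With 1s traffic,
                // the keepalive arm would never fire.
            }
            _ = keepalive.tick() => {
                println!("keepalive fires every 5s despite constant traffic");
            }
        }
    }
}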
@@ -3197,6 +3197,13 @@ impl ConnectionManager {
             self.log_activity(ActivityLevel::Warn, ActivityCategory::Connection, "Mesh empty".into(), None);
             self.notify_recovery();
         }
+
+        // Signal growth loop to fill the empty slot (don't wait 10min for rebalance)
+        let total_slots = self.preferred_slots + self.local_slots + self.wide_slots;
+        if remaining < total_slots {
+            self.notify_growth();
+        }
+
     }
 
     /// Notify watchers that a previously disconnected peer has reconnected.
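notify_growth() itself is not shown in this diff. A plausible sketch of the pattern, assuming it is built on tokio::sync::Notify (an assumption; the project's actual implementation may differ):

use std::{sync::Arc, time::Duration};
use tokio::sync::Notify;

// On disconnect (as in the hunk above): wake the growth loop immediately
// instead of waiting out the 10-minute rebalance timer.
fn notify_growth(growth: &Notify) {
    // notify_one() stores a permit if the loop isn't currently awaiting,
    // so a signal sent between iterations is not lost.
    growth.notify_one();
}

async fn growth_loop(growth: Arc<Notify>) {
    loop {
        tokio::select! {
            // Slow path: periodic rebalance. Re-creating the sleep each
            // iteration is fine here, unlike the keepalive bug: after any
            // wake we deliberately want a fresh 10-minute window.
            _ = tokio::time::sleep(Duration::from_secs(600)) => {}
            // Fast path: a disconnect just emptied a slot.
            _ = growth.notified() => {}
        }
        // ...try to fill empty peer slots here...
    }
}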
@@ -4495,7 +4502,10 @@ impl ConnectionManager {
         last_activity: Arc<AtomicU64>,
     ) {
         let our_stable_id = conn.stable_id();
-        let keepalive_interval = std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS);
+        // Use interval (not sleep) so the timer ticks reliably even when other select branches fire.
+        // tokio::time::sleep inside select! restarts on every loop iteration — keepalive would never fire.
+        let mut keepalive_tick = tokio::time::interval(std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS));
+        keepalive_tick.tick().await; // consume the immediate first tick
         loop {
             tokio::select! {
                 uni_result = conn.accept_uni() => {
@@ -4534,7 +4544,7 @@ impl ConnectionManager {
                         }
                     }
                 }
-                _ = tokio::time::sleep(keepalive_interval) => {
+                _ = keepalive_tick.tick() => {
                     // Send lightweight keepalive ping — keeps NAT mapping alive
                     // and prevents zombie detection on the remote side
                     if let Ok(mut send) = conn.open_uni().await {
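One Interval subtlety worth knowing, as an aside (not part of this commit): tokio's Interval defaults to MissedTickBehavior::Burst, which fires all missed ticks back-to-back after a stall. For keepalives, Delay (simply resuming the cadence) is usually the better fit, so several pings don't go out at once:

use tokio::time::{interval, Duration, Interval, MissedTickBehavior};

fn make_keepalive_timer(secs: u64) -> Interval {
    let mut tick = interval(Duration::from_secs(secs));
    // Delay: after a stall, resume the cadence instead of bursting
    // catch-up ticks.
    tick.set_missed_tick_behavior(MissedTickBehavior::Delay);
    tick
}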
@@ -4551,15 +4561,72 @@ impl ConnectionManager {
                 }
             }
         }
-        // Connection ended — only clean up if this is still the active connection
-        // (a reconnect may have already replaced our entry with a newer connection)
-        let mut cm = conn_mgr.lock().await;
-        let is_current = cm.connections.get(&remote_node_id)
-            .map_or(false, |pc| pc.connection.stable_id() == our_stable_id);
+        // Connection ended unexpectedly — clean up and attempt reconnect
+        let (is_current, peer_addr, has_social_route) = {
+            let mut cm = conn_mgr.lock().await;
+            let is_current = cm.connections.get(&remote_node_id)
+                .map_or(false, |pc| pc.connection.stable_id() == our_stable_id);
+            if is_current {
+                // Gather reconnect info before disconnect clears it
+                let storage = cm.storage.get().await;
+                let addr = storage.get_peer_record(&remote_node_id).ok().flatten()
+                    .and_then(|r| r.addresses.first().cloned())
+                    .or_else(|| storage.get_social_route(&remote_node_id).ok().flatten()
+                        .and_then(|r| r.addresses.first().cloned()));
+                let has_route = storage.has_social_route(&remote_node_id).unwrap_or(false);
+                drop(storage);
+                cm.disconnect_peer(&remote_node_id).await;
+                (true, addr, has_route)
+            } else {
+                debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect");
+                (false, None, false)
+            }
+        };
+
+        // Attempt reconnect for unexpected disconnects (not intentional SocialDisconnectNotice)
         if is_current {
-            cm.disconnect_peer(&remote_node_id).await;
-        } else {
-            debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect");
+            if let Some(addr) = peer_addr {
+                let cm_arc = Arc::clone(&conn_mgr);
+                tokio::spawn(async move {
+                    // Brief delay to let the disconnect settle and avoid reconnect storms
+                    tokio::time::sleep(std::time::Duration::from_secs(3)).await;
+                    // Check if already reconnected (by the other side or growth loop)
+                    {
+                        let cm = cm_arc.lock().await;
+                        if cm.connections.contains_key(&remote_node_id) || cm.sessions.contains_key(&remote_node_id) {
+                            return; // Already reconnected
+                        }
+                    }
+                    if let Ok(eid) = iroh::EndpointId::from_bytes(&remote_node_id) {
+                        let ep_addr = iroh::EndpointAddr::from(eid).with_ip_addr(addr);
+                        let endpoint = {
+                            let cm = cm_arc.lock().await;
+                            cm.endpoint.clone()
+                        };
+                        match ConnectionManager::connect_to_unlocked(&endpoint, ep_addr).await {
+                            Ok(conn) => {
+                                let mut cm = cm_arc.lock().await;
+                                if !cm.connections.contains_key(&remote_node_id) {
+                                    cm.register_new_connection(remote_node_id, conn, &[addr], PeerSlotKind::Local).await;
+                                    info!(peer = hex::encode(remote_node_id), "Auto-reconnected after unexpected disconnect");
+                                    cm.log_activity(ActivityLevel::Info, ActivityCategory::Connection,
+                                        format!("Auto-reconnected to {}", &hex::encode(remote_node_id)[..8]), Some(remote_node_id));
+                                }
+                            }
+                            Err(e) => {
+                                debug!(peer = hex::encode(remote_node_id), error = %e, "Auto-reconnect failed");
+                                // Signal growth loop as fallback
+                                let cm = cm_arc.lock().await;
+                                cm.notify_growth();
+                            }
+                        }
+                    }
+                });
+            } else {
+                // No known address — signal growth loop to find new peers
+                let cm = conn_mgr.lock().await;
+                cm.notify_growth();
+            }
         }
     }
 }
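The reconnect path above follows a double-check pattern: verify state under a brief lock, dial with no lock held, then re-check before registering. A distilled sketch with simplified, hypothetical types (Manager, Conn, connect, auto_reconnect are illustrative stand-ins, not the real API):

use std::{collections::HashMap, sync::Arc, time::Duration};
use tokio::sync::Mutex;

type NodeId = [u8; 32];
struct Conn; // stand-in for the real connection type
struct Manager { connections: HashMap<NodeId, Conn> }

async fn connect(_addr: &str) -> Result<Conn, &'static str> {
    Ok(Conn) // stand-in for the real dial
}

async fn auto_reconnect(mgr: Arc<Mutex<Manager>>, peer: NodeId, addr: String) {
    // 1. Let the disconnect settle; avoids reconnect storms.
    tokio::time::sleep(Duration::from_secs(3)).await;

    // 2. Brief lock: bail if the other side (or the growth loop) beat us.
    if mgr.lock().await.connections.contains_key(&peer) {
        return;
    }

    // 3. Dial with NO lock held; network I/O can take seconds.
    let Ok(conn) = connect(&addr).await else { return };

    // 4. Re-check under the lock before registering: someone may have
    //    reconnected while we were dialing.
    let mut m = mgr.lock().await;
    m.connections.entry(peer).or_insert(conn);
}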
@@ -591,8 +591,24 @@ const TAB_BASE_LABELS = { feed: 'Feed', myposts: 'My Posts', people: 'People', m
 function updateTabBadge(tabName, count) {
   const tab = document.querySelector(`.tab[data-tab="${tabName}"]`);
   if (!tab) return;
+  // Update the label span (preserve icon span)
+  const label = tab.querySelector('.tab-label');
   const base = TAB_BASE_LABELS[tabName] || tabName;
-  tab.textContent = count > 0 ? `${base} (${count})` : base;
+  if (label) {
+    label.textContent = base;
+  }
+  // Update or create badge span
+  let badge = tab.querySelector('.tab-badge');
+  if (count > 0) {
+    if (!badge) {
+      badge = document.createElement('span');
+      badge.className = 'tab-badge';
+      tab.appendChild(badge);
+    }
+    badge.textContent = count;
+  } else if (badge) {
+    badge.remove();
+  }
 }
 
 let _lastFeedViewMs = 0;
@@ -743,6 +759,18 @@ async function loadFeed(force) {
   } catch (_) {}
 }
 
+// Skip full re-render if any video/audio is actively playing (prevents echo/restart)
+const mediaPlaying = [...feedList.querySelectorAll('video, audio')].some(el => !el.paused);
+if (mediaPlaying) {
+  // Don't destroy the DOM while media is playing — re-render on next cycle when stopped
+  return;
+}
+
+// Revoke old object URLs to prevent memory leaks
+feedList.querySelectorAll('video[src^="blob:"], audio[src^="blob:"], img[src^="blob:"]').forEach(el => {
+  if (el.src.startsWith('blob:')) URL.revokeObjectURL(el.src);
+});
+
 // Preserve expanded comment threads
 const expandedComments = new Set();
 feedList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => {
@@ -780,6 +808,13 @@ async function loadMyPosts(force) {
   const fp = mine.map(p => `${p.id}:${(p.reactionCounts||[]).map(r=>r.emoji+r.count).join(',')}:${p.commentCount||0}`).join('|');
   if (!force && fp === _myPostsFingerprint) return;
   _myPostsFingerprint = fp;
+  // Skip re-render if media is playing
+  const mediaPlaying = [...myPostsList.querySelectorAll('video, audio')].some(el => !el.paused);
+  if (mediaPlaying) return;
+  // Revoke old blob URLs
+  myPostsList.querySelectorAll('video[src^="blob:"], audio[src^="blob:"], img[src^="blob:"]').forEach(el => {
+    if (el.src.startsWith('blob:')) URL.revokeObjectURL(el.src);
+  });
   const expandedComments = new Set();
   myPostsList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => {
     const postEl = el.closest('.post');
@@ -44,7 +44,7 @@
 <p>This is the canonical technical reference for ItsGoin. It describes the vision, the architecture, and the current state of every subsystem — with full implementation detail. This document is versioned; each update records what changed.</p>
 <div class="card" style="margin-top: 1rem;">
   <strong style="font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.05em;">Changelog</strong>
-  <p style="margin-top: 0.5rem;"><strong>v0.4.3</strong> (2026-03-22): Lock contention overhaul — all conn_mgr lock holds during network I/O eliminated. PostFetch, TcpPunch, PullFromPeer, FetchEngagement, ResolveAddress, AnchorProbe, WormLookup, ContentSearch now use brief locks for data gathering only. Bi-stream handlers (BlobRequest, WormQuery, RelayIntroduce, PostFetchRequest, ManifestRefresh) fully lock-free for I/O. ConnectionActor hoists shared Arcs (storage, blob_store, endpoint) for lock-free access. ResolveAddress adds 5s per-query timeout (was unbounded). Worm cascade uses connection snapshots. Initial exchange failure now aborts mesh upgrade (was silently continuing). connect_to_peer/connect_to_anchor use 15s timeout. StoragePool — 8 concurrent SQLite connections in WAL mode replace single Mutex<Storage>. Reads run fully parallel; writes serialize only at SQLite level. Bottom nav bar for mobile/tablet (≤768px) with icon tabs. Text sizes: XS 75%, S 100%, M 125% (default), L 150%, XL 200%. Text size persisted to localStorage for instant restore. Fix: blocking_lock panic inside async runtime (prevented app startup). StoragePool reduced to 4 connections for Android compatibility.</p>
+  <p style="margin-top: 0.5rem;"><strong>v0.4.3</strong> (2026-03-22): Lock contention overhaul — all conn_mgr lock holds during network I/O eliminated. PostFetch, TcpPunch, PullFromPeer, FetchEngagement, ResolveAddress, AnchorProbe, WormLookup, ContentSearch now use brief locks for data gathering only. Bi-stream handlers (BlobRequest, WormQuery, RelayIntroduce, PostFetchRequest, ManifestRefresh) fully lock-free for I/O. ConnectionActor hoists shared Arcs (storage, blob_store, endpoint) for lock-free access. ResolveAddress adds 5s per-query timeout (was unbounded). Worm cascade uses connection snapshots. Initial exchange failure now aborts mesh upgrade (was silently continuing). connect_to_peer/connect_to_anchor use 15s timeout. StoragePool — 8 concurrent SQLite connections in WAL mode replace single Mutex<Storage>. Reads run fully parallel; writes serialize only at SQLite level. Bottom nav bar for mobile/tablet (≤768px) with icon tabs. Text sizes: XS 75%, S 100%, M 125% (default), L 150%, XL 200%. Text size persisted to localStorage for instant restore. Fix: blocking_lock panic inside async runtime (prevented app startup). StoragePool reduced to 4 connections for Android compatibility. Keepalive fix — tokio::time::sleep inside select! was resetting every loop iteration, keepalives never fired; switched to tokio::time::interval. Auto-reconnect on unexpected disconnect — 3s delay then direct reconnect to last known address; falls back to growth loop. notify_growth on disconnect — immediately signals growth loop to fill empty slot instead of waiting 10min rebalance. Tab badge fix — updateTabBadge was using textContent which destroyed icon+label spans; now updates only the label and manages badge span separately. Feed re-render skip during media playback — prevents video echo from DOM destruction.</p>
   <p><strong>v0.4.2</strong> (2026-03-22): Welcome screen — startup shows “How’s it goin?” with staggered counters (connections, posts, messages, reacts, comments) while backend bootstraps. Status ticker — header ticker for new posts, messages, reactions, comments, connection changes. Notification improvements — Tauri plugin → Web Notification → notify-rust fallback chain, Linux native notifications. Responsive text scaling — Small/Normal/Large (100%/150%/200%), persisted via settings. Diagnostics popover — diagnostics moved from inline section to overlay, connections on-demand, timers removed. Share details lightbox with QR code. Connect string prefers external address (UPnP/public IPv6/observed). Stale N1 fix — disconnected social routes excluded from N1 share. Replication handler fix — actively fetches posts + blobs from requester after accepting replication. Hole punch fix — target-side registers publicly routable remote address for relay introduction. Replication semaphore (3 concurrent max). Peer labels show truncated node ID.</p>
   <p><strong>v0.4.1</strong> (2026-03-21): Security hardening — reaction signatures (ed25519), comment signature verification on receipt, reaction removal authorization, BlobHeader author verification. Lock contention fixes — ManifestPush discovery (cm lock released during I/O), pull request handler (filter without lock), pull sender (split into brief locks), engagement checker (batch writes per chunk). Data cleanup — post deletion cleans downstream/upstream/seen tables.</p>
   <p><strong>v0.4.0</strong> (2026-03-21): Protocol v4 — header-driven sync. ManifestPush as primary post notification. Slim PullSyncRequest (per-author timestamps, not full post ID list). Tiered engagement checks (5min/1hr/4hr/24hr by content age). Multi-upstream (3 max) with fallback chain. Auto-prefetch followed authors <90d. Self Last Encounter per-author tracking. Encrypted-but-not-for-us CDN caching. Serial engagement polling. ~90% bandwidth reduction for established nodes.</p>
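The StoragePool described in the changelog (N SQLite connections in WAL mode, parallel reads, writes serialized by SQLite itself) might look roughly like the following hypothetical sketch. It assumes rusqlite and invents the StoragePool API for illustration; the project's actual pool is not shown in this commit:

use std::sync::{
    atomic::{AtomicUsize, Ordering},
    Arc, Mutex,
};
use rusqlite::Connection;

pub struct StoragePool {
    conns: Vec<Arc<Mutex<Connection>>>,
    next: AtomicUsize,
}

impl StoragePool {
    pub fn open(path: &str, size: usize) -> rusqlite::Result<Self> {
        let mut conns = Vec::with_capacity(size);
        for _ in 0..size {
            let conn = Connection::open(path)?;
            // WAL allows concurrent readers alongside a single writer.
            conn.pragma_update(None, "journal_mode", "WAL")?;
            // Wait briefly on write contention instead of erroring.
            conn.pragma_update(None, "busy_timeout", 5000)?;
            conns.push(Arc::new(Mutex::new(conn)));
        }
        Ok(Self { conns, next: AtomicUsize::new(0) })
    }

    /// Round-robin checkout; contention is per-connection, not global.
    pub fn get(&self) -> Arc<Mutex<Connection>> {
        let i = self.next.fetch_add(1, Ordering::Relaxed) % self.conns.len();
        Arc::clone(&self.conns[i])
    }
}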
@@ -81,6 +81,10 @@
   <li><strong>Bottom nav bar</strong> — Mobile/tablet (≤768px) gets a fixed bottom navigation bar with icon tabs. Desktop keeps the top tab bar.</li>
   <li><strong>Text size update</strong> — Five options: XS (75%), S (100%), M (125% default), L (150%), XL (200%). Persisted to localStorage for instant restore on startup.</li>
   <li><strong>Startup fix</strong> — Fixed blocking_lock panic that prevented app from launching (async runtime conflict). StoragePool reduced to 4 connections for Android compatibility.</li>
+  <li><strong>Keepalive fix</strong> — Mesh keepalive pings were never firing due to timer reset bug in select loop. Connections were being zombie-reaped instead of kept alive.</li>
+  <li><strong>Auto-reconnect</strong> — Unexpected disconnects now trigger immediate reconnect attempt (3s delay, then direct connect to last known address). Falls back to growth loop if direct fails.</li>
+  <li><strong>Tab icon fix</strong> — Badge updates were destroying tab icons on mobile. Now updates label and badge separately.</li>
+  <li><strong>Video playback</strong> — Feed re-render skipped while video/audio is playing to prevent echo and restart.</li>
 </ul>
 
 <div class="changelog-date">v0.4.2 — March 22, 2026</div>