diff --git a/crates/core/src/connection.rs b/crates/core/src/connection.rs index b395b99..35d0a5c 100644 --- a/crates/core/src/connection.rs +++ b/crates/core/src/connection.rs @@ -3197,6 +3197,13 @@ impl ConnectionManager { self.log_activity(ActivityLevel::Warn, ActivityCategory::Connection, "Mesh empty".into(), None); self.notify_recovery(); } + + // Signal growth loop to fill the empty slot (don't wait 10min for rebalance) + let total_slots = self.preferred_slots + self.local_slots + self.wide_slots; + if remaining < total_slots { + self.notify_growth(); + } + } /// Notify watchers that a previously disconnected peer has reconnected. @@ -4495,7 +4502,10 @@ impl ConnectionManager { last_activity: Arc, ) { let our_stable_id = conn.stable_id(); - let keepalive_interval = std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS); + // Use interval (not sleep) so the timer ticks reliably even when other select branches fire. + // tokio::time::sleep inside select! restarts on every loop iteration — keepalive would never fire. + let mut keepalive_tick = tokio::time::interval(std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS)); + keepalive_tick.tick().await; // consume the immediate first tick loop { tokio::select! 
{ uni_result = conn.accept_uni() => { @@ -4534,7 +4544,7 @@ impl ConnectionManager { } } } - _ = tokio::time::sleep(keepalive_interval) => { + _ = keepalive_tick.tick() => { // Send lightweight keepalive ping — keeps NAT mapping alive // and prevents zombie detection on the remote side if let Ok(mut send) = conn.open_uni().await { @@ -4551,15 +4561,72 @@ impl ConnectionManager { } } - // Connection ended — only clean up if this is still the active connection - // (a reconnect may have already replaced our entry with a newer connection) - let mut cm = conn_mgr.lock().await; - let is_current = cm.connections.get(&remote_node_id) - .map_or(false, |pc| pc.connection.stable_id() == our_stable_id); + // Connection ended unexpectedly — clean up and attempt reconnect + let (is_current, peer_addr, has_social_route) = { + let mut cm = conn_mgr.lock().await; + let is_current = cm.connections.get(&remote_node_id) + .map_or(false, |pc| pc.connection.stable_id() == our_stable_id); + if is_current { + // Gather reconnect info before disconnect clears it + let storage = cm.storage.get().await; + let addr = storage.get_peer_record(&remote_node_id).ok().flatten() + .and_then(|r| r.addresses.first().cloned()) + .or_else(|| storage.get_social_route(&remote_node_id).ok().flatten() + .and_then(|r| r.addresses.first().cloned())); + let has_route = storage.has_social_route(&remote_node_id).unwrap_or(false); + drop(storage); + cm.disconnect_peer(&remote_node_id).await; + (true, addr, has_route) + } else { + debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect"); + (false, None, false) + } + }; + + // Attempt reconnect for unexpected disconnects (not intentional SocialDisconnectNotice) if is_current { - cm.disconnect_peer(&remote_node_id).await; - } else { - debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect"); + if let Some(addr) = peer_addr { + let cm_arc = Arc::clone(&conn_mgr); + 
tokio::spawn(async move { + // Brief delay to let the disconnect settle and avoid reconnect storms + tokio::time::sleep(std::time::Duration::from_secs(3)).await; + // Check if already reconnected (by the other side or growth loop) + { + let cm = cm_arc.lock().await; + if cm.connections.contains_key(&remote_node_id) || cm.sessions.contains_key(&remote_node_id) { + return; // Already reconnected + } + } + if let Ok(eid) = iroh::EndpointId::from_bytes(&remote_node_id) { + let ep_addr = iroh::EndpointAddr::from(eid).with_ip_addr(addr); + let endpoint = { + let cm = cm_arc.lock().await; + cm.endpoint.clone() + }; + match ConnectionManager::connect_to_unlocked(&endpoint, ep_addr).await { + Ok(conn) => { + let mut cm = cm_arc.lock().await; + if !cm.connections.contains_key(&remote_node_id) { + cm.register_new_connection(remote_node_id, conn, &[addr], PeerSlotKind::Local).await; + info!(peer = hex::encode(remote_node_id), "Auto-reconnected after unexpected disconnect"); + cm.log_activity(ActivityLevel::Info, ActivityCategory::Connection, + format!("Auto-reconnected to {}", &hex::encode(remote_node_id)[..8]), Some(remote_node_id)); + } + } + Err(e) => { + debug!(peer = hex::encode(remote_node_id), error = %e, "Auto-reconnect failed"); + // Signal growth loop as fallback + let cm = cm_arc.lock().await; + cm.notify_growth(); + } + } + } + }); + } else { + // No known address — signal growth loop to find new peers + let cm = conn_mgr.lock().await; + cm.notify_growth(); + } } } diff --git a/frontend/app.js b/frontend/app.js index 87eff6e..77d4b9e 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -591,8 +591,24 @@ const TAB_BASE_LABELS = { feed: 'Feed', myposts: 'My Posts', people: 'People', m function updateTabBadge(tabName, count) { const tab = document.querySelector(`.tab[data-tab="${tabName}"]`); if (!tab) return; + // Update the label span (preserve icon span) + const label = tab.querySelector('.tab-label'); const base = TAB_BASE_LABELS[tabName] || tabName; - 
tab.textContent = count > 0 ? `${base} (${count})` : base; + if (label) { + label.textContent = base; + } + // Update or create badge span + let badge = tab.querySelector('.tab-badge'); + if (count > 0) { + if (!badge) { + badge = document.createElement('span'); + badge.className = 'tab-badge'; + tab.appendChild(badge); + } + badge.textContent = count; + } else if (badge) { + badge.remove(); + } } let _lastFeedViewMs = 0; @@ -743,6 +759,18 @@ async function loadFeed(force) { } catch (_) {} } + // Skip full re-render if any video/audio is actively playing (prevents echo/restart) + const mediaPlaying = [...feedList.querySelectorAll('video, audio')].some(el => !el.paused); + if (mediaPlaying) { + // Don't destroy the DOM while media is playing — re-render on next cycle when stopped + return; + } + + // Revoke old object URLs to prevent memory leaks + feedList.querySelectorAll('video[src^="blob:"], audio[src^="blob:"], img[src^="blob:"]').forEach(el => { + if (el.src.startsWith('blob:')) URL.revokeObjectURL(el.src); + }); + // Preserve expanded comment threads const expandedComments = new Set(); feedList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => { @@ -780,6 +808,13 @@ async function loadMyPosts(force) { const fp = mine.map(p => `${p.id}:${(p.reactionCounts||[]).map(r=>r.emoji+r.count).join(',')}:${p.commentCount||0}`).join('|'); if (!force && fp === _myPostsFingerprint) return; _myPostsFingerprint = fp; + // Skip re-render if media is playing + const mediaPlaying = [...myPostsList.querySelectorAll('video, audio')].some(el => !el.paused); + if (mediaPlaying) return; + // Revoke old blob URLs + myPostsList.querySelectorAll('video[src^="blob:"], audio[src^="blob:"], img[src^="blob:"]').forEach(el => { + if (el.src.startsWith('blob:')) URL.revokeObjectURL(el.src); + }); const expandedComments = new Set(); myPostsList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => { const postEl = el.closest('.post'); diff --git a/website/design.html 
b/website/design.html index dd6da10..ab98e3f 100644 --- a/website/design.html +++ b/website/design.html @@ -44,7 +44,7 @@

This is the canonical technical reference for ItsGoin. It describes the vision, the architecture, and the current state of every subsystem — with full implementation detail. This document is versioned; each update records what changed.

Changelog -

v0.4.3 (2026-03-22): Lock contention overhaul — all conn_mgr lock holds during network I/O eliminated. PostFetch, TcpPunch, PullFromPeer, FetchEngagement, ResolveAddress, AnchorProbe, WormLookup, ContentSearch now use brief locks for data gathering only. Bi-stream handlers (BlobRequest, WormQuery, RelayIntroduce, PostFetchRequest, ManifestRefresh) fully lock-free for I/O. ConnectionActor hoists shared Arcs (storage, blob_store, endpoint) for lock-free access. ResolveAddress adds 5s per-query timeout (was unbounded). Worm cascade uses connection snapshots. Initial exchange failure now aborts mesh upgrade (was silently continuing). connect_to_peer/connect_to_anchor use 15s timeout. StoragePool — 8 concurrent SQLite connections in WAL mode replace single Mutex<Storage>. Reads run fully parallel; writes serialize only at SQLite level. Bottom nav bar for mobile/tablet (≤768px) with icon tabs. Text sizes: XS 75%, S 100%, M 125% (default), L 150%, XL 200%. Text size persisted to localStorage for instant restore. Fix: blocking_lock panic inside async runtime (prevented app startup). StoragePool reduced to 4 connections for Android compatibility.

+

v0.4.3 (2026-03-22): Lock contention overhaul — all conn_mgr lock holds during network I/O eliminated. PostFetch, TcpPunch, PullFromPeer, FetchEngagement, ResolveAddress, AnchorProbe, WormLookup, ContentSearch now use brief locks for data gathering only. Bi-stream handlers (BlobRequest, WormQuery, RelayIntroduce, PostFetchRequest, ManifestRefresh) fully lock-free for I/O. ConnectionActor hoists shared Arcs (storage, blob_store, endpoint) for lock-free access. ResolveAddress adds 5s per-query timeout (was unbounded). Worm cascade uses connection snapshots. Initial exchange failure now aborts mesh upgrade (was silently continuing). connect_to_peer/connect_to_anchor use 15s timeout. StoragePool — 8 concurrent SQLite connections in WAL mode replace single Mutex<Storage>. Reads run fully parallel; writes serialize only at SQLite level. Bottom nav bar for mobile/tablet (≤768px) with icon tabs. Text sizes: XS 75%, S 100%, M 125% (default), L 150%, XL 200%. Text size persisted to localStorage for instant restore. Fix: blocking_lock panic inside async runtime (prevented app startup). StoragePool reduced to 4 connections for Android compatibility. Keepalive fix — tokio::time::sleep inside select! was resetting every loop iteration, keepalives never fired; switched to tokio::time::interval. Auto-reconnect on unexpected disconnect — 3s delay then direct reconnect to last known address; falls back to growth loop. notify_growth on disconnect — immediately signals growth loop to fill empty slot instead of waiting 10min rebalance. Tab badge fix — updateTabBadge was using textContent which destroyed icon+label spans; now updates only the label and manages badge span separately. Feed re-render skip during media playback — prevents video echo from DOM destruction.

v0.4.2 (2026-03-22): Welcome screen — startup shows “How’s it goin?” with staggered counters (connections, posts, messages, reacts, comments) while backend bootstraps. Status ticker — header ticker for new posts, messages, reactions, comments, connection changes. Notification improvements — Tauri plugin → Web Notification → notify-rust fallback chain, Linux native notifications. Responsive text scaling — Small/Normal/Large (100%/150%/200%), persisted via settings. Diagnostics popover — diagnostics moved from inline section to overlay, connections on-demand, timers removed. Share details lightbox with QR code. Connect string prefers external address (UPnP/public IPv6/observed). Stale N1 fix — disconnected social routes excluded from N1 share. Replication handler fix — actively fetches posts + blobs from requester after accepting replication. Hole punch fix — target-side registers publicly routable remote address for relay introduction. Replication semaphore (3 concurrent max). Peer labels show truncated node ID.

v0.4.1 (2026-03-21): Security hardening — reaction signatures (ed25519), comment signature verification on receipt, reaction removal authorization, BlobHeader author verification. Lock contention fixes — ManifestPush discovery (cm lock released during I/O), pull request handler (filter without lock), pull sender (split into brief locks), engagement checker (batch writes per chunk). Data cleanup — post deletion cleans downstream/upstream/seen tables.

v0.4.0 (2026-03-21): Protocol v4 — header-driven sync. ManifestPush as primary post notification. Slim PullSyncRequest (per-author timestamps, not full post ID list). Tiered engagement checks (5min/1hr/4hr/24hr by content age). Multi-upstream (3 max) with fallback chain. Auto-prefetch followed authors <90d. Self Last Encounter per-author tracking. Encrypted-but-not-for-us CDN caching. Serial engagement polling. ~90% bandwidth reduction for established nodes.

diff --git a/website/download.html b/website/download.html index 6529ada..1a279ca 100644 --- a/website/download.html +++ b/website/download.html @@ -81,6 +81,10 @@
  • Bottom nav bar — Mobile/tablet (≤768px) gets a fixed bottom navigation bar with icon tabs. Desktop keeps the top tab bar.
  • Text size update — Five options: XS (75%), S (100%), M (125% default), L (150%), XL (200%). Persisted to localStorage for instant restore on startup.
  • Startup fix — Fixed blocking_lock panic that prevented app from launching (async runtime conflict). StoragePool reduced to 4 connections for Android compatibility.
  • +
  • Keepalive fix — Mesh keepalive pings were never firing due to a timer reset bug in the select loop. Connections were being zombie-reaped instead of kept alive.
  • +
  • Auto-reconnect — Unexpected disconnects now trigger an immediate reconnect attempt (3s delay, then direct connect to last known address). Falls back to the growth loop if the direct attempt fails.
  • +
  • Tab icon fix — Badge updates were destroying tab icons on mobile. Now updates label and badge separately.
  • +
  • Video playback — Feed re-render skipped while video/audio is playing to prevent echo and restart.
  • v0.4.2 — March 22, 2026