Keepalive fix, auto-reconnect on disconnect, tab icon fix, video playback guard
Keepalive: tokio::time::sleep inside select! was resetting every iteration, so keepalives never fired and connections were being zombie-reaped (10min timeout with no pings). Switched to tokio::time::interval, which ticks reliably. Auto-reconnect: unexpected disconnects (stream error, not SocialDisconnectNotice) now attempt a direct reconnect after a 3s delay using the last known address from the peers table or social route, falling back to notify_growth() if the direct reconnect fails. Tab icons: updateTabBadge was using textContent, which destroyed the icon and label spans inside tab buttons; it now updates only the .tab-label span and manages a separate .tab-badge element. Video playback: feed re-render is skipped while any video or audio is actively playing, preventing echo caused by DOM destruction and media element recreation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
68afc40b16
commit
6320a82852
4 changed files with 118 additions and 12 deletions
|
|
@ -3197,6 +3197,13 @@ impl ConnectionManager {
|
||||||
self.log_activity(ActivityLevel::Warn, ActivityCategory::Connection, "Mesh empty".into(), None);
|
self.log_activity(ActivityLevel::Warn, ActivityCategory::Connection, "Mesh empty".into(), None);
|
||||||
self.notify_recovery();
|
self.notify_recovery();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Signal growth loop to fill the empty slot (don't wait 10min for rebalance)
|
||||||
|
let total_slots = self.preferred_slots + self.local_slots + self.wide_slots;
|
||||||
|
if remaining < total_slots {
|
||||||
|
self.notify_growth();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Notify watchers that a previously disconnected peer has reconnected.
|
/// Notify watchers that a previously disconnected peer has reconnected.
|
||||||
|
|
@ -4495,7 +4502,10 @@ impl ConnectionManager {
|
||||||
last_activity: Arc<AtomicU64>,
|
last_activity: Arc<AtomicU64>,
|
||||||
) {
|
) {
|
||||||
let our_stable_id = conn.stable_id();
|
let our_stable_id = conn.stable_id();
|
||||||
let keepalive_interval = std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS);
|
// Use interval (not sleep) so the timer ticks reliably even when other select branches fire.
|
||||||
|
// tokio::time::sleep inside select! restarts on every loop iteration — keepalive would never fire.
|
||||||
|
let mut keepalive_tick = tokio::time::interval(std::time::Duration::from_secs(MESH_KEEPALIVE_INTERVAL_SECS));
|
||||||
|
keepalive_tick.tick().await; // consume the immediate first tick
|
||||||
loop {
|
loop {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
uni_result = conn.accept_uni() => {
|
uni_result = conn.accept_uni() => {
|
||||||
|
|
@ -4534,7 +4544,7 @@ impl ConnectionManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ = tokio::time::sleep(keepalive_interval) => {
|
_ = keepalive_tick.tick() => {
|
||||||
// Send lightweight keepalive ping — keeps NAT mapping alive
|
// Send lightweight keepalive ping — keeps NAT mapping alive
|
||||||
// and prevents zombie detection on the remote side
|
// and prevents zombie detection on the remote side
|
||||||
if let Ok(mut send) = conn.open_uni().await {
|
if let Ok(mut send) = conn.open_uni().await {
|
||||||
|
|
@ -4551,15 +4561,72 @@ impl ConnectionManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connection ended — only clean up if this is still the active connection
|
// Connection ended unexpectedly — clean up and attempt reconnect
|
||||||
// (a reconnect may have already replaced our entry with a newer connection)
|
let (is_current, peer_addr, has_social_route) = {
|
||||||
let mut cm = conn_mgr.lock().await;
|
let mut cm = conn_mgr.lock().await;
|
||||||
let is_current = cm.connections.get(&remote_node_id)
|
let is_current = cm.connections.get(&remote_node_id)
|
||||||
.map_or(false, |pc| pc.connection.stable_id() == our_stable_id);
|
.map_or(false, |pc| pc.connection.stable_id() == our_stable_id);
|
||||||
|
if is_current {
|
||||||
|
// Gather reconnect info before disconnect clears it
|
||||||
|
let storage = cm.storage.get().await;
|
||||||
|
let addr = storage.get_peer_record(&remote_node_id).ok().flatten()
|
||||||
|
.and_then(|r| r.addresses.first().cloned())
|
||||||
|
.or_else(|| storage.get_social_route(&remote_node_id).ok().flatten()
|
||||||
|
.and_then(|r| r.addresses.first().cloned()));
|
||||||
|
let has_route = storage.has_social_route(&remote_node_id).unwrap_or(false);
|
||||||
|
drop(storage);
|
||||||
|
cm.disconnect_peer(&remote_node_id).await;
|
||||||
|
(true, addr, has_route)
|
||||||
|
} else {
|
||||||
|
debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect");
|
||||||
|
(false, None, false)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Attempt reconnect for unexpected disconnects (not intentional SocialDisconnectNotice)
|
||||||
if is_current {
|
if is_current {
|
||||||
cm.disconnect_peer(&remote_node_id).await;
|
if let Some(addr) = peer_addr {
|
||||||
} else {
|
let cm_arc = Arc::clone(&conn_mgr);
|
||||||
debug!(peer = hex::encode(remote_node_id), "Skipping disconnect — connection was replaced by reconnect");
|
tokio::spawn(async move {
|
||||||
|
// Brief delay to let the disconnect settle and avoid reconnect storms
|
||||||
|
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
||||||
|
// Check if already reconnected (by the other side or growth loop)
|
||||||
|
{
|
||||||
|
let cm = cm_arc.lock().await;
|
||||||
|
if cm.connections.contains_key(&remote_node_id) || cm.sessions.contains_key(&remote_node_id) {
|
||||||
|
return; // Already reconnected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Ok(eid) = iroh::EndpointId::from_bytes(&remote_node_id) {
|
||||||
|
let ep_addr = iroh::EndpointAddr::from(eid).with_ip_addr(addr);
|
||||||
|
let endpoint = {
|
||||||
|
let cm = cm_arc.lock().await;
|
||||||
|
cm.endpoint.clone()
|
||||||
|
};
|
||||||
|
match ConnectionManager::connect_to_unlocked(&endpoint, ep_addr).await {
|
||||||
|
Ok(conn) => {
|
||||||
|
let mut cm = cm_arc.lock().await;
|
||||||
|
if !cm.connections.contains_key(&remote_node_id) {
|
||||||
|
cm.register_new_connection(remote_node_id, conn, &[addr], PeerSlotKind::Local).await;
|
||||||
|
info!(peer = hex::encode(remote_node_id), "Auto-reconnected after unexpected disconnect");
|
||||||
|
cm.log_activity(ActivityLevel::Info, ActivityCategory::Connection,
|
||||||
|
format!("Auto-reconnected to {}", &hex::encode(remote_node_id)[..8]), Some(remote_node_id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!(peer = hex::encode(remote_node_id), error = %e, "Auto-reconnect failed");
|
||||||
|
// Signal growth loop as fallback
|
||||||
|
let cm = cm_arc.lock().await;
|
||||||
|
cm.notify_growth();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// No known address — signal growth loop to find new peers
|
||||||
|
let cm = conn_mgr.lock().await;
|
||||||
|
cm.notify_growth();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -591,8 +591,24 @@ const TAB_BASE_LABELS = { feed: 'Feed', myposts: 'My Posts', people: 'People', m
|
||||||
function updateTabBadge(tabName, count) {
|
function updateTabBadge(tabName, count) {
|
||||||
const tab = document.querySelector(`.tab[data-tab="${tabName}"]`);
|
const tab = document.querySelector(`.tab[data-tab="${tabName}"]`);
|
||||||
if (!tab) return;
|
if (!tab) return;
|
||||||
|
// Update the label span (preserve icon span)
|
||||||
|
const label = tab.querySelector('.tab-label');
|
||||||
const base = TAB_BASE_LABELS[tabName] || tabName;
|
const base = TAB_BASE_LABELS[tabName] || tabName;
|
||||||
tab.textContent = count > 0 ? `${base} (${count})` : base;
|
if (label) {
|
||||||
|
label.textContent = base;
|
||||||
|
}
|
||||||
|
// Update or create badge span
|
||||||
|
let badge = tab.querySelector('.tab-badge');
|
||||||
|
if (count > 0) {
|
||||||
|
if (!badge) {
|
||||||
|
badge = document.createElement('span');
|
||||||
|
badge.className = 'tab-badge';
|
||||||
|
tab.appendChild(badge);
|
||||||
|
}
|
||||||
|
badge.textContent = count;
|
||||||
|
} else if (badge) {
|
||||||
|
badge.remove();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let _lastFeedViewMs = 0;
|
let _lastFeedViewMs = 0;
|
||||||
|
|
@ -743,6 +759,18 @@ async function loadFeed(force) {
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip full re-render if any video/audio is actively playing (prevents echo/restart)
|
||||||
|
const mediaPlaying = [...feedList.querySelectorAll('video, audio')].some(el => !el.paused);
|
||||||
|
if (mediaPlaying) {
|
||||||
|
// Don't destroy the DOM while media is playing — re-render on next cycle when stopped
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Revoke old object URLs to prevent memory leaks
|
||||||
|
feedList.querySelectorAll('video[src^="blob:"], audio[src^="blob:"], img[src^="blob:"]').forEach(el => {
|
||||||
|
if (el.src.startsWith('blob:')) URL.revokeObjectURL(el.src);
|
||||||
|
});
|
||||||
|
|
||||||
// Preserve expanded comment threads
|
// Preserve expanded comment threads
|
||||||
const expandedComments = new Set();
|
const expandedComments = new Set();
|
||||||
feedList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => {
|
feedList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => {
|
||||||
|
|
@ -780,6 +808,13 @@ async function loadMyPosts(force) {
|
||||||
const fp = mine.map(p => `${p.id}:${(p.reactionCounts||[]).map(r=>r.emoji+r.count).join(',')}:${p.commentCount||0}`).join('|');
|
const fp = mine.map(p => `${p.id}:${(p.reactionCounts||[]).map(r=>r.emoji+r.count).join(',')}:${p.commentCount||0}`).join('|');
|
||||||
if (!force && fp === _myPostsFingerprint) return;
|
if (!force && fp === _myPostsFingerprint) return;
|
||||||
_myPostsFingerprint = fp;
|
_myPostsFingerprint = fp;
|
||||||
|
// Skip re-render if media is playing
|
||||||
|
const mediaPlaying = [...myPostsList.querySelectorAll('video, audio')].some(el => !el.paused);
|
||||||
|
if (mediaPlaying) return;
|
||||||
|
// Revoke old blob URLs
|
||||||
|
myPostsList.querySelectorAll('video[src^="blob:"], audio[src^="blob:"], img[src^="blob:"]').forEach(el => {
|
||||||
|
if (el.src.startsWith('blob:')) URL.revokeObjectURL(el.src);
|
||||||
|
});
|
||||||
const expandedComments = new Set();
|
const expandedComments = new Set();
|
||||||
myPostsList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => {
|
myPostsList.querySelectorAll('.comment-thread:not(.hidden)').forEach(el => {
|
||||||
const postEl = el.closest('.post');
|
const postEl = el.closest('.post');
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@
|
||||||
<p>This is the canonical technical reference for ItsGoin. It describes the vision, the architecture, and the current state of every subsystem — with full implementation detail. This document is versioned; each update records what changed.</p>
|
<p>This is the canonical technical reference for ItsGoin. It describes the vision, the architecture, and the current state of every subsystem — with full implementation detail. This document is versioned; each update records what changed.</p>
|
||||||
<div class="card" style="margin-top: 1rem;">
|
<div class="card" style="margin-top: 1rem;">
|
||||||
<strong style="font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.05em;">Changelog</strong>
|
<strong style="font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.05em;">Changelog</strong>
|
||||||
<p style="margin-top: 0.5rem;"><strong>v0.4.3</strong> (2026-03-22): Lock contention overhaul — all conn_mgr lock holds during network I/O eliminated. PostFetch, TcpPunch, PullFromPeer, FetchEngagement, ResolveAddress, AnchorProbe, WormLookup, ContentSearch now use brief locks for data gathering only. Bi-stream handlers (BlobRequest, WormQuery, RelayIntroduce, PostFetchRequest, ManifestRefresh) fully lock-free for I/O. ConnectionActor hoists shared Arcs (storage, blob_store, endpoint) for lock-free access. ResolveAddress adds 5s per-query timeout (was unbounded). Worm cascade uses connection snapshots. Initial exchange failure now aborts mesh upgrade (was silently continuing). connect_to_peer/connect_to_anchor use 15s timeout. StoragePool — 8 concurrent SQLite connections in WAL mode replace single Mutex<Storage>. Reads run fully parallel; writes serialize only at SQLite level. Bottom nav bar for mobile/tablet (≤768px) with icon tabs. Text sizes: XS 75%, S 100%, M 125% (default), L 150%, XL 200%. Text size persisted to localStorage for instant restore. Fix: blocking_lock panic inside async runtime (prevented app startup). StoragePool reduced to 4 connections for Android compatibility.</p>
|
<p style="margin-top: 0.5rem;"><strong>v0.4.3</strong> (2026-03-22): Lock contention overhaul — all conn_mgr lock holds during network I/O eliminated. PostFetch, TcpPunch, PullFromPeer, FetchEngagement, ResolveAddress, AnchorProbe, WormLookup, ContentSearch now use brief locks for data gathering only. Bi-stream handlers (BlobRequest, WormQuery, RelayIntroduce, PostFetchRequest, ManifestRefresh) fully lock-free for I/O. ConnectionActor hoists shared Arcs (storage, blob_store, endpoint) for lock-free access. ResolveAddress adds 5s per-query timeout (was unbounded). Worm cascade uses connection snapshots. Initial exchange failure now aborts mesh upgrade (was silently continuing). connect_to_peer/connect_to_anchor use 15s timeout. StoragePool — 8 concurrent SQLite connections in WAL mode replace single Mutex<Storage>. Reads run fully parallel; writes serialize only at SQLite level. Bottom nav bar for mobile/tablet (≤768px) with icon tabs. Text sizes: XS 75%, S 100%, M 125% (default), L 150%, XL 200%. Text size persisted to localStorage for instant restore. Fix: blocking_lock panic inside async runtime (prevented app startup). StoragePool reduced to 4 connections for Android compatibility. Keepalive fix — tokio::time::sleep inside select! was resetting every loop iteration, keepalives never fired; switched to tokio::time::interval. Auto-reconnect on unexpected disconnect — 3s delay then direct reconnect to last known address; falls back to growth loop. notify_growth on disconnect — immediately signals growth loop to fill empty slot instead of waiting 10min rebalance. Tab badge fix — updateTabBadge was using textContent which destroyed icon+label spans; now updates only the label and manages badge span separately. Feed re-render skip during media playback — prevents video echo from DOM destruction.</p>
|
||||||
<p><strong>v0.4.2</strong> (2026-03-22): Welcome screen — startup shows “How’s it goin?” with staggered counters (connections, posts, messages, reacts, comments) while backend bootstraps. Status ticker — header ticker for new posts, messages, reactions, comments, connection changes. Notification improvements — Tauri plugin → Web Notification → notify-rust fallback chain, Linux native notifications. Responsive text scaling — Small/Normal/Large (100%/150%/200%), persisted via settings. Diagnostics popover — diagnostics moved from inline section to overlay, connections on-demand, timers removed. Share details lightbox with QR code. Connect string prefers external address (UPnP/public IPv6/observed). Stale N1 fix — disconnected social routes excluded from N1 share. Replication handler fix — actively fetches posts + blobs from requester after accepting replication. Hole punch fix — target-side registers publicly routable remote address for relay introduction. Replication semaphore (3 concurrent max). Peer labels show truncated node ID.</p>
|
<p><strong>v0.4.2</strong> (2026-03-22): Welcome screen — startup shows “How’s it goin?” with staggered counters (connections, posts, messages, reacts, comments) while backend bootstraps. Status ticker — header ticker for new posts, messages, reactions, comments, connection changes. Notification improvements — Tauri plugin → Web Notification → notify-rust fallback chain, Linux native notifications. Responsive text scaling — Small/Normal/Large (100%/150%/200%), persisted via settings. Diagnostics popover — diagnostics moved from inline section to overlay, connections on-demand, timers removed. Share details lightbox with QR code. Connect string prefers external address (UPnP/public IPv6/observed). Stale N1 fix — disconnected social routes excluded from N1 share. Replication handler fix — actively fetches posts + blobs from requester after accepting replication. Hole punch fix — target-side registers publicly routable remote address for relay introduction. Replication semaphore (3 concurrent max). Peer labels show truncated node ID.</p>
|
||||||
<p><strong>v0.4.1</strong> (2026-03-21): Security hardening — reaction signatures (ed25519), comment signature verification on receipt, reaction removal authorization, BlobHeader author verification. Lock contention fixes — ManifestPush discovery (cm lock released during I/O), pull request handler (filter without lock), pull sender (split into brief locks), engagement checker (batch writes per chunk). Data cleanup — post deletion cleans downstream/upstream/seen tables.</p>
|
<p><strong>v0.4.1</strong> (2026-03-21): Security hardening — reaction signatures (ed25519), comment signature verification on receipt, reaction removal authorization, BlobHeader author verification. Lock contention fixes — ManifestPush discovery (cm lock released during I/O), pull request handler (filter without lock), pull sender (split into brief locks), engagement checker (batch writes per chunk). Data cleanup — post deletion cleans downstream/upstream/seen tables.</p>
|
||||||
<p><strong>v0.4.0</strong> (2026-03-21): Protocol v4 — header-driven sync. ManifestPush as primary post notification. Slim PullSyncRequest (per-author timestamps, not full post ID list). Tiered engagement checks (5min/1hr/4hr/24hr by content age). Multi-upstream (3 max) with fallback chain. Auto-prefetch followed authors <90d. Self Last Encounter per-author tracking. Encrypted-but-not-for-us CDN caching. Serial engagement polling. ~90% bandwidth reduction for established nodes.</p>
|
<p><strong>v0.4.0</strong> (2026-03-21): Protocol v4 — header-driven sync. ManifestPush as primary post notification. Slim PullSyncRequest (per-author timestamps, not full post ID list). Tiered engagement checks (5min/1hr/4hr/24hr by content age). Multi-upstream (3 max) with fallback chain. Auto-prefetch followed authors <90d. Self Last Encounter per-author tracking. Encrypted-but-not-for-us CDN caching. Serial engagement polling. ~90% bandwidth reduction for established nodes.</p>
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,10 @@
|
||||||
<li><strong>Bottom nav bar</strong> — Mobile/tablet (≤768px) gets a fixed bottom navigation bar with icon tabs. Desktop keeps the top tab bar.</li>
|
<li><strong>Bottom nav bar</strong> — Mobile/tablet (≤768px) gets a fixed bottom navigation bar with icon tabs. Desktop keeps the top tab bar.</li>
|
||||||
<li><strong>Text size update</strong> — Five options: XS (75%), S (100%), M (125% default), L (150%), XL (200%). Persisted to localStorage for instant restore on startup.</li>
|
<li><strong>Text size update</strong> — Five options: XS (75%), S (100%), M (125% default), L (150%), XL (200%). Persisted to localStorage for instant restore on startup.</li>
|
||||||
<li><strong>Startup fix</strong> — Fixed blocking_lock panic that prevented app from launching (async runtime conflict). StoragePool reduced to 4 connections for Android compatibility.</li>
|
<li><strong>Startup fix</strong> — Fixed blocking_lock panic that prevented app from launching (async runtime conflict). StoragePool reduced to 4 connections for Android compatibility.</li>
|
||||||
|
<li><strong>Keepalive fix</strong> — Mesh keepalive pings were never firing due to timer reset bug in select loop. Connections were being zombie-reaped instead of kept alive.</li>
|
||||||
|
<li><strong>Auto-reconnect</strong> — Unexpected disconnects now trigger immediate reconnect attempt (3s delay, then direct connect to last known address). Falls back to growth loop if direct fails.</li>
|
||||||
|
<li><strong>Tab icon fix</strong> — Badge updates were destroying tab icons on mobile. Now updates label and badge separately.</li>
|
||||||
|
<li><strong>Video playback</strong> — Feed re-render skipped while video/audio is playing to prevent echo and restart.</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<div class="changelog-date">v0.4.2 — March 22, 2026</div>
|
<div class="changelog-date">v0.4.2 — March 22, 2026</div>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue