v0.3.6: Active CDN replication, device roles, budgets, tombstones, engagement fix, DOS hardening

Active CDN replication:
- All devices proactively replicate recent posts (<72h, <2 replicas) to peers
- Target priority: desktops (300) > anchors (200) > phones (100) + cache_pressure
- ReplicationRequest/Response (0xE1/0xE2) wire messages
- 10-min cycle, 2-min initial delay, cap 20 posts per request
- Graceful with small networks (1 peer = 1 replica, 0 peers = silent skip)

Device roles & budgets:
- Intermittent (phone), Available (desktop), Persistent (anchor)
- Advertised in InitialExchange, stored per-peer
- Replication budget: phones 100MB/hr, desktops/anchors 200MB/hr
- Delivery budget: phones 1GB/hr, desktops 2GB/hr, anchors 1GB/hr
- Hourly auto-reset, enforcement on blob serving

Cache management:
- 1GB default cache limit, configurable in settings UI
- Eviction cycle activated (was implemented but never started)
- Share-link priority boost (+100 for 3+ downstream)
- Cache pressure score (0-255) for replication targeting

Engagement distribution fix:
- BlobHeader JSON rebuilt after BlobHeaderDiff ops
- Previously reactions/comments stored in tables but header stayed stale

Tombstone system:
- deleted_at column on reactions and comments
- Tombstones propagate through pull sync (additive merge respects timestamps)
- UI queries filter WHERE deleted_at IS NULL

Persistent notifications:
- seen_engagement and seen_messages tables replace in-memory Sets
- Only notify on genuinely unseen content, survives restarts

DOS hardening:
- BlobHeaderDiff fan-out: single batched task, max 10 concurrent via JoinSet
- Blob prefetch: cap 20 per cycle, newest first
- PostDownstreamRegister: cap 50 per sync
- Delivery budget enforcement on BlobRequest handler
- Pull preference: non-anchors first to preserve anchor delivery budget

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 21:00:28 -04:00 · 2026-03-20 21:00:28 -04:00 · a7e632de88
commit a7e632de88
parent b7f2d369fa
16 changed files with 1254 additions and 158 deletions
--- a/crates/core/src/connection.rs
+++ b/crates/core/src/connection.rs
@ -22,7 +22,8 @@ use crate::protocol::{
    ProfileUpdatePayload, PullSyncRequestPayload, PullSyncResponsePayload,
    RefuseRedirectPayload, RelayIntroducePayload, RelayIntroduceResultPayload, SessionRelayPayload,
    SocialAddressUpdatePayload, SocialCheckinPayload, SocialDisconnectNoticePayload,
-    SyncPost, VisibilityUpdatePayload, WormQueryPayload, WormResponsePayload, ALPN_V2,
+    SyncPost, VisibilityUpdatePayload, WormQueryPayload, WormResponsePayload,
+    ReplicationRequestPayload, ReplicationResponsePayload, ALPN_V2,
 };
 use crate::storage::Storage;
 use crate::types::{
@ -1202,6 +1203,8 @@ impl ConnectionManager {
                nat_filtering: Some(our_profile.filtering.to_string()),
                http_capable: self.http_capable,
                http_addr: self.http_addr.clone(),
+                device_role: None,
+                cache_pressure: None,
            }
        };

@ -1339,6 +1342,8 @@ impl ConnectionManager {
                nat_filtering: Some(our_profile.filtering.to_string()),
                http_capable: self.http_capable,
                http_addr: self.http_addr.clone(),
+                device_role: None,
+                cache_pressure: None,
            }
        };

@ -1603,11 +1608,11 @@ impl ConnectionManager {
        }
        drop(storage);

-        // Register as downstream for new posts
+        // Register as downstream for new posts (cap at 50 to avoid flooding)
        if !new_post_ids.is_empty() {
            let reg_conn = pull_conn.clone();
            tokio::spawn(async move {
-                for post_id in new_post_ids {
+                for post_id in new_post_ids.into_iter().take(50) {
                    let payload = PostDownstreamRegisterPayload { post_id };
                    if let Ok(mut send) = reg_conn.open_uni().await {
                        let _ = write_typed_message(&mut send, MessageType::PostDownstreamRegister, &payload).await;
@ -1683,12 +1688,11 @@ impl ConnectionManager {
            }
        }

-        // Register as downstream with the sender for new posts
-        // so they push engagement diffs (reactions, comments) to us
+        // Register as downstream with the sender for new posts (cap at 50 to avoid flooding)
        if !new_post_ids.is_empty() {
            let conn = pc.connection.clone();
            tokio::spawn(async move {
-                for post_id in new_post_ids {
+                for post_id in new_post_ids.into_iter().take(50) {
                    let payload = PostDownstreamRegisterPayload { post_id };
                    if let Ok(mut send) = conn.open_uni().await {
                        let _ = write_typed_message(&mut send, MessageType::PostDownstreamRegister, &payload).await;
@ -1761,7 +1765,9 @@ impl ConnectionManager {
                                    json,
                                    header.updated_at,
                                );
-                                // Apply individual reactions and comments
+                                // Apply individual reactions and comments.
+                                // store_reaction / store_comment are tombstone-aware:
+                                // they compare timestamps and respect deleted_at fields.
                                for reaction in &header.reactions {
                                    let _ = storage.store_reaction(reaction);
                                }
@ -3737,7 +3743,7 @@ impl ConnectionManager {
                    if let Some(session) = cm.sessions.get(&requester) {
                        let session_conn = session.connection.clone();
                        drop(cm); // release lock before async work
-                        match initial_exchange_connect(&storage_clone, &our_node_id, &session_conn, requester, None, our_nat_type, our_http_capable, our_http_addr.clone()).await {
+                        match initial_exchange_connect(&storage_clone, &our_node_id, &session_conn, requester, None, our_nat_type, our_http_capable, our_http_addr.clone(), None, None).await {
                            Ok(ExchangeResult::Accepted) => {
                                tracing::info!(peer = hex::encode(requester), "Target-side: initial exchange after hole punch");
                            }
@ -5061,7 +5067,7 @@ impl ConnectionManager {
                    let cm = conn_mgr.lock().await;
                    (cm.storage_ref(), *cm.our_node_id(), cm.build_anchor_advertised_addr(), cm.nat_type(), cm.http_capable, cm.http_addr.clone())
                };
-                initial_exchange_accept(&storage, &our_node_id, send, recv, remote_node_id, anchor_addr, None, our_nat_type, our_http_capable, our_http_addr)
+                initial_exchange_accept(&storage, &our_node_id, send, recv, remote_node_id, anchor_addr, None, our_nat_type, our_http_capable, our_http_addr, None, None)
                    .await?;
            }
            MessageType::AddressRequest => {
@ -5193,6 +5199,23 @@ impl ConnectionManager {
                let data = cm.blob_store.get(&payload.cid)?;
                let response = match data {
                    Some(bytes) => {
+                        // Check delivery budget before serving
+                        if !cm.blob_store.consume_delivery_budget(bytes.len() as u64) {
+                            debug!(
+                                peer = hex::encode(remote_node_id),
+                                cid = hex::encode(payload.cid),
+                                blob_size = bytes.len(),
+                                "Delivery budget exhausted, declining blob request"
+                            );
+                            BlobResponsePayload {
+                                cid: payload.cid,
+                                found: false,
+                                data_b64: String::new(),
+                                manifest: None,
+                                cdn_registered: false,
+                                cdn_redirect_peers: vec![],
+                            }
+                        } else {
                        use base64::Engine;

                        // Load manifest if available, wrap in CdnManifest
@ -5252,6 +5275,7 @@ impl ConnectionManager {
                            cdn_registered,
                            cdn_redirect_peers,
                        }
+                        } // end delivery budget else
                    }
                    None => BlobResponsePayload {
                        cid: payload.cid,
@ -5430,6 +5454,58 @@ impl ConnectionManager {
                };
                write_typed_message(&mut send, MessageType::BlobHeaderResponse, &response).await?;
            }
+            MessageType::ReplicationRequest => {
+                let payload: ReplicationRequestPayload = read_payload(&mut recv, MAX_PAYLOAD).await?;
+                let (accepted, rejected, needs_pull) = {
+                    let cm = conn_mgr.lock().await;
+                    let storage = cm.storage.lock().await;
+                    let mut acc = Vec::new();
+                    let mut rej = Vec::new();
+                    let mut to_pull = Vec::new();
+                    // Estimate ~1 MB per post with blobs for budget tracking
+                    let est_bytes_per_post: u64 = 1024 * 1024;
+                    let mut budget_used: u64 = 0;
+                    let budget_cap: u64 = 20 * est_bytes_per_post; // cap per request
+
+                    for pid in &payload.post_ids {
+                        // Already have it — accept for free
+                        if storage.get_post(pid).ok().flatten().is_some() {
+                            acc.push(*pid);
+                            continue;
+                        }
+                        // Check budget before accepting a post we need to pull
+                        if budget_used + est_bytes_per_post > budget_cap {
+                            rej.push(*pid);
+                            continue;
+                        }
+                        budget_used += est_bytes_per_post;
+                        acc.push(*pid);
+                        to_pull.push(*pid);
+                    }
+
+                    // Register as downstream for all accepted posts
+                    for pid in &acc {
+                        let _ = storage.add_post_downstream(pid, &remote_node_id);
+                    }
+
+                    (acc, rej, to_pull)
+                };
+                let response = ReplicationResponsePayload { accepted: accepted.clone(), rejected };
+                write_typed_message(&mut send, MessageType::ReplicationResponse, &response).await?;
+                send.finish()?;
+                let accepted_count = accepted.len();
+                let needs_pull_count = needs_pull.len();
+                debug!(
+                    peer = hex::encode(remote_node_id),
+                    accepted = accepted_count,
+                    rejected = response.rejected.len(),
+                    needs_pull = needs_pull_count,
+                    "Handled replication request"
+                );
+                // Posts we accepted but don't have will be fetched on the next pull cycle
+                // from the requester (they have these posts since they asked us to hold them).
+                // No explicit pull spawn needed — the periodic pull cycle handles it.
+            }
            other => {
                warn!(msg_type = ?other, "Unexpected message type on bi-stream");
            }
@ -5575,41 +5651,78 @@ impl ConnectionManager {
                    BlobHeaderDiffOp::Unknown => {} // future ops — silently skip
                }
            }
+
+            // Rebuild blob header JSON from current DB state so pull-based sync gets fresh data.
+            // Use _with_tombstones so tombstones propagate through the pull path.
+            let reactions = storage.get_reactions_with_tombstones(&payload.post_id).unwrap_or_default();
+            let comments = storage.get_comments_with_tombstones(&payload.post_id).unwrap_or_default();
+            let policy = storage.get_comment_policy(&payload.post_id).ok().flatten().unwrap_or_default();
+            let (existing_header_json, _) = storage.get_blob_header(&payload.post_id)
+                .ok()
+                .flatten()
+                .unwrap_or((String::new(), 0));
+            let mut header: crate::types::BlobHeader = serde_json::from_str(&existing_header_json).unwrap_or_else(|_| {
+                crate::types::BlobHeader {
+                    post_id: payload.post_id,
+                    author: payload.author,
+                    reactions: vec![],
+                    comments: vec![],
+                    policy: crate::types::CommentPolicy::default(),
+                    updated_at: 0,
+                    thread_splits: vec![],
+                    receipt_slots: vec![],
+                    comment_slots: vec![],
+                }
+            });
+            header.reactions = reactions;
+            header.comments = comments;
+            header.policy = policy;
+            header.updated_at = payload.timestamp_ms;
+            if let Ok(json) = serde_json::to_string(&header) {
+                let _ = storage.store_blob_header(&payload.post_id, &payload.author, &json, payload.timestamp_ms);
+            }
        }

+        // Collect all targets (downstream + upstream), then send in a single batched task
+        let mut targets: Vec<iroh::endpoint::Connection> = Vec::new();
        for peer_id in downstream {
-            if peer_id == sender {
-                continue;
-            }
-            // Try mesh connection first, then session
-            let conn = self.connections.get(&peer_id).map(|mc| mc.connection.clone())
-                .or_else(|| self.sessions.get(&peer_id).map(|sc| sc.connection.clone()));
-            if let Some(conn) = conn {
-                let payload_clone = payload.clone();
-                tokio::spawn(async move {
-                    if let Ok(mut send) = conn.open_uni().await {
-                        let _ = write_typed_message(&mut send, MessageType::BlobHeaderDiff, &payload_clone).await;
-                        let _ = send.finish();
-                    }
-                });
+            if peer_id == sender { continue; }
+            if let Some(conn) = self.connections.get(&peer_id).map(|mc| mc.connection.clone())
+                .or_else(|| self.sessions.get(&peer_id).map(|sc| sc.connection.clone()))
+            {
+                targets.push(conn);
            }
        }
-
-        // Also propagate upstream (toward the author)
        if let Some(up) = upstream {
            if up != sender {
-                let conn = self.connections.get(&up).map(|mc| mc.connection.clone())
-                    .or_else(|| self.sessions.get(&up).map(|sc| sc.connection.clone()));
-                if let Some(conn) = conn {
-                    let payload_clone = payload.clone();
-                    tokio::spawn(async move {
+                if let Some(conn) = self.connections.get(&up).map(|mc| mc.connection.clone())
+                    .or_else(|| self.sessions.get(&up).map(|sc| sc.connection.clone()))
+                {
+                    targets.push(conn);
+                }
+            }
+        }
+        if !targets.is_empty() {
+            let payload_clone = payload.clone();
+            tokio::spawn(async move {
+                // Send to up to 10 concurrently, then batch the rest
+                use tokio::task::JoinSet;
+                let mut set = JoinSet::new();
+                for conn in targets {
+                    let p = payload_clone.clone();
+                    set.spawn(async move {
                        if let Ok(mut send) = conn.open_uni().await {
-                            let _ = write_typed_message(&mut send, MessageType::BlobHeaderDiff, &payload_clone).await;
+                            let _ = write_typed_message(&mut send, MessageType::BlobHeaderDiff, &p).await;
                            let _ = send.finish();
                        }
                    });
+                    // Cap concurrency at 10
+                    if set.len() >= 10 {
+                        let _ = set.join_next().await;
+                    }
                }
-            }
+                while set.join_next().await.is_some() {}
+            });
        }
    }

@ -5958,6 +6071,8 @@ pub struct ConnHandle {
    http_capable: Arc<AtomicBool>,
    /// External HTTP address if known (set once at startup)
    http_addr: Arc<std::sync::Mutex<Option<String>>>,
+    /// CDN device role (set once at startup by Network)
+    device_role_val: Arc<std::sync::Mutex<Option<crate::types::DeviceRole>>>,
 }

 impl ConnHandle {
@ -5967,6 +6082,7 @@ impl ConnHandle {
            tx,
            http_capable: Arc::new(AtomicBool::new(false)),
            http_addr: Arc::new(std::sync::Mutex::new(None)),
+            device_role_val: Arc::new(std::sync::Mutex::new(None)),
        }
    }

@ -5976,6 +6092,16 @@ impl ConnHandle {
        *self.http_addr.lock().unwrap() = addr;
    }

+    /// Set CDN device role (called once at Network startup).
+    pub fn set_device_role(&self, role: crate::types::DeviceRole) {
+        *self.device_role_val.lock().unwrap() = Some(role);
+    }
+
+    /// Get CDN device role, if set.
+    pub fn device_role(&self) -> Option<crate::types::DeviceRole> {
+        *self.device_role_val.lock().unwrap()
+    }
+
    /// Whether this node is HTTP-capable.
    pub fn is_http_capable(&self) -> bool {
        self.http_capable.load(Ordering::Relaxed)
@ -6944,6 +7070,8 @@ pub async fn initial_exchange_connect(
    our_nat_type: crate::types::NatType,
    our_http_capable: bool,
    our_http_addr: Option<String>,
+    our_device_role: Option<crate::types::DeviceRole>,
+    our_cache_pressure: Option<u8>,
 ) -> anyhow::Result<ExchangeResult> {
    let our_payload = {
        let storage = storage.lock().await;
@ -6967,6 +7095,8 @@ pub async fn initial_exchange_connect(
            nat_filtering: Some(crate::types::NatProfile::from_nat_type(our_nat_type).filtering.to_string()),
            http_capable: our_http_capable,
            http_addr: our_http_addr,
+            device_role: our_device_role.map(|r| r.as_str().to_string()),
+            cache_pressure: our_cache_pressure,
        }
    };

@ -7011,6 +7141,8 @@ pub async fn initial_exchange_accept(
    our_nat_type: crate::types::NatType,
    our_http_capable: bool,
    our_http_addr: Option<String>,
+    our_device_role: Option<crate::types::DeviceRole>,
+    our_cache_pressure: Option<u8>,
 ) -> anyhow::Result<()> {
    let their_payload: InitialExchangePayload = read_payload(&mut recv, MAX_PAYLOAD).await?;

@ -7036,6 +7168,8 @@ pub async fn initial_exchange_accept(
            nat_filtering: Some(crate::types::NatProfile::from_nat_type(our_nat_type).filtering.to_string()),
            http_capable: our_http_capable,
            http_addr: our_http_addr,
+            device_role: our_device_role.map(|r| r.as_str().to_string()),
+            cache_pressure: our_cache_pressure,
        }
    };

@ -7137,6 +7271,21 @@ async fn process_exchange_payload(
        debug!(peer = hex::encode(remote_node_id), http_addr = ?payload.http_addr, "Stored peer HTTP capability");
    }

+    // Store peer's CDN device role and cache pressure
+    if payload.device_role.is_some() || payload.cache_pressure.is_some() {
+        let _ = storage.set_peer_device_role(
+            remote_node_id,
+            payload.device_role.as_deref(),
+            payload.cache_pressure,
+        );
+        debug!(
+            peer = hex::encode(remote_node_id),
+            role = ?payload.device_role,
+            pressure = ?payload.cache_pressure,
+            "Stored peer CDN role"
+        );
+    }
+
    Ok(())
 }