v0.3.6: Active CDN replication, device roles, budgets, tombstones, engagement fix, DOS hardening

Active CDN replication:
- All devices proactively replicate recent posts (<72h, <2 replicas) to peers
- Target priority: desktops (300) > anchors (200) > phones (100) + cache_pressure
- ReplicationRequest/Response (0xE1/0xE2) wire messages
- 10-min cycle, 2-min initial delay, cap 20 posts per request
- Graceful with small networks (1 peer = 1 replica, 0 peers = silent skip)

Device roles & budgets:
- Intermittent (phone), Available (desktop), Persistent (anchor)
- Advertised in InitialExchange, stored per-peer
- Replication budget: phones 100MB/hr, desktops/anchors 200MB/hr
- Delivery budget: phones 1GB/hr, desktops 2GB/hr, anchors 1GB/hr
- Hourly auto-reset, enforcement on blob serving

Cache management:
- 1GB default cache limit, configurable in settings UI
- Eviction cycle activated (was implemented but never started)
- Share-link priority boost (+100 for 3+ downstream)
- Cache pressure score (0-255) for replication targeting

Engagement distribution fix:
- BlobHeader JSON rebuilt after BlobHeaderDiff ops
- Previously reactions/comments stored in tables but header stayed stale

Tombstone system:
- deleted_at column on reactions and comments
- Tombstones propagate through pull sync (additive merge respects timestamps)
- UI queries filter WHERE deleted_at IS NULL

Persistent notifications:
- seen_engagement and seen_messages tables replace in-memory Sets
- Only notify on genuinely unseen content, survives restarts

DOS hardening:
- BlobHeaderDiff fan-out: single batched task, max 10 concurrent via JoinSet
- Blob prefetch: cap 20 per cycle, newest first
- PostDownstreamRegister: cap 50 per sync
- Delivery budget enforcement on BlobRequest handler
- Pull preference: non-anchors first to preserve anchor delivery budget

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Scott Reimers 2026-03-20 21:00:28 -04:00
parent b7f2d369fa
commit a7e632de88
16 changed files with 1254 additions and 158 deletions

View file

@ -22,7 +22,8 @@ use crate::protocol::{
ProfileUpdatePayload, PullSyncRequestPayload, PullSyncResponsePayload,
RefuseRedirectPayload, RelayIntroducePayload, RelayIntroduceResultPayload, SessionRelayPayload,
SocialAddressUpdatePayload, SocialCheckinPayload, SocialDisconnectNoticePayload,
SyncPost, VisibilityUpdatePayload, WormQueryPayload, WormResponsePayload, ALPN_V2,
SyncPost, VisibilityUpdatePayload, WormQueryPayload, WormResponsePayload,
ReplicationRequestPayload, ReplicationResponsePayload, ALPN_V2,
};
use crate::storage::Storage;
use crate::types::{
@ -1202,6 +1203,8 @@ impl ConnectionManager {
nat_filtering: Some(our_profile.filtering.to_string()),
http_capable: self.http_capable,
http_addr: self.http_addr.clone(),
device_role: None,
cache_pressure: None,
}
};
@ -1339,6 +1342,8 @@ impl ConnectionManager {
nat_filtering: Some(our_profile.filtering.to_string()),
http_capable: self.http_capable,
http_addr: self.http_addr.clone(),
device_role: None,
cache_pressure: None,
}
};
@ -1603,11 +1608,11 @@ impl ConnectionManager {
}
drop(storage);
// Register as downstream for new posts
// Register as downstream for new posts (cap at 50 to avoid flooding)
if !new_post_ids.is_empty() {
let reg_conn = pull_conn.clone();
tokio::spawn(async move {
for post_id in new_post_ids {
for post_id in new_post_ids.into_iter().take(50) {
let payload = PostDownstreamRegisterPayload { post_id };
if let Ok(mut send) = reg_conn.open_uni().await {
let _ = write_typed_message(&mut send, MessageType::PostDownstreamRegister, &payload).await;
@ -1683,12 +1688,11 @@ impl ConnectionManager {
}
}
// Register as downstream with the sender for new posts
// so they push engagement diffs (reactions, comments) to us
// Register as downstream with the sender for new posts (cap at 50 to avoid flooding)
if !new_post_ids.is_empty() {
let conn = pc.connection.clone();
tokio::spawn(async move {
for post_id in new_post_ids {
for post_id in new_post_ids.into_iter().take(50) {
let payload = PostDownstreamRegisterPayload { post_id };
if let Ok(mut send) = conn.open_uni().await {
let _ = write_typed_message(&mut send, MessageType::PostDownstreamRegister, &payload).await;
@ -1761,7 +1765,9 @@ impl ConnectionManager {
json,
header.updated_at,
);
// Apply individual reactions and comments
// Apply individual reactions and comments.
// store_reaction / store_comment are tombstone-aware:
// they compare timestamps and respect deleted_at fields.
for reaction in &header.reactions {
let _ = storage.store_reaction(reaction);
}
@ -3737,7 +3743,7 @@ impl ConnectionManager {
if let Some(session) = cm.sessions.get(&requester) {
let session_conn = session.connection.clone();
drop(cm); // release lock before async work
match initial_exchange_connect(&storage_clone, &our_node_id, &session_conn, requester, None, our_nat_type, our_http_capable, our_http_addr.clone()).await {
match initial_exchange_connect(&storage_clone, &our_node_id, &session_conn, requester, None, our_nat_type, our_http_capable, our_http_addr.clone(), None, None).await {
Ok(ExchangeResult::Accepted) => {
tracing::info!(peer = hex::encode(requester), "Target-side: initial exchange after hole punch");
}
@ -5061,7 +5067,7 @@ impl ConnectionManager {
let cm = conn_mgr.lock().await;
(cm.storage_ref(), *cm.our_node_id(), cm.build_anchor_advertised_addr(), cm.nat_type(), cm.http_capable, cm.http_addr.clone())
};
initial_exchange_accept(&storage, &our_node_id, send, recv, remote_node_id, anchor_addr, None, our_nat_type, our_http_capable, our_http_addr)
initial_exchange_accept(&storage, &our_node_id, send, recv, remote_node_id, anchor_addr, None, our_nat_type, our_http_capable, our_http_addr, None, None)
.await?;
}
MessageType::AddressRequest => {
@ -5193,6 +5199,23 @@ impl ConnectionManager {
let data = cm.blob_store.get(&payload.cid)?;
let response = match data {
Some(bytes) => {
// Check delivery budget before serving
if !cm.blob_store.consume_delivery_budget(bytes.len() as u64) {
debug!(
peer = hex::encode(remote_node_id),
cid = hex::encode(payload.cid),
blob_size = bytes.len(),
"Delivery budget exhausted, declining blob request"
);
BlobResponsePayload {
cid: payload.cid,
found: false,
data_b64: String::new(),
manifest: None,
cdn_registered: false,
cdn_redirect_peers: vec![],
}
} else {
use base64::Engine;
// Load manifest if available, wrap in CdnManifest
@ -5252,6 +5275,7 @@ impl ConnectionManager {
cdn_registered,
cdn_redirect_peers,
}
} // end delivery budget else
}
None => BlobResponsePayload {
cid: payload.cid,
@ -5430,6 +5454,58 @@ impl ConnectionManager {
};
write_typed_message(&mut send, MessageType::BlobHeaderResponse, &response).await?;
}
MessageType::ReplicationRequest => {
let payload: ReplicationRequestPayload = read_payload(&mut recv, MAX_PAYLOAD).await?;
let (accepted, rejected, needs_pull) = {
let cm = conn_mgr.lock().await;
let storage = cm.storage.lock().await;
let mut acc = Vec::new();
let mut rej = Vec::new();
let mut to_pull = Vec::new();
// Estimate ~1 MB per post with blobs for budget tracking
let est_bytes_per_post: u64 = 1024 * 1024;
let mut budget_used: u64 = 0;
let budget_cap: u64 = 20 * est_bytes_per_post; // cap per request
for pid in &payload.post_ids {
// Already have it — accept for free
if storage.get_post(pid).ok().flatten().is_some() {
acc.push(*pid);
continue;
}
// Check budget before accepting a post we need to pull
if budget_used + est_bytes_per_post > budget_cap {
rej.push(*pid);
continue;
}
budget_used += est_bytes_per_post;
acc.push(*pid);
to_pull.push(*pid);
}
// Register as downstream for all accepted posts
for pid in &acc {
let _ = storage.add_post_downstream(pid, &remote_node_id);
}
(acc, rej, to_pull)
};
let response = ReplicationResponsePayload { accepted: accepted.clone(), rejected };
write_typed_message(&mut send, MessageType::ReplicationResponse, &response).await?;
send.finish()?;
let accepted_count = accepted.len();
let needs_pull_count = needs_pull.len();
debug!(
peer = hex::encode(remote_node_id),
accepted = accepted_count,
rejected = response.rejected.len(),
needs_pull = needs_pull_count,
"Handled replication request"
);
// Posts we accepted but don't have will be fetched on the next pull cycle
// from the requester (they have these posts since they asked us to hold them).
// No explicit pull spawn needed — the periodic pull cycle handles it.
}
other => {
warn!(msg_type = ?other, "Unexpected message type on bi-stream");
}
@ -5575,41 +5651,78 @@ impl ConnectionManager {
BlobHeaderDiffOp::Unknown => {} // future ops — silently skip
}
}
// Rebuild blob header JSON from current DB state so pull-based sync gets fresh data.
// Use _with_tombstones so tombstones propagate through the pull path.
let reactions = storage.get_reactions_with_tombstones(&payload.post_id).unwrap_or_default();
let comments = storage.get_comments_with_tombstones(&payload.post_id).unwrap_or_default();
let policy = storage.get_comment_policy(&payload.post_id).ok().flatten().unwrap_or_default();
let (existing_header_json, _) = storage.get_blob_header(&payload.post_id)
.ok()
.flatten()
.unwrap_or((String::new(), 0));
let mut header: crate::types::BlobHeader = serde_json::from_str(&existing_header_json).unwrap_or_else(|_| {
crate::types::BlobHeader {
post_id: payload.post_id,
author: payload.author,
reactions: vec![],
comments: vec![],
policy: crate::types::CommentPolicy::default(),
updated_at: 0,
thread_splits: vec![],
receipt_slots: vec![],
comment_slots: vec![],
}
});
header.reactions = reactions;
header.comments = comments;
header.policy = policy;
header.updated_at = payload.timestamp_ms;
if let Ok(json) = serde_json::to_string(&header) {
let _ = storage.store_blob_header(&payload.post_id, &payload.author, &json, payload.timestamp_ms);
}
}
// Collect all targets (downstream + upstream), then send in a single batched task
let mut targets: Vec<iroh::endpoint::Connection> = Vec::new();
for peer_id in downstream {
if peer_id == sender {
continue;
}
// Try mesh connection first, then session
let conn = self.connections.get(&peer_id).map(|mc| mc.connection.clone())
.or_else(|| self.sessions.get(&peer_id).map(|sc| sc.connection.clone()));
if let Some(conn) = conn {
let payload_clone = payload.clone();
tokio::spawn(async move {
if let Ok(mut send) = conn.open_uni().await {
let _ = write_typed_message(&mut send, MessageType::BlobHeaderDiff, &payload_clone).await;
let _ = send.finish();
}
});
if peer_id == sender { continue; }
if let Some(conn) = self.connections.get(&peer_id).map(|mc| mc.connection.clone())
.or_else(|| self.sessions.get(&peer_id).map(|sc| sc.connection.clone()))
{
targets.push(conn);
}
}
// Also propagate upstream (toward the author)
if let Some(up) = upstream {
if up != sender {
let conn = self.connections.get(&up).map(|mc| mc.connection.clone())
.or_else(|| self.sessions.get(&up).map(|sc| sc.connection.clone()));
if let Some(conn) = conn {
let payload_clone = payload.clone();
tokio::spawn(async move {
if let Some(conn) = self.connections.get(&up).map(|mc| mc.connection.clone())
.or_else(|| self.sessions.get(&up).map(|sc| sc.connection.clone()))
{
targets.push(conn);
}
}
}
if !targets.is_empty() {
let payload_clone = payload.clone();
tokio::spawn(async move {
// Send to up to 10 concurrently, then batch the rest
use tokio::task::JoinSet;
let mut set = JoinSet::new();
for conn in targets {
let p = payload_clone.clone();
set.spawn(async move {
if let Ok(mut send) = conn.open_uni().await {
let _ = write_typed_message(&mut send, MessageType::BlobHeaderDiff, &payload_clone).await;
let _ = write_typed_message(&mut send, MessageType::BlobHeaderDiff, &p).await;
let _ = send.finish();
}
});
// Cap concurrency at 10
if set.len() >= 10 {
let _ = set.join_next().await;
}
}
}
while set.join_next().await.is_some() {}
});
}
}
@ -5958,6 +6071,8 @@ pub struct ConnHandle {
http_capable: Arc<AtomicBool>,
/// External HTTP address if known (set once at startup)
http_addr: Arc<std::sync::Mutex<Option<String>>>,
/// CDN device role (set once at startup by Network)
device_role_val: Arc<std::sync::Mutex<Option<crate::types::DeviceRole>>>,
}
impl ConnHandle {
@ -5967,6 +6082,7 @@ impl ConnHandle {
tx,
http_capable: Arc::new(AtomicBool::new(false)),
http_addr: Arc::new(std::sync::Mutex::new(None)),
device_role_val: Arc::new(std::sync::Mutex::new(None)),
}
}
@ -5976,6 +6092,16 @@ impl ConnHandle {
*self.http_addr.lock().unwrap() = addr;
}
/// Set CDN device role (called once at Network startup).
pub fn set_device_role(&self, role: crate::types::DeviceRole) {
*self.device_role_val.lock().unwrap() = Some(role);
}
/// Get CDN device role, if set.
pub fn device_role(&self) -> Option<crate::types::DeviceRole> {
*self.device_role_val.lock().unwrap()
}
/// Whether this node is HTTP-capable.
pub fn is_http_capable(&self) -> bool {
self.http_capable.load(Ordering::Relaxed)
@ -6944,6 +7070,8 @@ pub async fn initial_exchange_connect(
our_nat_type: crate::types::NatType,
our_http_capable: bool,
our_http_addr: Option<String>,
our_device_role: Option<crate::types::DeviceRole>,
our_cache_pressure: Option<u8>,
) -> anyhow::Result<ExchangeResult> {
let our_payload = {
let storage = storage.lock().await;
@ -6967,6 +7095,8 @@ pub async fn initial_exchange_connect(
nat_filtering: Some(crate::types::NatProfile::from_nat_type(our_nat_type).filtering.to_string()),
http_capable: our_http_capable,
http_addr: our_http_addr,
device_role: our_device_role.map(|r| r.as_str().to_string()),
cache_pressure: our_cache_pressure,
}
};
@ -7011,6 +7141,8 @@ pub async fn initial_exchange_accept(
our_nat_type: crate::types::NatType,
our_http_capable: bool,
our_http_addr: Option<String>,
our_device_role: Option<crate::types::DeviceRole>,
our_cache_pressure: Option<u8>,
) -> anyhow::Result<()> {
let their_payload: InitialExchangePayload = read_payload(&mut recv, MAX_PAYLOAD).await?;
@ -7036,6 +7168,8 @@ pub async fn initial_exchange_accept(
nat_filtering: Some(crate::types::NatProfile::from_nat_type(our_nat_type).filtering.to_string()),
http_capable: our_http_capable,
http_addr: our_http_addr,
device_role: our_device_role.map(|r| r.as_str().to_string()),
cache_pressure: our_cache_pressure,
}
};
@ -7137,6 +7271,21 @@ async fn process_exchange_payload(
debug!(peer = hex::encode(remote_node_id), http_addr = ?payload.http_addr, "Stored peer HTTP capability");
}
// Store peer's CDN device role and cache pressure
if payload.device_role.is_some() || payload.cache_pressure.is_some() {
let _ = storage.set_peer_device_role(
remote_node_id,
payload.device_role.as_deref(),
payload.cache_pressure,
);
debug!(
peer = hex::encode(remote_node_id),
role = ?payload.device_role,
pressure = ?payload.cache_pressure,
"Stored peer CDN role"
);
}
Ok(())
}