v0.3.6: Active CDN replication, device roles, budgets, tombstones, engagement fix, DoS hardening

Active CDN replication:
- All devices proactively replicate recent posts (<72h, <2 replicas) to peers
- Target priority: desktops (300) > anchors (200) > phones (100) + cache_pressure
- ReplicationRequest/Response (0xE1/0xE2) wire messages
- 10-min cycle, 2-min initial delay, cap 20 posts per request
- Degrades gracefully on small networks (1 peer = 1 replica, 0 peers = silent skip)

Device roles & budgets:
- Intermittent (phone), Available (desktop), Persistent (anchor)
- Advertised in InitialExchange, stored per-peer
- Replication budget: phones 100MB/hr, desktops/anchors 200MB/hr
- Delivery budget: phones 1GB/hr, desktops 2GB/hr, anchors 1GB/hr
- Hourly auto-reset, enforcement on blob serving

Cache management:
- 1GB default cache limit, configurable in settings UI
- Eviction cycle activated (was implemented but never started)
- Share-link priority boost (+100 for 3+ downstream)
- Cache pressure score (0-255) for replication targeting

Engagement distribution fix:
- BlobHeader JSON rebuilt after BlobHeaderDiff ops
- Previously reactions/comments stored in tables but header stayed stale

Tombstone system:
- deleted_at column on reactions and comments
- Tombstones propagate through pull sync (additive merge respects timestamps)
- UI queries filter WHERE deleted_at IS NULL

Persistent notifications:
- seen_engagement and seen_messages tables replace in-memory Sets
- Only notify on genuinely unseen content, survives restarts

DoS hardening:
- BlobHeaderDiff fan-out: single batched task, max 10 concurrent via JoinSet
- Blob prefetch: cap 20 per cycle, newest first
- PostDownstreamRegister: cap 50 per sync
- Delivery budget enforcement on BlobRequest handler
- Pull preference: non-anchors first to preserve anchor delivery budget

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Scott Reimers 2026-03-20 21:00:28 -04:00
parent b7f2d369fa
commit a7e632de88
16 changed files with 1254 additions and 158 deletions

View file

@ -20,7 +20,7 @@ use crate::protocol::{
};
use crate::storage::Storage;
use crate::types::{
DeleteRecord, DeviceProfile, NodeId, PeerSlotKind, PeerWithAddress, Post, PostId,
DeleteRecord, DeviceProfile, DeviceRole, NodeId, PeerSlotKind, PeerWithAddress, Post, PostId,
PostVisibility, PublicProfile, SessionReachMethod, WormResult,
};
@ -44,6 +44,8 @@ pub struct Network {
has_public_v6: bool,
/// Stable bind address (from --bind flag), passed to ConnectionManager for anchor advertised address
bind_addr: Option<SocketAddr>,
/// CDN replication role: determines budget limits and pull ordering
device_role: DeviceRole,
}
fn is_public_ip(ip: IpAddr) -> bool {
@ -233,6 +235,17 @@ impl Network {
"Network started (v2)"
);
// Determine CDN replication role from device characteristics
let device_role = if is_mobile {
DeviceRole::Intermittent
} else if is_anchor.load(Ordering::Relaxed) {
DeviceRole::Persistent
} else {
DeviceRole::Available
};
info!(role = %device_role, "CDN replication role determined");
conn_handle.set_device_role(device_role);
Ok(Self {
endpoint,
storage,
@ -246,6 +259,7 @@ impl Network {
has_upnp_tcp,
has_public_v6,
bind_addr,
device_role,
})
}
@ -319,6 +333,11 @@ impl Network {
self.is_anchor.load(Ordering::Relaxed)
}
/// Get the CDN replication device role.
pub fn device_role(&self) -> DeviceRole {
self.device_role
}
/// Whether this node can serve HTTP (has TCP reachability).
pub fn is_http_capable(&self) -> bool {
self.has_upnp_tcp || self.has_public_v6 || self.bind_addr.is_some()
@ -626,7 +645,7 @@ impl Network {
let our_nat_type = conn_handle.nat_type().await;
let our_http_capable = conn_handle.is_http_capable();
let our_http_addr = conn_handle.http_addr();
match initial_exchange_accept(storage, &our_node_id, send, recv, remote_node_id, anchor_addr, Some(remote_sock), our_nat_type, our_http_capable, our_http_addr).await {
match initial_exchange_accept(storage, &our_node_id, send, recv, remote_node_id, anchor_addr, Some(remote_sock), our_nat_type, our_http_capable, our_http_addr, conn_handle.device_role(), None).await {
Ok(()) => {
info!(peer = hex::encode(remote_node_id), "Initial exchange complete (upgraded to mesh)");
conn_handle.log_activity(ActivityLevel::Info, ActivityCategory::Connection, format!("Upgraded {} to mesh", &hex::encode(remote_node_id)[..8]), Some(remote_node_id));
@ -676,7 +695,7 @@ impl Network {
let our_nat_type = self.conn_handle.nat_type().await;
// Initial exchange WITHOUT holding conn_mgr lock
match initial_exchange_connect(&self.storage, &self.our_node_id, &conn, peer_id, anchor_addr, our_nat_type, self.is_http_capable(), self.http_addr()).await? {
match initial_exchange_connect(&self.storage, &self.our_node_id, &conn, peer_id, anchor_addr, our_nat_type, self.is_http_capable(), self.http_addr(), Some(self.device_role), None).await? {
ExchangeResult::Accepted => {
// Spawn the per-connection stream loop
let conn_data = self.conn_handle.get_connection_map().await;
@ -1318,7 +1337,7 @@ impl Network {
let anchor_addr = self.conn_handle.build_anchor_advertised_addr().await;
let our_nat_type = self.conn_handle.nat_type().await;
match initial_exchange_connect(&self.storage, &self.our_node_id, &conn, peer_id, anchor_addr, our_nat_type, self.is_http_capable(), self.http_addr()).await {
match initial_exchange_connect(&self.storage, &self.our_node_id, &conn, peer_id, anchor_addr, our_nat_type, self.is_http_capable(), self.http_addr(), Some(self.device_role), None).await {
Ok(ExchangeResult::Accepted) => {
self.conn_handle.register_connection(peer_id, conn.clone(), vec![], PeerSlotKind::Local).await;
{
@ -1423,7 +1442,7 @@ impl Network {
for peer_id in &newly_connected {
let conn = self.conn_handle.get_connection(peer_id).await;
if let Some(conn) = conn {
match initial_exchange_connect(&self.storage, &self.our_node_id, &conn, *peer_id, anchor_addr.clone(), our_nat_type, self.is_http_capable(), self.http_addr()).await {
match initial_exchange_connect(&self.storage, &self.our_node_id, &conn, *peer_id, anchor_addr.clone(), our_nat_type, self.is_http_capable(), self.http_addr(), Some(self.device_role), None).await {
Ok(ExchangeResult::Accepted) => {}
Ok(ExchangeResult::Refused { redirect }) => {
debug!(peer = hex::encode(peer_id), "Auto-connect refused, disconnecting");
@ -1789,6 +1808,31 @@ impl Network {
Ok(read_payload(&mut recv, 10 * 1024 * 1024).await?)
}
/// Ask `peer_id` to hold replicas of `post_ids`, tagging the request with `priority`.
///
/// The payload is sent as a `MessageType::ReplicationRequest` through
/// `send_to_peer_bi`, which is told to expect a
/// `MessageType::ReplicationResponse` in return. The whole exchange is bounded
/// by a 10-second timeout.
///
/// Returns the `accepted` list carried by the peer's response.
///
/// # Errors
///
/// Fails with "replication request timed out" when the 10 seconds elapse
/// before the exchange completes, and otherwise propagates any error returned
/// by `send_to_peer_bi`.
pub async fn send_replication_request(
    &self,
    peer_id: &NodeId,
    post_ids: Vec<PostId>,
    priority: u8,
) -> anyhow::Result<Vec<PostId>> {
    use crate::protocol::{ReplicationRequestPayload, ReplicationResponsePayload};

    // Upper bound on how long we wait for the peer's answer.
    const REPLY_DEADLINE: std::time::Duration = std::time::Duration::from_secs(10);

    let request = ReplicationRequestPayload { post_ids, priority };
    let exchange = self.send_to_peer_bi(
        peer_id,
        MessageType::ReplicationRequest,
        &request,
        MessageType::ReplicationResponse,
    );

    let reply: ReplicationResponsePayload =
        match tokio::time::timeout(REPLY_DEADLINE, exchange).await {
            // The exchange finished in time; surface its own error, if any.
            Ok(outcome) => outcome?,
            // The deadline elapsed before the peer answered.
            Err(_elapsed) => return Err(anyhow::anyhow!("replication request timed out")),
        };

    Ok(reply.accepted)
}
/// Fetch a blob from a peer by CID.
/// Returns None if the peer doesn't have it.
/// Returns (data, response) so caller can handle manifest + CDN fields.