v0.4.0: Protocol v4 — header-driven sync, tiered engagement, multi-upstream

Protocol v4 sync overhaul:
- Slim PullSyncRequest: per-author timestamps (since_ms) replace full post ID lists
  Request size O(follows) instead of O(posts). Backward-compatible via serde default.
- Tiered pull frequency: 60s ticks, only syncs stale authors (4hr default)
  Full pull only on first tick (bootstrap). Most ticks skip — no stale authors.
- Tiered engagement checks: frequency scales with content age
  5min (<72h), 1hr (3-14d), 4hr (14-30d), 24hr (>30d)
  Single SQL query filters posts due for check.
- Header-driven post discovery: ManifestPush triggers PostFetch for missing
  followed-author posts (capped 10 per manifest). CDN tree = notification system.
- Multi-upstream (3 max): composite PK, priority ordering, engagement diffs
  sent to all upstreams, promote/remove on failure.

DB schema:
- follows.last_sync_ms — Self Last Encounter per author
- posts.last_engagement_ms — last reaction/comment timestamp
- posts.last_check_ms — last engagement check timestamp
- post_upstream: single-row → 3-row with priority column

Lock contention fixes:
- get_blob_for_post: 3 locks → 1
- prefetch_blobs_from_peer: lock-free blob checks
- fetch_engagement_from_peer: explicit lock release before I/O
- serve_post: 4 locks → 2 (eliminated redundant queries)
- run_replication_check: 2 locks → 1
- Badge cycle: N+2 IPC calls → 1 (get_badge_counts)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Scott Reimers 2026-03-21 16:13:45 -04:00
parent 1df00eebf8
commit bbaacf9b6c
10 changed files with 489 additions and 100 deletions

View file

@ -606,6 +606,47 @@ impl Storage {
)?;
}
// Protocol v4: Add last_sync_ms to follows if missing
let has_last_sync = self.conn.prepare(
"SELECT COUNT(*) FROM pragma_table_info('follows') WHERE name='last_sync_ms'"
)?.query_row([], |row| row.get::<_, i64>(0))?;
if has_last_sync == 0 {
self.conn.execute_batch(
"ALTER TABLE follows ADD COLUMN last_sync_ms INTEGER NOT NULL DEFAULT 0;"
)?;
}
// Protocol v4: Add last_engagement_ms and last_check_ms to posts if missing
let has_last_engagement = self.conn.prepare(
"SELECT COUNT(*) FROM pragma_table_info('posts') WHERE name='last_engagement_ms'"
)?.query_row([], |row| row.get::<_, i64>(0))?;
if has_last_engagement == 0 {
self.conn.execute_batch(
"ALTER TABLE posts ADD COLUMN last_engagement_ms INTEGER NOT NULL DEFAULT 0;
ALTER TABLE posts ADD COLUMN last_check_ms INTEGER NOT NULL DEFAULT 0;"
)?;
}
// Protocol v4 Phase 6: Migrate post_upstream to multi-upstream (3 max)
let has_priority = self.conn.prepare(
"SELECT COUNT(*) FROM pragma_table_info('post_upstream') WHERE name='priority'"
)?.query_row([], |row| row.get::<_, i64>(0))?;
if has_priority == 0 {
self.conn.execute_batch(
"ALTER TABLE post_upstream RENAME TO post_upstream_old;
CREATE TABLE post_upstream (
post_id BLOB NOT NULL,
peer_node_id BLOB NOT NULL,
priority INTEGER NOT NULL DEFAULT 0,
registered_at INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (post_id, peer_node_id)
);
INSERT INTO post_upstream (post_id, peer_node_id, priority, registered_at)
SELECT post_id, peer_node_id, 0, 0 FROM post_upstream_old;
DROP TABLE post_upstream_old;"
)?;
}
Ok(())
}
@ -880,6 +921,104 @@ impl Storage {
Ok(ids)
}
// ---- Protocol v4: Per-Author Sync Tracking ----
/// Record the time (ms since epoch) at which a followed author was last synced.
///
/// No-op if `node_id` is not in the follows table.
pub fn update_follow_last_sync(&self, node_id: &NodeId, timestamp_ms: u64) -> anyhow::Result<()> {
    let ts = timestamp_ms as i64; // SQLite stores integers as i64
    self.conn.execute(
        "UPDATE follows SET last_sync_ms = ?2 WHERE node_id = ?1",
        params![node_id.as_slice(), ts],
    )?;
    Ok(())
}
/// Return every followed author paired with their last_sync_ms timestamp.
pub fn get_follows_with_last_sync(&self) -> anyhow::Result<Vec<(NodeId, u64)>> {
    let mut stmt = self.conn.prepare("SELECT node_id, last_sync_ms FROM follows")?;
    // Pull raw (blob, i64) pairs first, failing on the first bad row.
    let raw: Vec<(Vec<u8>, i64)> = stmt
        .query_map([], |row| Ok((row.get::<_, Vec<u8>>(0)?, row.get::<_, i64>(1)?)))?
        .collect::<Result<_, _>>()?;
    // Then decode the node-id blobs; a malformed blob aborts with an error.
    let mut out = Vec::with_capacity(raw.len());
    for (blob, ts) in raw {
        out.push((blob_to_nodeid(blob)?, ts as u64));
    }
    Ok(out)
}
/// Return node ids of follows that have not been synced within the last
/// `max_age_ms` milliseconds (last_sync_ms older than the cutoff).
pub fn get_stale_follows(&self, max_age_ms: u64) -> anyhow::Result<Vec<NodeId>> {
    // saturating_sub keeps the cutoff at 0 rather than underflowing for huge ages
    let cutoff = (now_ms() as u64).saturating_sub(max_age_ms) as i64;
    let mut stmt = self.conn.prepare(
        "SELECT node_id FROM follows WHERE last_sync_ms < ?1"
    )?;
    let blobs = stmt.query_map(params![cutoff], |row| row.get::<_, Vec<u8>>(0))?;
    let mut stale = Vec::new();
    for blob in blobs {
        stale.push(blob_to_nodeid(blob?)?);
    }
    Ok(stale)
}
/// Return ids of posts whose engagement is due for a re-check, using a
/// tiered interval keyed on how recently the post last saw engagement:
/// - Active (engagement within 72h): check every 5 min
/// - Recent (engagement within 14d): check every 1 hour
/// - Aging (engagement within 30d): check every 4 hours
/// - Cold (older): check every 24 hours
pub fn get_posts_due_for_engagement_check(&self) -> anyhow::Result<Vec<PostId>> {
    let now = now_ms() as u64;
    // Tier boundaries as absolute timestamps (ms); saturating to 0 near epoch.
    let active_cutoff = now.saturating_sub(72 * 3600 * 1000) as i64;
    let recent_cutoff = now.saturating_sub(14 * 24 * 3600 * 1000) as i64;
    let aging_cutoff = now.saturating_sub(30 * 24 * 3600 * 1000) as i64;
    // One query does the tiering: the CASE picks the interval for each row.
    let mut stmt = self.conn.prepare(
        "SELECT id FROM posts WHERE last_check_ms < ?1 - CASE
WHEN last_engagement_ms > ?2 THEN 300000
WHEN last_engagement_ms > ?3 THEN 3600000
WHEN last_engagement_ms > ?4 THEN 14400000
ELSE 86400000
END"
    )?;
    let rows = stmt.query_map(
        params![now as i64, active_cutoff, recent_cutoff, aging_cutoff],
        |row| row.get::<_, Vec<u8>>(0),
    )?;
    let mut due = Vec::new();
    for row in rows {
        let bytes = row?;
        // Silently skip ids of the wrong length instead of failing the scan.
        if let Ok(id) = <[u8; 32]>::try_from(bytes.as_slice()) {
            due.push(id);
        }
    }
    Ok(due)
}
/// Update the last_check_ms timestamp for a post.
pub fn update_post_last_check(&self, post_id: &PostId, timestamp_ms: u64) -> anyhow::Result<()> {
self.conn.execute(
"UPDATE posts SET last_check_ms = ?2 WHERE id = ?1",
params![post_id.as_slice(), timestamp_ms as i64],
)?;
Ok(())
}
/// Update the last_engagement_ms timestamp for a post.
pub fn update_post_last_engagement(&self, post_id: &PostId, timestamp_ms: u64) -> anyhow::Result<()> {
self.conn.execute(
"UPDATE posts SET last_engagement_ms = ?2 WHERE id = ?1",
params![post_id.as_slice(), timestamp_ms as i64],
)?;
Ok(())
}
// ---- Peers ----
/// Add or update a peer (backward-compat: no addresses)
@ -3976,30 +4115,75 @@ impl Storage {
Ok(())
}
// --- Engagement: post_upstream ---
// --- Engagement: post_upstream (multi-upstream, 3 max) ---
/// Set the upstream peer for a post (who we got it from).
pub fn set_post_upstream(&self, post_id: &PostId, peer_node_id: &NodeId) -> anyhow::Result<()> {
/// Add an upstream peer for a post. INSERT OR IGNORE, cap at 3 per post.
pub fn add_post_upstream(&self, post_id: &PostId, peer_node_id: &NodeId, priority: u8) -> anyhow::Result<()> {
// Check current count
let count: i64 = self.conn.prepare(
"SELECT COUNT(*) FROM post_upstream WHERE post_id = ?1"
)?.query_row(params![post_id.as_slice()], |row| row.get(0))?;
if count >= 3 {
return Ok(()); // Already at cap
}
let now = now_ms();
self.conn.execute(
"INSERT INTO post_upstream (post_id, peer_node_id) VALUES (?1, ?2)
ON CONFLICT(post_id) DO UPDATE SET peer_node_id = excluded.peer_node_id",
"INSERT OR IGNORE INTO post_upstream (post_id, peer_node_id, priority, registered_at)
VALUES (?1, ?2, ?3, ?4)",
params![post_id.as_slice(), peer_node_id.as_slice(), priority as i64, now],
)?;
Ok(())
}
/// Return every upstream peer registered for a post together with its
/// priority, ordered by priority ASC (0 = primary).
pub fn get_post_upstreams(&self, post_id: &PostId) -> anyhow::Result<Vec<(NodeId, u8)>> {
    let mut stmt = self.conn.prepare(
        "SELECT peer_node_id, priority FROM post_upstream WHERE post_id = ?1 ORDER BY priority ASC"
    )?;
    // Collect raw rows first so any SQL error short-circuits via `?`.
    let raw: Vec<(Vec<u8>, i64)> = stmt
        .query_map(params![post_id.as_slice()], |row| {
            Ok((row.get::<_, Vec<u8>>(0)?, row.get::<_, i64>(1)?))
        })?
        .collect::<Result<_, _>>()?;
    // Blobs of the wrong length are dropped rather than treated as errors.
    let upstreams = raw
        .into_iter()
        .filter_map(|(blob, prio)| {
            <[u8; 32]>::try_from(blob.as_slice())
                .ok()
                .map(|nid| (nid, prio as u8))
        })
        .collect();
    Ok(upstreams)
}
/// Return the primary upstream (lowest priority value) for a post, if any.
///
/// Backward-compatible wrapper for callers that only need one upstream.
pub fn get_post_upstream(&self, post_id: &PostId) -> anyhow::Result<Option<NodeId>> {
    let first = self.get_post_upstreams(post_id)?.into_iter().next();
    Ok(first.map(|(node, _priority)| node))
}
/// Remove a specific upstream peer for a post.
pub fn remove_post_upstream(&self, post_id: &PostId, peer_node_id: &NodeId) -> anyhow::Result<()> {
self.conn.execute(
"DELETE FROM post_upstream WHERE post_id = ?1 AND peer_node_id = ?2",
params![post_id.as_slice(), peer_node_id.as_slice()],
)?;
Ok(())
}
/// Get the upstream peer for a post.
pub fn get_post_upstream(&self, post_id: &PostId) -> anyhow::Result<Option<NodeId>> {
let result = self.conn.query_row(
"SELECT peer_node_id FROM post_upstream WHERE post_id = ?1",
/// Promote an upstream peer to primary (priority 0), pushing others up.
pub fn promote_post_upstream(&self, post_id: &PostId, peer_node_id: &NodeId) -> anyhow::Result<()> {
// Shift all priorities up by 1
self.conn.execute(
"UPDATE post_upstream SET priority = priority + 1 WHERE post_id = ?1",
params![post_id.as_slice()],
|row| row.get::<_, Vec<u8>>(0),
);
match result {
Ok(bytes) => Ok(bytes.try_into().ok()),
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(e) => Err(e.into()),
}
)?;
// Set the promoted peer to priority 0
self.conn.execute(
"UPDATE post_upstream SET priority = 0 WHERE post_id = ?1 AND peer_node_id = ?2",
params![post_id.as_slice(), peer_node_id.as_slice()],
)?;
Ok(())
}
/// Count downstream peers for a post.