Fix 3 pre-existing lock contention issues in message handlers
- handle_pull_request: extracted to a static method; the conn_mgr lock is no longer held during network I/O (reading the request + writing the response)
- handle_address_request: inlined at the call site with a brief lock to gather data; the response is written after the lock is released
- handle_session_relay: capacity check + target lookup happen under a brief lock; the rejection write was moved outside the lock scope

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
be253e8001
commit
4379b6fdfc
1 changed file with 80 additions and 110 deletions
|
|
@ -2173,8 +2173,10 @@ impl ConnectionManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle an incoming pull request from a peer.
|
/// Handle an incoming pull request from a peer.
|
||||||
pub async fn handle_pull_request(
|
/// Handle a pull sync request — no conn_mgr lock needed, only storage + our_node_id.
|
||||||
&self,
|
pub async fn handle_pull_request_unlocked(
|
||||||
|
storage: &StoragePool,
|
||||||
|
our_node_id: NodeId,
|
||||||
remote_node_id: NodeId,
|
remote_node_id: NodeId,
|
||||||
mut recv: iroh::endpoint::RecvStream,
|
mut recv: iroh::endpoint::RecvStream,
|
||||||
mut send: iroh::endpoint::SendStream,
|
mut send: iroh::endpoint::SendStream,
|
||||||
|
|
@ -2190,12 +2192,11 @@ impl ConnectionManager {
|
||||||
|
|
||||||
// Phase 1: Brief lock — load data
|
// Phase 1: Brief lock — load data
|
||||||
let (all_posts, group_members) = {
|
let (all_posts, group_members) = {
|
||||||
let storage = self.storage.get().await;
|
let s = storage.get().await;
|
||||||
let posts = storage.list_posts_with_visibility()?;
|
let posts = s.list_posts_with_visibility()?;
|
||||||
let members = storage.get_all_group_members().unwrap_or_default();
|
let members = s.get_all_group_members().unwrap_or_default();
|
||||||
(posts, members)
|
(posts, members)
|
||||||
};
|
};
|
||||||
// Lock RELEASED
|
|
||||||
|
|
||||||
// Phase 2: Filter without lock (pure CPU)
|
// Phase 2: Filter without lock (pure CPU)
|
||||||
let mut candidates_to_send = Vec::new();
|
let mut candidates_to_send = Vec::new();
|
||||||
|
|
@ -2209,27 +2210,23 @@ impl ConnectionManager {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine if peer already has this post
|
|
||||||
let peer_has_post = if use_since_ms {
|
let peer_has_post = if use_since_ms {
|
||||||
// v4 path: filter by per-author timestamp (60s fudge for clock skew)
|
|
||||||
if let Some(&since) = since_ms_map.get(&post.author) {
|
if let Some(&since) = since_ms_map.get(&post.author) {
|
||||||
post.timestamp_ms <= since + 60_000
|
post.timestamp_ms <= since + 60_000
|
||||||
} else {
|
} else {
|
||||||
false // no since_ms for this author — they want everything
|
false
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Legacy path: use have_post_ids
|
|
||||||
their_post_ids.contains(&id)
|
their_post_ids.contains(&id)
|
||||||
};
|
};
|
||||||
|
|
||||||
if !peer_has_post {
|
if !peer_has_post {
|
||||||
candidates_to_send.push((id, post, visibility));
|
candidates_to_send.push((id, post, visibility));
|
||||||
} else {
|
} else {
|
||||||
// They already have the post — send visibility update if we authored it
|
if post.author == our_node_id {
|
||||||
if post.author == self.our_node_id {
|
|
||||||
vis_updates_to_send.push(crate::types::VisibilityUpdate {
|
vis_updates_to_send.push(crate::types::VisibilityUpdate {
|
||||||
post_id: id,
|
post_id: id,
|
||||||
author: self.our_node_id,
|
author: our_node_id,
|
||||||
visibility,
|
visibility,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -2238,9 +2235,9 @@ impl ConnectionManager {
|
||||||
|
|
||||||
// Phase 3: Brief re-lock for is_deleted checks on filtered posts
|
// Phase 3: Brief re-lock for is_deleted checks on filtered posts
|
||||||
let (posts, vis_updates) = {
|
let (posts, vis_updates) = {
|
||||||
let storage = self.storage.get().await;
|
let s = storage.get().await;
|
||||||
let posts_to_send: Vec<SyncPost> = candidates_to_send.into_iter()
|
let posts_to_send: Vec<SyncPost> = candidates_to_send.into_iter()
|
||||||
.filter(|(id, _, _)| !storage.is_deleted(id).unwrap_or(false))
|
.filter(|(id, _, _)| !s.is_deleted(id).unwrap_or(false))
|
||||||
.map(|(id, post, visibility)| SyncPost { id, post, visibility })
|
.map(|(id, post, visibility)| SyncPost { id, post, visibility })
|
||||||
.collect();
|
.collect();
|
||||||
(posts_to_send, vis_updates_to_send)
|
(posts_to_send, vis_updates_to_send)
|
||||||
|
|
@ -2259,79 +2256,6 @@ impl ConnectionManager {
|
||||||
|
|
||||||
/// Handle an address resolution request: check connections, social routes, then peer records.
|
/// Handle an address resolution request: check connections, social routes, then peer records.
|
||||||
/// If target is disconnected, register the requester as a watcher.
|
/// If target is disconnected, register the requester as a watcher.
|
||||||
pub async fn handle_address_request(
|
|
||||||
&self,
|
|
||||||
mut recv: iroh::endpoint::RecvStream,
|
|
||||||
mut send: iroh::endpoint::SendStream,
|
|
||||||
requester: NodeId,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let req: crate::protocol::AddressRequestPayload =
|
|
||||||
read_payload(&mut recv, MAX_PAYLOAD).await?;
|
|
||||||
|
|
||||||
// Check if target is directly connected to us
|
|
||||||
if let Some(_pc) = self.connections.get(&req.target) {
|
|
||||||
let storage = self.storage.get().await;
|
|
||||||
let addr = storage.get_peer_record(&req.target)?
|
|
||||||
.and_then(|r| r.addresses.first().map(|a| a.to_string()));
|
|
||||||
let response = crate::protocol::AddressResponsePayload {
|
|
||||||
target: req.target,
|
|
||||||
address: addr,
|
|
||||||
disconnected_at: None,
|
|
||||||
peer_addresses: vec![],
|
|
||||||
};
|
|
||||||
write_typed_message(&mut send, MessageType::AddressResponse, &response).await?;
|
|
||||||
send.finish()?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let storage = self.storage.get().await;
|
|
||||||
|
|
||||||
// Check social routes (richer info)
|
|
||||||
if let Some(route) = storage.get_social_route(&req.target)? {
|
|
||||||
match route.status {
|
|
||||||
SocialStatus::Online => {
|
|
||||||
let addr = route.addresses.first().map(|a| a.to_string());
|
|
||||||
let response = crate::protocol::AddressResponsePayload {
|
|
||||||
target: req.target,
|
|
||||||
address: addr,
|
|
||||||
disconnected_at: None,
|
|
||||||
peer_addresses: route.peer_addresses.clone(),
|
|
||||||
};
|
|
||||||
write_typed_message(&mut send, MessageType::AddressResponse, &response).await?;
|
|
||||||
send.finish()?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
SocialStatus::Disconnected => {
|
|
||||||
let _ = storage.add_reconnect_watcher(&req.target, &requester);
|
|
||||||
let response = crate::protocol::AddressResponsePayload {
|
|
||||||
target: req.target,
|
|
||||||
address: None,
|
|
||||||
disconnected_at: Some(route.last_seen_ms),
|
|
||||||
peer_addresses: route.peer_addresses.clone(),
|
|
||||||
};
|
|
||||||
write_typed_message(&mut send, MessageType::AddressResponse, &response).await?;
|
|
||||||
send.finish()?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fall back to peer record
|
|
||||||
let address = storage.get_peer_record(&req.target)?
|
|
||||||
.and_then(|r| r.addresses.first().map(|a| a.to_string()));
|
|
||||||
|
|
||||||
let response = crate::protocol::AddressResponsePayload {
|
|
||||||
target: req.target,
|
|
||||||
address,
|
|
||||||
disconnected_at: None,
|
|
||||||
peer_addresses: vec![],
|
|
||||||
};
|
|
||||||
write_typed_message(&mut send, MessageType::AddressResponse, &response).await?;
|
|
||||||
send.finish()?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Resolve a peer's address using connections, social routes, and N2/N3 referral chain.
|
/// Resolve a peer's address using connections, social routes, and N2/N3 referral chain.
|
||||||
pub async fn resolve_address(&self, target: &NodeId) -> anyhow::Result<Option<String>> {
|
pub async fn resolve_address(&self, target: &NodeId) -> anyhow::Result<Option<String>> {
|
||||||
// Check if target is directly connected
|
// Check if target is directly connected
|
||||||
|
|
@ -4289,10 +4213,15 @@ impl ConnectionManager {
|
||||||
let payload: SessionRelayPayload = read_payload(&mut requester_recv, 4096).await?;
|
let payload: SessionRelayPayload = read_payload(&mut requester_recv, 4096).await?;
|
||||||
|
|
||||||
// Check capacity and find target connection
|
// Check capacity and find target connection
|
||||||
let (target_conn, active_pipes) = {
|
let (can_accept, target_conn, active_pipes) = {
|
||||||
let cm = conn_mgr.lock().await;
|
let cm = conn_mgr.lock().await;
|
||||||
if !cm.can_accept_relay_pipe() {
|
let can = cm.can_accept_relay_pipe();
|
||||||
// Reject — at capacity
|
let tc = cm.connections.get(&payload.target)
|
||||||
|
.map(|pc| pc.connection.clone());
|
||||||
|
(can, tc, Arc::clone(cm.active_relay_pipes()))
|
||||||
|
};
|
||||||
|
// Lock RELEASED — reject outside lock if at capacity
|
||||||
|
if !can_accept {
|
||||||
let result = RelayIntroduceResultPayload {
|
let result = RelayIntroduceResultPayload {
|
||||||
intro_id: payload.intro_id,
|
intro_id: payload.intro_id,
|
||||||
accepted: false,
|
accepted: false,
|
||||||
|
|
@ -4306,11 +4235,6 @@ impl ConnectionManager {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let target_conn = cm.connections.get(&payload.target)
|
|
||||||
.map(|pc| pc.connection.clone());
|
|
||||||
(target_conn, Arc::clone(cm.active_relay_pipes()))
|
|
||||||
};
|
|
||||||
|
|
||||||
let target_conn = match target_conn {
|
let target_conn = match target_conn {
|
||||||
Some(c) => c,
|
Some(c) => c,
|
||||||
None => {
|
None => {
|
||||||
|
|
@ -5542,8 +5466,12 @@ impl ConnectionManager {
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
match msg_type {
|
match msg_type {
|
||||||
MessageType::PullSyncRequest => {
|
MessageType::PullSyncRequest => {
|
||||||
|
let (storage, our_node_id) = {
|
||||||
let cm = conn_mgr.lock().await;
|
let cm = conn_mgr.lock().await;
|
||||||
cm.handle_pull_request(remote_node_id, recv, send).await?;
|
(Arc::clone(&cm.storage), *cm.our_node_id())
|
||||||
|
};
|
||||||
|
// Lock RELEASED — handler does its own brief storage locks + network I/O
|
||||||
|
ConnectionManager::handle_pull_request_unlocked(&storage, our_node_id, remote_node_id, recv, send).await?;
|
||||||
}
|
}
|
||||||
MessageType::InitialExchange => {
|
MessageType::InitialExchange => {
|
||||||
let (storage, our_node_id, anchor_addr, our_nat_type, our_http_capable, our_http_addr) = {
|
let (storage, our_node_id, anchor_addr, our_nat_type, our_http_capable, our_http_addr) = {
|
||||||
|
|
@ -5554,8 +5482,50 @@ impl ConnectionManager {
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
MessageType::AddressRequest => {
|
MessageType::AddressRequest => {
|
||||||
|
// Read request OUTSIDE lock (network I/O)
|
||||||
|
let req: crate::protocol::AddressRequestPayload = read_payload(&mut recv, MAX_PAYLOAD).await?;
|
||||||
|
// Brief lock: gather address data
|
||||||
|
let response = {
|
||||||
let cm = conn_mgr.lock().await;
|
let cm = conn_mgr.lock().await;
|
||||||
cm.handle_address_request(recv, send, remote_node_id).await?;
|
let storage = cm.storage.get().await;
|
||||||
|
if cm.connections.contains_key(&req.target) {
|
||||||
|
let addr = storage.get_peer_record(&req.target).ok().flatten()
|
||||||
|
.and_then(|r| r.addresses.first().map(|a| a.to_string()));
|
||||||
|
crate::protocol::AddressResponsePayload {
|
||||||
|
target: req.target, address: addr,
|
||||||
|
disconnected_at: None, peer_addresses: vec![],
|
||||||
|
}
|
||||||
|
} else if let Some(route) = storage.get_social_route(&req.target).ok().flatten() {
|
||||||
|
match route.status {
|
||||||
|
SocialStatus::Online => {
|
||||||
|
crate::protocol::AddressResponsePayload {
|
||||||
|
target: req.target,
|
||||||
|
address: route.addresses.first().map(|a| a.to_string()),
|
||||||
|
disconnected_at: None,
|
||||||
|
peer_addresses: route.peer_addresses.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SocialStatus::Disconnected => {
|
||||||
|
let _ = storage.add_reconnect_watcher(&req.target, &remote_node_id);
|
||||||
|
crate::protocol::AddressResponsePayload {
|
||||||
|
target: req.target, address: None,
|
||||||
|
disconnected_at: Some(route.last_seen_ms),
|
||||||
|
peer_addresses: route.peer_addresses.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let address = storage.get_peer_record(&req.target).ok().flatten()
|
||||||
|
.and_then(|r| r.addresses.first().map(|a| a.to_string()));
|
||||||
|
crate::protocol::AddressResponsePayload {
|
||||||
|
target: req.target, address,
|
||||||
|
disconnected_at: None, peer_addresses: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// Lock RELEASED — write response outside lock
|
||||||
|
write_typed_message(&mut send, MessageType::AddressResponse, &response).await?;
|
||||||
|
send.finish()?;
|
||||||
}
|
}
|
||||||
MessageType::SocialCheckin => {
|
MessageType::SocialCheckin => {
|
||||||
let payload: SocialCheckinPayload = read_payload(&mut recv, MAX_PAYLOAD).await?;
|
let payload: SocialCheckinPayload = read_payload(&mut recv, MAX_PAYLOAD).await?;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue