Export/Import: ZIP export with scope selection, import with public post merge

Export (export.rs): ZIP archive with auto-chunking at 4GB. Four scopes:
identity only, posts only, posts+identity, everything (posts+key+follows+
profiles+settings). Includes blobs. Manifest JSON tracks metadata.

Import (import.rs): Read ZIP summary without importing (preview).
Import public posts into current identity with new PostIds + original
timestamps. Import as new identity (creates identity subdir from key).
Uses spawn_blocking for ZIP I/O to avoid Send issues with ZipArchive.

Tauri IPC: export_data, import_summary, import_public_posts,
import_as_new_identity commands. IdentityManager.base_dir() getter.
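The command wrappers themselves aren't in this excerpt; a minimal sketch of how import_summary might bridge to the core, using the same spawn_blocking pattern as import.rs. The itsgoin_core crate name is assumed, and the error-to-String flattening is the usual Tauri convention, not necessarily this commit's:

use std::path::PathBuf;
use itsgoin_core::import::{read_import_summary, ImportSummary};

#[tauri::command]
async fn import_summary(zip_path: PathBuf) -> Result<ImportSummary, String> {
    // read_import_summary does synchronous ZIP I/O, so run it on the blocking
    // pool; the JoinError and the anyhow::Error are flattened to strings for IPC.
    tokio::task::spawn_blocking(move || read_import_summary(&zip_path))
        .await
        .map_err(|e| e.to_string())?
        .map_err(|e| e.to_string())
}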

Frontend: Export wizard lightbox with scope radio buttons + output dir.
Import wizard with ZIP path, preview summary, action selection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Scott Reimers, 2026-03-31 20:56:03 -04:00
parent fb1e92985c · commit 8ef32e6df6
7 changed files with 786 additions and 3 deletions

crates/core/Cargo.toml

@@ -18,6 +18,7 @@ curve25519-dalek = { version = "=5.0.0-pre.1", features = ["rand_core", "zeroize
ed25519-dalek = { version = "=3.0.0-pre.1", features = ["rand_core", "zeroize"] }
chacha20poly1305 = "0.10"
base64 = "0.22"
zip = { version = "2", default-features = false, features = ["deflate"] }
igd-next = { version = "0.16", features = ["tokio"] }
[dev-dependencies]

crates/core/src/export.rs (new file, 319 lines)

@@ -0,0 +1,319 @@
//! Export data as ZIP archives with auto-chunking at 4GB.
//!
//! Export scopes:
//! - IdentityOnly: just identity.key (tiny backup)
//! - PostsOnly: public posts + blobs (no key — safe to share)
//! - PostsWithIdentity: posts + blobs + identity.key (full migration)
//! - Everything: posts + blobs + key + follows + profiles + settings (complete backup)
use std::io::Write;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::blob::BlobStore;
use crate::storage::StoragePool;
use crate::types::NodeId;
/// Maximum bytes per ZIP chunk (4 GB).
const CHUNK_MAX_BYTES: u64 = 4 * 1024 * 1024 * 1024;
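// (4 GiB is also the classic ZIP size ceiling without ZIP64 extensions, which
// is likely why the cap sits here: every chunk stays readable by non-ZIP64 tools.)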
/// What to include in the export.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ExportScope {
IdentityOnly,
PostsOnly,
PostsWithIdentity,
Everything,
}
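// With `rename_all = "snake_case"` these cross IPC as "identity_only",
// "posts_only", "posts_with_identity", and "everything", presumably the
// strings the export wizard's scope radio buttons submit.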
/// Manifest embedded in each ZIP chunk.
#[derive(Debug, Serialize, Deserialize)]
pub struct ExportManifest {
pub version: u32,
pub scope: ExportScope,
pub node_id: String,
pub export_date: u64,
pub chunk_index: u32,
pub total_chunks: u32,
pub post_count: usize,
pub blob_count: usize,
}
/// Exported post with visibility and header.
#[derive(Debug, Serialize, Deserialize)]
pub struct ExportedPost {
pub id: String,
pub author: String,
pub content: String,
pub attachments_json: String,
pub timestamp_ms: u64,
pub visibility_json: String,
pub header_json: Option<String>,
pub intent: Option<String>,
}
/// Result of an export operation.
pub struct ExportResult {
pub paths: Vec<PathBuf>,
pub post_count: usize,
pub blob_count: usize,
}
/// Run an export to the given output directory.
pub async fn export_data(
data_dir: &Path,
storage: &StoragePool,
blob_store: &BlobStore,
node_id: &NodeId,
scope: ExportScope,
output_dir: &Path,
) -> anyhow::Result<ExportResult> {
std::fs::create_dir_all(output_dir)?;
let node_id_hex = hex::encode(node_id);
let now = now_ms();
// Gather data based on scope
let identity_key = if scope != ExportScope::PostsOnly {
let key_path = data_dir.join("identity.key");
if key_path.exists() {
Some(std::fs::read(&key_path)?)
} else {
None
}
} else {
None
};
let (posts, blob_cids) = if scope == ExportScope::IdentityOnly {
(vec![], vec![])
} else {
gather_posts(storage, node_id).await?
};
let (follows, profiles, settings) = if scope == ExportScope::Everything {
gather_extras(storage).await?
} else {
(None, None, None)
};
let post_count = posts.len();
// Build the ZIP
let mut zip_paths: Vec<PathBuf> = Vec::new();
let mut chunk_index: u32 = 0;
let mut current_size: u64 = 0;
let base_name = format!("itsgoin-export-{}", &node_id_hex[..8]);
let chunk_path = |idx: u32| -> PathBuf {
if idx == 0 {
output_dir.join(format!("{}.zip", base_name))
} else {
output_dir.join(format!("{}.part{}.zip", base_name, idx + 1))
}
};
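// Yields e.g. "itsgoin-export-ab12cd34.zip" for chunk 0, then
// "itsgoin-export-ab12cd34.part2.zip", "...part3.zip", and so on.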
let path = chunk_path(chunk_index);
let file = std::fs::File::create(&path)?;
let mut zip = zip::ZipWriter::new(file);
let options = zip::write::SimpleFileOptions::default()
.compression_method(zip::CompressionMethod::Deflated);
// Write the chunk-0 manifest. total_chunks is not rewritten after the fact;
// multi-chunk exports are detected via chunk_index and the .partN file naming.
let manifest = ExportManifest {
version: 1,
scope,
node_id: node_id_hex.clone(),
export_date: now,
chunk_index: 0,
total_chunks: 1,
post_count,
blob_count: blob_cids.len(),
};
let manifest_json = serde_json::to_string_pretty(&manifest)?;
zip.start_file("itsgoin-export/manifest.json", options)?;
zip.write_all(manifest_json.as_bytes())?;
current_size += manifest_json.len() as u64;
// Identity key
if let Some(ref key) = identity_key {
zip.start_file("itsgoin-export/identity.key", options)?;
zip.write_all(key)?;
current_size += key.len() as u64;
}
// Posts
if !posts.is_empty() {
let posts_json = serde_json::to_string_pretty(&posts)?;
zip.start_file("itsgoin-export/posts.json", options)?;
zip.write_all(posts_json.as_bytes())?;
current_size += posts_json.len() as u64;
}
// Follows, profiles, settings
if let Some(ref data) = follows {
let json = serde_json::to_string_pretty(data)?;
zip.start_file("itsgoin-export/follows.json", options)?;
zip.write_all(json.as_bytes())?;
current_size += json.len() as u64;
}
if let Some(ref data) = profiles {
let json = serde_json::to_string_pretty(data)?;
zip.start_file("itsgoin-export/profiles.json", options)?;
zip.write_all(json.as_bytes())?;
current_size += json.len() as u64;
}
if let Some(ref data) = settings {
let json = serde_json::to_string_pretty(data)?;
zip.start_file("itsgoin-export/settings.json", options)?;
zip.write_all(json.as_bytes())?;
current_size += json.len() as u64;
}
// Blobs
let mut blob_count = 0usize;
for cid in &blob_cids {
if let Some(data) = blob_store.get(cid)? {
// Check if we need to start a new chunk
if current_size + data.len() as u64 > CHUNK_MAX_BYTES && blob_count > 0 {
zip.finish()?;
zip_paths.push(chunk_path(chunk_index)); // record the chunk just closed
chunk_index += 1;
let new_path = chunk_path(chunk_index);
let new_file = std::fs::File::create(&new_path)?;
zip = zip::ZipWriter::new(new_file);
current_size = 0;
// Continuation manifest
let cont_manifest = ExportManifest {
version: 1,
scope,
node_id: node_id_hex.clone(),
export_date: now,
chunk_index,
total_chunks: 0, // unknown yet
post_count: 0,
blob_count: 0,
};
let cont_json = serde_json::to_string_pretty(&cont_manifest)?;
zip.start_file("itsgoin-export/manifest.json", options)?;
zip.write_all(cont_json.as_bytes())?;
current_size += cont_json.len() as u64;
}
let cid_hex = hex::encode(cid);
zip.start_file(format!("itsgoin-export/blobs/{}", cid_hex), options)?;
zip.write_all(&data)?;
current_size += data.len() as u64;
blob_count += 1;
}
}
zip.finish()?;
zip_paths.push(chunk_path(chunk_index));
info!(
posts = post_count,
blobs = blob_count,
chunks = zip_paths.len(),
scope = ?scope,
"Export complete"
);
Ok(ExportResult {
paths: zip_paths,
post_count,
blob_count,
})
}
/// Gather own posts and their blob CIDs.
async fn gather_posts(
storage: &StoragePool,
node_id: &NodeId,
) -> anyhow::Result<(Vec<ExportedPost>, Vec<[u8; 32]>)> {
let s = storage.get().await;
let posts_with_vis = s.list_posts_with_visibility()?;
let mut exported = Vec::new();
let mut blob_cids = Vec::new();
for (id, post, vis) in &posts_with_vis {
// Only export our own posts
if post.author != *node_id {
continue;
}
let header = s.get_blob_header(id).ok().flatten().map(|(json, _)| json);
let intent = s.get_post_intent(id).ok().flatten().map(|i| format!("{:?}", i));
exported.push(ExportedPost {
id: hex::encode(id),
author: hex::encode(post.author),
content: post.content.clone(),
attachments_json: serde_json::to_string(&post.attachments).unwrap_or_default(),
timestamp_ms: post.timestamp_ms,
visibility_json: serde_json::to_string(vis).unwrap_or_default(),
header_json: header,
intent,
});
// Collect blob CIDs from attachments
for att in &post.attachments {
if !blob_cids.contains(&att.cid) {
blob_cids.push(att.cid);
}
}
}
Ok((exported, blob_cids))
}
/// Gather follows, profiles, and settings for "Everything" export.
async fn gather_extras(
storage: &StoragePool,
) -> anyhow::Result<(
Option<Vec<String>>, // follows (hex node_ids)
Option<Vec<serde_json::Value>>, // profiles
Option<Vec<(String, String)>>, // settings (key, value)
)> {
let s = storage.get().await;
// Follows
let follows: Vec<String> = s.list_follows()
.unwrap_or_default()
.into_iter()
.map(|nid| hex::encode(nid))
.collect();
// Profiles
let profiles: Vec<serde_json::Value> = s.list_profiles()
.unwrap_or_default()
.into_iter()
.map(|p| serde_json::to_value(&p).unwrap_or_default())
.collect();
// Settings — gather all known keys
let setting_keys = ["text_size", "notif_messages", "notif_posts", "notif_nearby",
"notif_reacts", "cache_size_bytes", "public_visible"];
let mut settings_vec = Vec::new();
for key in &setting_keys {
if let Some(val) = s.get_setting(key).ok().flatten() {
settings_vec.push((key.to_string(), val));
}
}
Ok((
if follows.is_empty() { None } else { Some(follows) },
if profiles.is_empty() { None } else { Some(profiles) },
if settings_vec.is_empty() { None } else { Some(settings_vec) },
))
}
fn now_ms() -> u64 {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64
}
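For orientation, a sketch of driving the export entry point from an async context. The itsgoin_core crate path is assumed, and the handles are whatever the running node provides:

use std::path::Path;
use itsgoin_core::blob::BlobStore;
use itsgoin_core::export::{export_data, ExportScope};
use itsgoin_core::storage::StoragePool;
use itsgoin_core::types::NodeId;

async fn backup_public_posts(
    data_dir: &Path,
    storage: &StoragePool,
    blob_store: &BlobStore,
    node_id: &NodeId,
) -> anyhow::Result<()> {
    let result = export_data(
        data_dir,
        storage,
        blob_store,
        node_id,
        ExportScope::PostsOnly, // public posts + blobs, no identity key
        Path::new("/tmp/itsgoin-backup"),
    )
    .await?;
    println!(
        "wrote {} archive(s): {} posts, {} blobs",
        result.paths.len(),
        result.post_count,
        result.blob_count
    );
    Ok(())
}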

crates/core/src/identity.rs

@@ -100,6 +100,11 @@ impl IdentityManager {
Ok(mgr)
}
/// Get the base data directory.
pub fn base_dir(&self) -> &Path {
&self.base_dir
}
/// Get the currently active Node, if any.
pub fn active_node(&self) -> Option<&Arc<Node>> {
self.active_node.as_ref()

crates/core/src/import.rs (new file, 249 lines)

@@ -0,0 +1,249 @@
//! Import data from ZIP archives exported by the export module.
//!
//! Import actions:
//! - AddAsIdentity: create a new identity from the export's key + data
//! - ImportPublicPosts: import only public posts into the current identity (new PostIds)
//! - MergeWithKey: decrypt encrypted posts using provided key, re-encrypt for current identity
use std::io::Read;
use std::path::Path;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::blob::BlobStore;
use crate::content::compute_post_id;
use crate::export::{ExportManifest, ExportedPost};
use crate::storage::StoragePool;
use crate::types::{Attachment, NodeId, Post, PostVisibility};
/// What to do with the imported data.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ImportAction {
/// Create a new identity from the export's key and restore all data.
AddAsIdentity,
/// Import public posts into the current identity with new PostIds.
ImportPublicPosts,
/// Decrypt with the provided key, re-create posts under current identity.
MergeWithKey { key_hex: String },
}
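// Externally tagged + snake_case, so over IPC these look like
// "add_as_identity", "import_public_posts", or {"merge_with_key":{"key_hex":"..."}}.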
/// Summary of what an import ZIP contains (shown to user before importing).
#[derive(Debug, Serialize, Deserialize)]
pub struct ImportSummary {
pub node_id: String,
pub scope: String,
pub export_date: u64,
pub post_count: usize,
pub blob_count: usize,
pub has_identity_key: bool,
pub has_follows: bool,
pub has_settings: bool,
}
/// Result of an import operation.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImportResult {
pub posts_imported: usize,
pub posts_skipped: usize,
pub blobs_imported: usize,
pub message: String,
}
/// Read a ZIP and return a summary of its contents (without importing).
pub fn read_import_summary(zip_path: &Path) -> anyhow::Result<ImportSummary> {
let file = std::fs::File::open(zip_path)?;
let mut archive = zip::ZipArchive::new(file)?;
// Read manifest
let manifest: ExportManifest = {
let mut entry = archive.by_name("itsgoin-export/manifest.json")?;
let mut buf = String::new();
entry.read_to_string(&mut buf)?;
serde_json::from_str(&buf)?
};
let has_key = archive.by_name("itsgoin-export/identity.key").is_ok();
let has_follows = archive.by_name("itsgoin-export/follows.json").is_ok();
let has_settings = archive.by_name("itsgoin-export/settings.json").is_ok();
Ok(ImportSummary {
node_id: manifest.node_id,
scope: format!("{:?}", manifest.scope),
export_date: manifest.export_date,
post_count: manifest.post_count,
blob_count: manifest.blob_count,
has_identity_key: has_key,
has_follows,
has_settings,
})
}
/// Parsed data from a ZIP ready for async import.
struct ParsedImport {
posts: Vec<(Post, PostVisibility, Vec<(Attachment, Vec<u8>)>)>,
skipped: usize,
}
/// Import public posts from a ZIP into the current identity.
/// Creates new posts with the current node_id as author, preserving original timestamps.
pub async fn import_public_posts(
zip_path: &Path,
storage: &StoragePool,
blob_store: &BlobStore,
our_node_id: &NodeId,
) -> anyhow::Result<ImportResult> {
// Phase 1: Read everything from ZIP synchronously (no Send requirement)
let parsed = {
let zip_path = zip_path.to_path_buf();
let our_node_id = *our_node_id;
tokio::task::spawn_blocking(move || -> anyhow::Result<ParsedImport> {
let file = std::fs::File::open(&zip_path)?;
let mut archive = zip::ZipArchive::new(file)?;
let posts: Vec<ExportedPost> = {
let mut entry = archive.by_name("itsgoin-export/posts.json")?;
let mut buf = String::new();
entry.read_to_string(&mut buf)?;
serde_json::from_str(&buf)?
};
let mut result_posts = Vec::new();
let mut skipped = 0usize;
for ep in &posts {
// Don't fail open: skip posts whose visibility can't be parsed rather than assuming Public.
let vis: PostVisibility = match serde_json::from_str(&ep.visibility_json) {
    Ok(v) => v,
    Err(_) => { skipped += 1; continue; }
};
if !matches!(vis, PostVisibility::Public) {
skipped += 1;
continue;
}
let attachments: Vec<Attachment> = serde_json::from_str(&ep.attachments_json).unwrap_or_default();
let new_post = Post {
author: our_node_id,
content: ep.content.clone(),
attachments: attachments.clone(),
timestamp_ms: ep.timestamp_ms,
};
// Read blob data from archive
let mut blob_data = Vec::new();
for att in &attachments {
let cid_hex = hex::encode(att.cid);
let blob_path = format!("itsgoin-export/blobs/{}", cid_hex);
if let Ok(mut blob_entry) = archive.by_name(&blob_path) {
let mut data = Vec::new();
blob_entry.read_to_end(&mut data)?;
blob_data.push((att.clone(), data));
}
}
result_posts.push((new_post, vis, blob_data));
}
Ok(ParsedImport { posts: result_posts, skipped })
}).await??
};
// Phase 2: Store to DB + blob store (async — needs storage.get().await)
let mut imported = 0usize;
let mut blobs_imported = 0usize;
for (new_post, _vis, blob_data) in &parsed.posts {
let new_id = compute_post_id(new_post);
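// The id is recomputed from the rewritten post (new author, original content
// and timestamp), so re-running the same import produces the same ids and the
// duplicate check below makes it idempotent.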
let s = storage.get().await;
if s.get_post(&new_id).ok().flatten().is_some() {
continue; // duplicate
}
s.store_post_with_visibility(&new_id, new_post, &PostVisibility::Public)?;
drop(s);
for (att, data) in blob_data {
if !blob_store.has(&att.cid) {
blob_store.store(&att.cid, data)?;
let s = storage.get().await;
let _ = s.record_blob(&att.cid, &new_id, our_node_id, data.len() as u64, &att.mime_type, att.size_bytes);
blobs_imported += 1;
}
}
imported += 1;
}
info!(imported, skipped = parsed.skipped, blobs = blobs_imported, "Public post import complete");
Ok(ImportResult {
posts_imported: imported,
posts_skipped: parsed.skipped,
blobs_imported,
message: format!("Imported {} posts ({} skipped), {} blobs", imported, parsed.skipped, blobs_imported),
})
}
/// Import a ZIP as a new identity (create identity subdir, extract everything).
pub fn import_as_identity(
zip_path: &Path,
base_dir: &Path,
) -> anyhow::Result<String> {
let file = std::fs::File::open(zip_path)?;
let mut archive = zip::ZipArchive::new(file)?;
// Read manifest
let manifest: ExportManifest = {
let mut entry = archive.by_name("itsgoin-export/manifest.json")?;
let mut buf = String::new();
entry.read_to_string(&mut buf)?;
serde_json::from_str(&buf)?
};
// Read identity key
let key_data = {
let mut entry = archive.by_name("itsgoin-export/identity.key")
.map_err(|_| anyhow::anyhow!("Export doesn't contain an identity key"))?;
let mut buf = Vec::new();
entry.read_to_end(&mut buf)?;
buf
};
// Create identity directory
let id_dir = base_dir.join("identities").join(&manifest.node_id);
if id_dir.exists() {
anyhow::bail!("Identity {} already exists", &manifest.node_id[..12]);
}
std::fs::create_dir_all(&id_dir)?;
// Write identity key
let key_path = id_dir.join("identity.key");
std::fs::write(&key_path, &key_data)?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let _ = std::fs::set_permissions(&key_path, std::fs::Permissions::from_mode(0o600));
}
// Write metadata
let now = now_ms();
let meta = serde_json::json!({
"display_name": format!("Imported {}", &manifest.node_id[..12]),
"created_at": now,
"last_used_at": now,
});
std::fs::write(id_dir.join("meta.json"), serde_json::to_string_pretty(&meta)?)?;
info!(identity = manifest.node_id, "Imported identity from ZIP — switch to it to restore data");
// Note: posts, blobs, follows, settings will be restored when the user switches to this
// identity and opens the node. The full DB restore could be done here, but it's simpler
// to let the user switch and then import posts via the import wizard.
Ok(manifest.node_id)
}
fn now_ms() -> u64 {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64
}
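A sketch of the preview-then-import flow the wizard drives, under the same assumed itsgoin_core crate path and hypothetical node handles:

use std::path::Path;
use itsgoin_core::blob::BlobStore;
use itsgoin_core::import::{import_public_posts, read_import_summary};
use itsgoin_core::storage::StoragePool;
use itsgoin_core::types::NodeId;

async fn preview_then_import(
    zip: &Path,
    storage: &StoragePool,
    blob_store: &BlobStore,
    our_node_id: &NodeId,
) -> anyhow::Result<()> {
    // Preview: cheap and synchronous, reads only the manifest and entry names.
    let summary = read_import_summary(zip)?;
    println!(
        "{} posts, {} blobs, key included: {}",
        summary.post_count, summary.blob_count, summary.has_identity_key
    );
    // Import public posts into the currently active identity.
    let result = import_public_posts(zip, storage, blob_store, our_node_id).await?;
    println!("{}", result.message);
    Ok(())
}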

crates/core/src/lib.rs

@@ -4,7 +4,9 @@ pub mod connection;
pub mod content;
pub mod crypto;
pub mod http;
pub mod export;
pub mod identity;
pub mod import;
pub mod network;
pub mod node;
pub mod protocol;