Loading debian/changelog +11 −0 Original line number Diff line number Diff line mediadb (20260503+7) unstable; urgency=high * cluster/import: speed up synchronous media replication by removing unconditional pre-clean (remove+warmup) per media key; now tries fast replicate() first and performs cleanup only after a real failure * cluster/import: reduce retry/backoff stall time during import (short retry window for media keys, limited retries for store/index) to prevent multi-minute hangs on failed media writes -- Jan Koester <jan.koester@tuxist.de> Sun, 03 May 2026 18:55:00 +0200 mediadb (20260503+6) unstable; urgency=high * Rebuild against libnetplus 20260503+4 (CRITICAL: fix silent packet loss in Loading src/backend.cpp +20 −17 Original line number Diff line number Diff line Loading @@ -3130,28 +3130,31 @@ std::unique_ptr<ImportSession> ClusterMediaBackend::begin_import() { // Synchronous replicate: use pclient_ for all keys, same path as uploads. auto replicate_fn = [this](const std::string& key, const uint8_t* d, size_t len) -> bool { static constexpr int MAX_RETRIES = 5; const bool is_media = (key.compare(0, 6, "media:") == 0); // If a previous failed import left this media gid in a broken state // (seen as "retrieve returned empty" in rebalance), clear it once // before retrying writes. if (is_media) { cluster_.remove(key); cluster_.warmup_read_clients(); } // Fast path first: do not pre-clean every media key. That pre-clean // adds avoidable latency for healthy writes and slows large imports. if (cluster_.replicate(key, d, len)) return true; for (int attempt = 0; attempt < MAX_RETRIES; ++attempt) { if (attempt > 0) { // Retry policy tuned for import throughput: // - media keys: one short recovery retry after cleanup/warmup // - non-media keys (store/index): a small number of retries const int retries = is_media ? 
1 : 2; for (int attempt = 1; attempt <= retries; ++attempt) { std::cerr << "[CLUSTER-IMPORT-STREAM] replicate retry " << attempt << " key=" << key << " size=" << len << "\n"; std::this_thread::sleep_for( std::chrono::milliseconds(500 * (1 << (attempt - 1)))); cluster_.warmup_read_clients(); if (is_media) { // Cleanup only on failure, not unconditionally per media. cluster_.remove(key); } bool ok = cluster_.replicate(key, d, len); if (ok) return true; cluster_.warmup_read_clients(); // Keep waits short to avoid multi-minute stalls in import. std::this_thread::sleep_for(std::chrono::milliseconds(150 * attempt)); if (cluster_.replicate(key, d, len)) return true; } std::cerr << "[CLUSTER-IMPORT-STREAM] replicate FAILED key=" << key << "\n"; return false; }; Loading Loading
debian/changelog +11 −0 Original line number Diff line number Diff line mediadb (20260503+7) unstable; urgency=high * cluster/import: speed up synchronous media replication by removing unconditional pre-clean (remove+warmup) per media key; now tries fast replicate() first and performs cleanup only after a real failure * cluster/import: reduce retry/backoff stall time during import (short retry window for media keys, limited retries for store/index) to prevent multi-minute hangs on failed media writes -- Jan Koester <jan.koester@tuxist.de> Sun, 03 May 2026 18:55:00 +0200 mediadb (20260503+6) unstable; urgency=high * Rebuild against libnetplus 20260503+4 (CRITICAL: fix silent packet loss in Loading
src/backend.cpp +20 −17 Original line number Diff line number Diff line Loading @@ -3130,28 +3130,31 @@ std::unique_ptr<ImportSession> ClusterMediaBackend::begin_import() { // Synchronous replicate: use pclient_ for all keys, same path as uploads. auto replicate_fn = [this](const std::string& key, const uint8_t* d, size_t len) -> bool { static constexpr int MAX_RETRIES = 5; const bool is_media = (key.compare(0, 6, "media:") == 0); // If a previous failed import left this media gid in a broken state // (seen as "retrieve returned empty" in rebalance), clear it once // before retrying writes. if (is_media) { cluster_.remove(key); cluster_.warmup_read_clients(); } // Fast path first: do not pre-clean every media key. That pre-clean // adds avoidable latency for healthy writes and slows large imports. if (cluster_.replicate(key, d, len)) return true; for (int attempt = 0; attempt < MAX_RETRIES; ++attempt) { if (attempt > 0) { // Retry policy tuned for import throughput: // - media keys: one short recovery retry after cleanup/warmup // - non-media keys (store/index): a small number of retries const int retries = is_media ? 1 : 2; for (int attempt = 1; attempt <= retries; ++attempt) { std::cerr << "[CLUSTER-IMPORT-STREAM] replicate retry " << attempt << " key=" << key << " size=" << len << "\n"; std::this_thread::sleep_for( std::chrono::milliseconds(500 * (1 << (attempt - 1)))); cluster_.warmup_read_clients(); if (is_media) { // Cleanup only on failure, not unconditionally per media. cluster_.remove(key); } bool ok = cluster_.replicate(key, d, len); if (ok) return true; cluster_.warmup_read_clients(); // Keep waits short to avoid multi-minute stalls in import. std::this_thread::sleep_for(std::chrono::milliseconds(150 * attempt)); if (cluster_.replicate(key, d, len)) return true; } std::cerr << "[CLUSTER-IMPORT-STREAM] replicate FAILED key=" << key << "\n"; return false; }; Loading