Commit 24934710 authored by jan.koester's avatar jan.koester
Browse files

test

parent 9c125675
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
mediadb (20260503+7) unstable; urgency=high

  * cluster/import: speed up synchronous media replication by removing
    unconditional pre-clean (remove+warmup) per media key; now tries fast
    replicate() first and performs cleanup only after a real failure
  * cluster/import: reduce retry/backoff stall time during import
    (short retry window for media keys, limited retries for store/index)
    to prevent multi-minute hangs on failed media writes

 -- Jan Koester <jan.koester@tuxist.de>  Sun, 03 May 2026 18:55:00 +0200

mediadb (20260503+6) unstable; urgency=high

  * Rebuild against libnetplus 20260503+4 (CRITICAL: fix silent packet loss in
+20 −17
Original line number Diff line number Diff line
@@ -3130,28 +3130,31 @@ std::unique_ptr<ImportSession> ClusterMediaBackend::begin_import() {

    // Synchronous replicate: use pclient_ for all keys, same path as uploads.
    // NOTE(review): this listing looks like a rendered diff without +/- markers:
    // lines from the previous and the new implementation are interleaved, and
    // the braces below do not balance as shown. Read it as two versions of the
    // callback, not as one compilable body. TODO confirm against the real file.
    //
    // Contract visible in both variants: returns true as soon as a
    // cluster_.replicate(key, d, len) call succeeds; otherwise writes a
    // "replicate FAILED" line to std::cerr and returns false.
    auto replicate_fn = [this](const std::string& key, const uint8_t* d, size_t len) -> bool {
        // NOTE(review): within this listing MAX_RETRIES is only used as the bound
        // of the `attempt < MAX_RETRIES` loop — presumably the previous retry cap.
        static constexpr int MAX_RETRIES = 5;
        // A key counts as a media key when its first 6 characters are "media:".
        const bool is_media = (key.compare(0, 6, "media:") == 0);

        // If a previous failed import left this media gid in a broken state
        // (seen as "retrieve returned empty" in rebalance), clear it once
        // before retrying writes.
        // NOTE(review): this unconditional remove + warmup contradicts the
        // "Fast path first" comment right below — presumably it belongs to the
        // previous version of the callback.
        if (is_media) {
            cluster_.remove(key);
            cluster_.warmup_read_clients();
        }
        // Fast path first: do not pre-clean every media key. That pre-clean
        // adds avoidable latency for healthy writes and slows large imports.
        if (cluster_.replicate(key, d, len)) return true;

        // NOTE(review): zero-based loop with an `attempt > 0` guard — presumably
        // the previous retry loop; a second loop over `attempt` is opened below.
        for (int attempt = 0; attempt < MAX_RETRIES; ++attempt) {
            if (attempt > 0) {
        // Retry policy tuned for import throughput:
        // - media keys: one short recovery retry after cleanup/warmup
        // - non-media keys (store/index): a small number of retries
        const int retries = is_media ? 1 : 2;
        // One-based loop: runs once for media keys and twice for all other keys.
        for (int attempt = 1; attempt <= retries; ++attempt) {
            std::cerr << "[CLUSTER-IMPORT-STREAM] replicate retry " << attempt
                      << " key=" << key << " size=" << len << "\n";
                // Exponential backoff: 500 ms doubled for each further attempt.
                // NOTE(review): presumably part of the previous version, given
                // the "Keep waits short" comment further down.
                std::this_thread::sleep_for(
                    std::chrono::milliseconds(500 * (1 << (attempt - 1))));
                cluster_.warmup_read_clients();

            if (is_media) {
                // Cleanup only on failure, not unconditionally per media.
                cluster_.remove(key);
            }
            // NOTE(review): replicate() is also called at the end of this loop
            // body — presumably this `ok` form is the previous version's call.
            bool ok = cluster_.replicate(key, d, len);
            if (ok) return true;
            cluster_.warmup_read_clients();

            // Keep waits short to avoid multi-minute stalls in import.
            // Linear backoff: 150 ms multiplied by the one-based attempt number.
            std::this_thread::sleep_for(std::chrono::milliseconds(150 * attempt));
            if (cluster_.replicate(key, d, len)) return true;
        }

        // Reached only when none of the replicate() calls above returned true.
        std::cerr << "[CLUSTER-IMPORT-STREAM] replicate FAILED key=" << key << "\n";
        return false;
    };