Commit e823c1d2 authored by jan.koester's avatar jan.koester
Browse files

deb

parent de25b1f8
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
mediadb (20260503+9) unstable; urgency=high

  * cluster/sync: fix race where a sync cycle that started just before an
    import began could still apply tombstones and store metadata while the
    import was active
  * cluster/sync: re-check import flags at critical sync stages and abort
    mid-cycle when import becomes active
  * cluster: hard-skip apply_tombstones(), repair_replication() and periodic
    sync loop passes while either the local importing_ flag or the global
    import flag is set

 -- Jan Koester <jan.koester@tuxist.de>  Sun, 03 May 2026 20:00:00 +0200

mediadb (20260503+8) unstable; urgency=high

  * server/import: fix logic bug for HTTP/3 import sessions where client
+31 −3
Original line number Diff line number Diff line
@@ -3292,6 +3292,10 @@ void ClusterMediaBackend::sync_from_cluster() {
        return;
    }

    auto import_active = [this]() {
        return cluster_.isImportRunning() || importing_.load();
    };

    uint64_t index_gid = cluster_group_id("index");
    bool has_index = false;
    for (const auto& pg : cluster_.list_peer_groups()) {
@@ -3326,6 +3330,14 @@ void ClusterMediaBackend::sync_from_cluster() {
        }
        return;
    }

    // Sync may have started just before import began. Re-check before mutating
    // local state so tombstones/index metadata cannot interleave with import.
    if (import_active()) {
        std::cerr << "[CLUSTER-SYNC] import became active mid-sync, aborting before index apply\n";
        return;
    }

    std::cerr << "[CLUSTER-SYNC] index fetched OK, " << index_data.size() << " bytes\n";
    {
        std::unique_lock<std::shared_mutex> cguard(cluster_op_mutex_);
@@ -3335,6 +3347,12 @@ void ClusterMediaBackend::sync_from_cluster() {

    // Load tombstones from cluster and delete any stores that were deleted elsewhere
    load_tombstones();

    if (import_active()) {
        std::cerr << "[CLUSTER-SYNC] import became active mid-sync, aborting before tombstones\n";
        return;
    }

    {
        std::unique_lock<std::shared_mutex> cguard(cluster_op_mutex_);
        apply_tombstones();
@@ -3352,6 +3370,11 @@ void ClusterMediaBackend::sync_from_cluster() {
    // but NOT raw media data — keeps RAM usage low.
    std::cerr << "[CLUSTER-SYNC] fetching metadata for " << sids.size() << " stores\n";
    for (const auto& sid : sids) {
        if (import_active()) {
            std::cerr << "[CLUSTER-SYNC] import became active mid-sync, aborting store metadata fetch\n";
            return;
        }

        std::vector<uint8_t> store_data;
        bool ok = cluster_.fetch("store:" + sid, store_data);
        if (!ok || store_data.empty()) {
@@ -3619,6 +3642,11 @@ void ClusterMediaBackend::load_tombstones() {
}

void ClusterMediaBackend::apply_tombstones() {
    if (importing_.load() || cluster_.isImportRunning()) {
        std::cerr << "[CLUSTER-SYNC] apply_tombstones skipped: import active\n";
        return;
    }

    // Remove any stores present in the tombstone list (local only).
    // Do NOT call cluster_.remove() here — that would delete data from the
    // entire cluster. Only the node that initiated the delete (delete_store)
@@ -3635,7 +3663,7 @@ void ClusterMediaBackend::apply_tombstones() {

void ClusterMediaBackend::repair_replication() {
    if (!cluster_.isRunning()) return;
    if (importing_.load()) return;
    if (importing_.load() || cluster_.isImportRunning()) return;
    size_t total_peers = cluster_.getTotalPeers();
    if (total_peers < 2) return;

@@ -3724,7 +3752,7 @@ void ClusterMediaBackend::sync_loop() {
                              [this]{ return !sync_running_.load(); });
        }
        if (!sync_running_) break;
        if (importing_.load()) continue;
        if (importing_.load() || cluster_.isImportRunning()) continue;
        try {
            sync_from_cluster();
            // Run repair every 6th cycle (~30s).
@@ -3732,7 +3760,7 @@ void ClusterMediaBackend::sync_loop() {
            // so nodes stuck in "fetch failed" can recover.
            if (++cycle >= 6) {
                cycle = 0;
                if (!importing_.load()) {
                if (!importing_.load() && !cluster_.isImportRunning()) {
                    repair_replication();
                }
            }