deb (e823c1d2) · Commits · tuxist / mediadb

debian/changelog

+11 −0

Original line number	Diff line number	Diff line
		mediadb (20260503+9) unstable; urgency=high

		* cluster/sync: fix race where a sync cycle that started just before import
		could still apply tombstones/store metadata during active import
		* cluster/sync: re-check import flags at critical sync stages and abort
		mid-cycle when import becomes active
		* cluster: hard-skip apply_tombstones(), repair_replication() and periodic
		sync loop passes while either local importing_ or global import flag is set

		-- Jan Koester <jan.koester@tuxist.de> Sun, 03 May 2026 20:00:00 +0200

		mediadb (20260503+8) unstable; urgency=high

		* server/import: fix logic bug for HTTP/3 import sessions where client

src/backend.cpp

+31 −3

Original line number	Diff line number	Diff line
		@@ -3292,6 +3292,10 @@ void ClusterMediaBackend::sync_from_cluster() {
		return;
		}

		auto import_active = [this]() {
		return cluster_.isImportRunning() || importing_.load();
		};

		uint64_t index_gid = cluster_group_id("index");
		bool has_index = false;
		for (const auto& pg : cluster_.list_peer_groups()) {
		@@ -3326,6 +3330,14 @@ void ClusterMediaBackend::sync_from_cluster() {
		}
		return;
		}

		// Sync may have started just before import began. Re-check before mutating
		// local state so tombstones/index metadata cannot interleave with import.
		if (import_active()) {
		std::cerr << "[CLUSTER-SYNC] import became active mid-sync, aborting before index apply\n";
		return;
		}

		std::cerr << "[CLUSTER-SYNC] index fetched OK, " << index_data.size() << " bytes\n";
		{
		std::unique_lock<std::shared_mutex> cguard(cluster_op_mutex_);
		@@ -3335,6 +3347,12 @@ void ClusterMediaBackend::sync_from_cluster() {

		// Load tombstones from cluster and delete any stores that were deleted elsewhere
		load_tombstones();

		if (import_active()) {
		std::cerr << "[CLUSTER-SYNC] import became active mid-sync, aborting before tombstones\n";
		return;
		}

		{
		std::unique_lock<std::shared_mutex> cguard(cluster_op_mutex_);
		apply_tombstones();
		@@ -3352,6 +3370,11 @@ void ClusterMediaBackend::sync_from_cluster() {
		// but NOT raw media data — keeps RAM usage low.
		std::cerr << "[CLUSTER-SYNC] fetching metadata for " << sids.size() << " stores\n";
		for (const auto& sid : sids) {
		if (import_active()) {
		std::cerr << "[CLUSTER-SYNC] import became active mid-sync, aborting store metadata fetch\n";
		return;
		}

		std::vector<uint8_t> store_data;
		bool ok = cluster_.fetch("store:" + sid, store_data);
		if (!ok || store_data.empty()) {
		@@ -3619,6 +3642,11 @@ void ClusterMediaBackend::load_tombstones() {
		}

		void ClusterMediaBackend::apply_tombstones() {
		if (importing_.load() || cluster_.isImportRunning()) {
		std::cerr << "[CLUSTER-SYNC] apply_tombstones skipped: import active\n";
		return;
		}

		// Remove any stores present in the tombstone list (local only).
		// Do NOT call cluster_.remove() here — that would delete data from the
		// entire cluster. Only the node that initiated the delete (delete_store)
		@@ -3635,7 +3663,7 @@ void ClusterMediaBackend::apply_tombstones() {

		void ClusterMediaBackend::repair_replication() {
		if (!cluster_.isRunning()) return;
		if (importing_.load()) return;
		if (importing_.load() || cluster_.isImportRunning()) return;
		size_t total_peers = cluster_.getTotalPeers();
		if (total_peers < 2) return;

		@@ -3724,7 +3752,7 @@ void ClusterMediaBackend::sync_loop() {
		[this]{ return !sync_running_.load(); });
		}
		if (!sync_running_) break;
		if (importing_.load()) continue;
		if (importing_.load() || cluster_.isImportRunning()) continue;
		try {
		sync_from_cluster();
		// Run repair every 6th cycle (~30s).
		@@ -3732,7 +3760,7 @@ void ClusterMediaBackend::sync_loop() {
		// so nodes stuck in "fetch failed" can recover.
		if (++cycle >= 6) {
		cycle = 0;
		if (!importing_.load()) {
		if (!importing_.load() && !cluster_.isImportRunning()) {
		repair_replication();
		}
		}