test (24934710) · Commits · tuxist / mediadb

debian/changelog

+11 −0

Original line number	Diff line number	Diff line
		mediadb (20260503+7) unstable; urgency=high

		* cluster/import: speed up synchronous media replication by removing
		unconditional pre-clean (remove+warmup) per media key; now tries fast
		replicate() first and performs cleanup only after a real failure
		* cluster/import: reduce retry/backoff stall time during import
		(short retry window for media keys, limited retries for store/index)
		to prevent multi-minute hangs on failed media writes

		-- Jan Koester <jan.koester@tuxist.de> Sun, 03 May 2026 18:55:00 +0200

		mediadb (20260503+6) unstable; urgency=high

		* Rebuild against libnetplus 20260503+4 (CRITICAL: fix silent packet loss in

src/backend.cpp

+20 −17

Original line number	Diff line number	Diff line
		@@ -3130,28 +3130,31 @@ std::unique_ptr<ImportSession> ClusterMediaBackend::begin_import() {

		// Synchronous replicate: use pclient_ for all keys, same path as uploads.
		auto replicate_fn = [this](const std::string& key, const uint8_t* d, size_t len) -> bool {
		static constexpr int MAX_RETRIES = 5;
		const bool is_media = (key.compare(0, 6, "media:") == 0);

		// If a previous failed import left this media gid in a broken state
		// (seen as "retrieve returned empty" in rebalance), clear it once
		// before retrying writes.
		if (is_media) {
		cluster_.remove(key);
		cluster_.warmup_read_clients();
		}
		// Fast path first: do not pre-clean every media key. That pre-clean
		// adds avoidable latency for healthy writes and slows large imports.
		if (cluster_.replicate(key, d, len)) return true;

		for (int attempt = 0; attempt < MAX_RETRIES; ++attempt) {
		if (attempt > 0) {
		// Retry policy tuned for import throughput:
		// - media keys: one short recovery retry after cleanup/warmup
		// - non-media keys (store/index): a small number of retries
		const int retries = is_media ? 1 : 2;
		for (int attempt = 1; attempt <= retries; ++attempt) {
		std::cerr << "[CLUSTER-IMPORT-STREAM] replicate retry " << attempt
		<< " key=" << key << " size=" << len << "\n";
		std::this_thread::sleep_for(
		std::chrono::milliseconds(500 * (1 << (attempt - 1))));
		cluster_.warmup_read_clients();

		if (is_media) {
		// Cleanup only on failure, not unconditionally per media.
		cluster_.remove(key);
		}
		bool ok = cluster_.replicate(key, d, len);
		if (ok) return true;
		cluster_.warmup_read_clients();

		// Keep waits short to avoid multi-minute stalls in import.
		std::this_thread::sleep_for(std::chrono::milliseconds(150 * attempt));
		if (cluster_.replicate(key, d, len)) return true;
		}

		std::cerr << "[CLUSTER-IMPORT-STREAM] replicate FAILED key=" << key << "\n";
		return false;
		};