deb (be56eb76) · Commits · tuxist / mediadb

debian/changelog

+14 −0

Original line number	Diff line number	Diff line
		mediadb (20260422+65) unstable; urgency=low

		* Fix HTTP timeouts on /raw and /preview endpoints:
		- Add 10 s deadline to Cluster::fetch() and Cluster::fetch_range()
		so warmup retries and pclient fallback are skipped once the
		deadline expires instead of blocking for 30+ s per client.
		- Reduce MAX_RANGE_CHUNK (and the non-range assembly CHUNK) from 8 MB
		to 4 MB (= one paritypp stripe) to avoid multi-stripe fetches that
		compound retry delays.
		- Rate-limit on-demand sync_from_cluster() in get_media() and
		get_media_size() to at most once every 10 s, preventing repeated
		full-cluster syncs from blocking HTTP requests.

		-- Jan Koester <jan.koester@tuxist.de> Wed, 22 Apr 2026 00:00:00 +0200

		mediadb (20260422+64) unstable; urgency=low

		* Speed up preview generation (webp/jpeg):

src/app.cpp

+6 −5

Original line number	Diff line number	Diff line
		@@ -1598,9 +1598,10 @@ HttpResponse App::handle_get_media_raw(const HttpRequest& req) {
		if (range_end >= total_size) range_end = total_size - 1;

		// Cap open-ended range requests. Browsers send "Range: bytes=0-"
		// and expect a reasonable chunk. 8 MB balances throughput (fewer
		// HTTP round-trips) vs. memory use and cluster fetch latency.
		constexpr std::uint64_t MAX_RANGE_CHUNK = 8 * 1024 * 1024; // 8 MB
		// and expect a reasonable chunk. 4 MB aligns with one paritypp
		// stripe so each cluster fetch touches exactly one stripe — avoids
		// multi-stripe round-trips that can stall for 30+ s on retries.
		constexpr std::uint64_t MAX_RANGE_CHUNK = 4 * 1024 * 1024; // 4 MB
		if (range_end - range_start + 1 > MAX_RANGE_CHUNK)
		range_end = range_start + MAX_RANGE_CHUNK - 1;

		@@ -1645,10 +1646,10 @@ HttpResponse App::handle_get_media_raw(const HttpRequest& req) {
		if (total_size > MAX_NON_RANGE)
		return error_json(413, "file too large; use Range requests");

		// Assemble from range-based fetches (8 MB chunks) so the cluster
		// Assemble from range-based fetches (4 MB chunks) so the cluster
		// backend doesn't have to deliver the entire file in one round-trip
		// and each chunk gets cached individually.
		constexpr std::uint64_t CHUNK = 8 * 1024 * 1024;
		constexpr std::uint64_t CHUNK = 4 * 1024 * 1024;
		std::vector<std::uint8_t> full_data;
		full_data.reserve(static_cast<std::size_t>(total_size));
		for (std::uint64_t off = 0; off < total_size; off += CHUNK) {

src/backend.cpp

+13 −3

Original line number	Diff line number	Diff line
		@@ -2810,7 +2810,13 @@ std::optional<MediaRecord> ClusterMediaBackend::get_media(const std::string& id)
		auto m = local_.get_media(id);
		if (m) return m;
		}
		// Only attempt a re-sync if the last one wasn't too recent (avoids
		// blocking an HTTP request for 30+ s with a full cluster sync).
		auto now = std::chrono::steady_clock::now();
		if (now - last_on_demand_sync_.load() > std::chrono::seconds(10)) {
		last_on_demand_sync_.store(now);
		const_cast<ClusterMediaBackend*>(this)->sync_from_cluster();
		}
		std::shared_lock<std::shared_mutex> cguard(cluster_op_mutex_);
		return local_.get_media(id);
		}
		@@ -2894,8 +2900,12 @@ std::uint64_t ClusterMediaBackend::get_media_size(const std::string& media_id) c
		auto size = local_.get_media_size(media_id);
		if (size > 0) return size;
		}
		// Media metadata not yet synced — pull from cluster and retry
		// Only attempt a re-sync if the last one wasn't too recent
		auto now = std::chrono::steady_clock::now();
		if (now - last_on_demand_sync_.load() > std::chrono::seconds(10)) {
		last_on_demand_sync_.store(now);
		const_cast<ClusterMediaBackend*>(this)->sync_from_cluster();
		}
		std::shared_lock<std::shared_mutex> cguard(cluster_op_mutex_);
		return local_.get_media_size(media_id);
		}

src/backend.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -369,6 +369,12 @@ private:
		std::vector<std::uint8_t> cache_fetch_range(const std::string& media_id,
		std::uint64_t offset,
		std::uint64_t length) const;

		// Rate-limit on-demand sync_from_cluster() triggered by cache misses
		// in get_media()/get_media_size() — avoids blocking HTTP requests for
		// 30+ s when the metadata is simply not on this node yet.
		mutable std::atomic<std::chrono::steady_clock::time_point> last_on_demand_sync_{
		std::chrono::steady_clock::time_point{}};
		};

		} // namespace mediadb

src/cluster.cpp

+51 −32

Original line number	Diff line number	Diff line
		@@ -314,6 +314,11 @@ void Cluster::warmup_read_clients() {

		bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
		uint64_t gid = cluster_group_id(key);
		// Overall deadline so HTTP requests don't hang.
		auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(10);
		auto timed_out = [&]() {
		return std::chrono::steady_clock::now() >= deadline;
		};

		// Try read_client_ first (dedicated read connections)
		if (read_client_) {
		@@ -323,6 +328,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
		} catch (const netplus::NetException& e) {
		std::cerr << "[CLUSTER] fetch read_client NetException key=" << key
		<< " gid=" << gid << ": " << e.what() << "\n";
		if (!timed_out()) {
		try {
		read_client_->warmup();
		out = read_client_->retrieve(gid);
		@@ -334,6 +340,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
		std::cerr << "[CLUSTER] fetch read_client retry failed key=" << key
		<< " gid=" << gid << ": unknown exception\n";
		}
		}
		} catch (const std::exception& e) {
		// Data-level error (e.g. "stripe not found") — warmup won't help.
		std::cerr << "[CLUSTER] fetch read_client exception key=" << key
		@@ -343,7 +350,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {

		// Fallback to pclient_ (write client) — may succeed if read_client_
		// connections are stale (e.g. after fork or network glitch)
		if (pclient_) {
		if (pclient_ && !timed_out()) {
		try {
		out = pclient_->retrieve(gid);
		if (!out.empty()) {
		@@ -354,6 +361,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
		} catch (const netplus::NetException& e) {
		std::cerr << "[CLUSTER] fetch pclient NetException key=" << key
		<< " gid=" << gid << ": " << e.what() << "\n";
		if (!timed_out()) {
		try {
		pclient_->warmup();
		out = pclient_->retrieve(gid);
		@@ -365,6 +373,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
		std::cerr << "[CLUSTER] fetch pclient retry failed key=" << key
		<< " gid=" << gid << ": unknown exception\n";
		}
		}
		} catch (const std::exception& e) {
		// Data-level error — skip warmup retry.
		std::cerr << "[CLUSTER] fetch pclient exception key=" << key
		@@ -377,8 +386,14 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {

		bool Cluster::fetch_range(const std::string& key, uint64_t offset, uint64_t length,
		std::vector<uint8_t>& out) {
		// Overall deadline: fail fast so HTTP requests don't hang.
		auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(10);
		uint64_t gid = cluster_group_id(key);

		auto timed_out = [&]() {
		return std::chrono::steady_clock::now() >= deadline;
		};

		if (read_client_) {
		try {
		out = read_client_->retrieve_range(gid, offset, length);
		@@ -386,11 +401,13 @@ bool Cluster::fetch_range(const std::string& key, uint64_t offset, uint64_t leng
		} catch (const netplus::NetException& e) {
		std::cerr << "[CLUSTER] fetch_range read_client NetException key=" << key
		<< " gid=" << gid << ": " << e.what() << "\n";
		if (!timed_out()) {
		try {
		read_client_->warmup();
		out = read_client_->retrieve_range(gid, offset, length);
		if (!out.empty()) return true;
		} catch (...) {}
		}
		} catch (const std::exception& e) {
		// Data-level error (e.g. "stripe not found") — warmup won't help,
		// skip retry to avoid blocking 30+ s per dead node.
		@@ -399,18 +416,20 @@ bool Cluster::fetch_range(const std::string& key, uint64_t offset, uint64_t leng
		}
		}

		if (pclient_) {
		if (pclient_ && !timed_out()) {
		try {
		out = pclient_->retrieve_range(gid, offset, length);
		if (!out.empty()) return true;
		} catch (const netplus::NetException& e) {
		std::cerr << "[CLUSTER] fetch_range pclient NetException key=" << key
		<< " gid=" << gid << ": " << e.what() << "\n";
		if (!timed_out()) {
		try {
		pclient_->warmup();
		out = pclient_->retrieve_range(gid, offset, length);
		if (!out.empty()) return true;
		} catch (...) {}
		}
		} catch (const std::exception& e) {
		// Data-level error — skip warmup retry (same reason as above).
		std::cerr << "[CLUSTER] fetch_range pclient exception key=" << key