Commit be56eb76 authored by jan.koester
Browse files

deb

parent 518d30a5
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
mediadb (20260422+65) unstable; urgency=low

  * Fix HTTP timeouts on /raw and /preview endpoints:
    - Add 10 s deadline to Cluster::fetch() and Cluster::fetch_range()
      so warmup retries and pclient fallback are skipped once the
      deadline expires instead of blocking for 30+ s per client.
    - Reduce MAX_RANGE_CHUNK (and the matching CHUNK size used to
      assemble non-range responses) from 8 MB to 4 MB (= one paritypp
      stripe) to avoid multi-stripe fetches that compound retry delays.
    - Rate-limit on-demand sync_from_cluster() in get_media() and
      get_media_size() to at most once every 10 s, preventing repeated
      full-cluster syncs from blocking HTTP requests.

 -- Jan Koester <jan.koester@tuxist.de>  Wed, 22 Apr 2026 00:00:00 +0200

mediadb (20260422+64) unstable; urgency=low

  * Speed up preview generation (webp/jpeg):
+6 −5
Original line number Diff line number Diff line
@@ -1598,9 +1598,10 @@ HttpResponse App::handle_get_media_raw(const HttpRequest& req) {
        if (range_end >= total_size) range_end = total_size - 1;

        // Cap open-ended range requests.  Browsers send "Range: bytes=0-"
        // and expect a reasonable chunk.  8 MB balances throughput (fewer
        // HTTP round-trips) vs. memory use and cluster fetch latency.
        constexpr std::uint64_t MAX_RANGE_CHUNK = 8 * 1024 * 1024; // 8 MB
        // and expect a reasonable chunk.  4 MB aligns with one paritypp
        // stripe so each cluster fetch touches exactly one stripe — avoids
        // multi-stripe round-trips that can stall for 30+ s on retries.
        constexpr std::uint64_t MAX_RANGE_CHUNK = 4 * 1024 * 1024; // 4 MB
        if (range_end - range_start + 1 > MAX_RANGE_CHUNK)
            range_end = range_start + MAX_RANGE_CHUNK - 1;

@@ -1645,10 +1646,10 @@ HttpResponse App::handle_get_media_raw(const HttpRequest& req) {
    if (total_size > MAX_NON_RANGE)
        return error_json(413, "file too large; use Range requests");

    // Assemble from range-based fetches (8 MB chunks) so the cluster
    // Assemble from range-based fetches (4 MB chunks) so the cluster
    // backend doesn't have to deliver the entire file in one round-trip
    // and each chunk gets cached individually.
    constexpr std::uint64_t CHUNK = 8 * 1024 * 1024;
    constexpr std::uint64_t CHUNK = 4 * 1024 * 1024;
    std::vector<std::uint8_t> full_data;
    full_data.reserve(static_cast<std::size_t>(total_size));
    for (std::uint64_t off = 0; off < total_size; off += CHUNK) {
+13 −3
Original line number Diff line number Diff line
@@ -2810,7 +2810,13 @@ std::optional<MediaRecord> ClusterMediaBackend::get_media(const std::string& id)
        auto m = local_.get_media(id);
        if (m) return m;
    }
    // Only attempt a re-sync if the last one wasn't too recent (avoids
    // blocking an HTTP request for 30+ s with a full cluster sync).
    auto now = std::chrono::steady_clock::now();
    if (now - last_on_demand_sync_.load() > std::chrono::seconds(10)) {
        last_on_demand_sync_.store(now);
        const_cast<ClusterMediaBackend*>(this)->sync_from_cluster();
    }
    std::shared_lock<std::shared_mutex> cguard(cluster_op_mutex_);
    return local_.get_media(id);
}
@@ -2894,8 +2900,12 @@ std::uint64_t ClusterMediaBackend::get_media_size(const std::string& media_id) c
        auto size = local_.get_media_size(media_id);
        if (size > 0) return size;
    }
    // Media metadata not yet synced — pull from cluster and retry
    // Only attempt a re-sync if the last one wasn't too recent
    auto now = std::chrono::steady_clock::now();
    if (now - last_on_demand_sync_.load() > std::chrono::seconds(10)) {
        last_on_demand_sync_.store(now);
        const_cast<ClusterMediaBackend*>(this)->sync_from_cluster();
    }
    std::shared_lock<std::shared_mutex> cguard(cluster_op_mutex_);
    return local_.get_media_size(media_id);
}
+6 −0
Original line number Diff line number Diff line
@@ -369,6 +369,12 @@ private:
    std::vector<std::uint8_t> cache_fetch_range(const std::string& media_id,
                                                 std::uint64_t offset,
                                                 std::uint64_t length) const;

    // Rate-limit on-demand sync_from_cluster() triggered by cache misses
    // in get_media()/get_media_size() — avoids blocking HTTP requests for
    // 30+ s when the metadata is simply not on this node yet.
    mutable std::atomic<std::chrono::steady_clock::time_point> last_on_demand_sync_{
        std::chrono::steady_clock::time_point{}};
};

} // namespace mediadb
+51 −32
Original line number Diff line number Diff line
@@ -314,6 +314,11 @@ void Cluster::warmup_read_clients() {

bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
    uint64_t gid = cluster_group_id(key);
    // Overall deadline so HTTP requests don't hang.
    auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(10);
    auto timed_out = [&]() {
        return std::chrono::steady_clock::now() >= deadline;
    };

    // Try read_client_ first (dedicated read connections)
    if (read_client_) {
@@ -323,6 +328,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
        } catch (const netplus::NetException& e) {
            std::cerr << "[CLUSTER] fetch read_client NetException key=" << key
                      << " gid=" << gid << ": " << e.what() << "\n";
            if (!timed_out()) {
                try {
                    read_client_->warmup();
                    out = read_client_->retrieve(gid);
@@ -334,6 +340,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
                    std::cerr << "[CLUSTER] fetch read_client retry failed key=" << key
                              << " gid=" << gid << ": unknown exception\n";
                }
            }
        } catch (const std::exception& e) {
            // Data-level error (e.g. "stripe not found") — warmup won't help.
            std::cerr << "[CLUSTER] fetch read_client exception key=" << key
@@ -343,7 +350,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {

    // Fallback to pclient_ (write client) — may succeed if read_client_
    // connections are stale (e.g. after fork or network glitch)
    if (pclient_) {
    if (pclient_ && !timed_out()) {
        try {
            out = pclient_->retrieve(gid);
            if (!out.empty()) {
@@ -354,6 +361,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
        } catch (const netplus::NetException& e) {
            std::cerr << "[CLUSTER] fetch pclient NetException key=" << key
                      << " gid=" << gid << ": " << e.what() << "\n";
            if (!timed_out()) {
                try {
                    pclient_->warmup();
                    out = pclient_->retrieve(gid);
@@ -365,6 +373,7 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {
                    std::cerr << "[CLUSTER] fetch pclient retry failed key=" << key
                              << " gid=" << gid << ": unknown exception\n";
                }
            }
        } catch (const std::exception& e) {
            // Data-level error — skip warmup retry.
            std::cerr << "[CLUSTER] fetch pclient exception key=" << key
@@ -377,8 +386,14 @@ bool Cluster::fetch(const std::string& key, std::vector<uint8_t>& out) {

bool Cluster::fetch_range(const std::string& key, uint64_t offset, uint64_t length,
                           std::vector<uint8_t>& out) {
    // Overall deadline: fail fast so HTTP requests don't hang.
    auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(10);
    uint64_t gid = cluster_group_id(key);

    auto timed_out = [&]() {
        return std::chrono::steady_clock::now() >= deadline;
    };

    if (read_client_) {
        try {
            out = read_client_->retrieve_range(gid, offset, length);
@@ -386,11 +401,13 @@ bool Cluster::fetch_range(const std::string& key, uint64_t offset, uint64_t leng
        } catch (const netplus::NetException& e) {
            std::cerr << "[CLUSTER] fetch_range read_client NetException key=" << key
                      << " gid=" << gid << ": " << e.what() << "\n";
            if (!timed_out()) {
                try {
                    read_client_->warmup();
                    out = read_client_->retrieve_range(gid, offset, length);
                    if (!out.empty()) return true;
                } catch (...) {}
            }
        } catch (const std::exception& e) {
            // Data-level error (e.g. "stripe not found") — warmup won't help,
            // skip retry to avoid blocking 30+ s per dead node.
@@ -399,18 +416,20 @@ bool Cluster::fetch_range(const std::string& key, uint64_t offset, uint64_t leng
        }
    }

    if (pclient_) {
    if (pclient_ && !timed_out()) {
        try {
            out = pclient_->retrieve_range(gid, offset, length);
            if (!out.empty()) return true;
        } catch (const netplus::NetException& e) {
            std::cerr << "[CLUSTER] fetch_range pclient NetException key=" << key
                      << " gid=" << gid << ": " << e.what() << "\n";
            if (!timed_out()) {
                try {
                    pclient_->warmup();
                    out = pclient_->retrieve_range(gid, offset, length);
                    if (!out.empty()) return true;
                } catch (...) {}
            }
        } catch (const std::exception& e) {
            // Data-level error — skip warmup retry (same reason as above).
            std::cerr << "[CLUSTER] fetch_range pclient exception key=" << key