Commit a310c53e authored by jan.koester's avatar jan.koester
Browse files

test

parent 5aae08e0
Loading
Loading
Loading
Loading
+9 −25
Original line number Diff line number Diff line
@@ -18,10 +18,10 @@ namespace mediadb {

// Returns the sync interval in seconds, read from the environment variable
// MEDIADB_SYNC_INTERVAL_SEC, with a fallback default when the variable is
// unset or empty and a lower bound of 1.
//
// NOTE(review): this text looks like a rendered diff in which the removed and
// added lines are interleaved with their +/- markers stripped (default 5 vs 60,
// upper bound 300 vs 3600) — confirm against the real file which of each pair
// is current. Read literally, top to bottom, the first line of each duplicated
// pair takes effect and the second one is dead code.
static int sync_interval_seconds() {
    const char* v = std::getenv("MEDIADB_SYNC_INTERVAL_SEC");
    // Variable unset or empty: use the default interval.
    if (!v || v[0] == '\0') return 5;
    // Same condition as the line above, so as written this return is never reached.
    if (!v || v[0] == '\0') return 60;
    // atoi yields 0 when no number can be parsed; the lower clamp below then
    // raises that to 1. atoi itself reports no parse errors.
    int x = std::atoi(v);
    if (x < 1) x = 1;
    if (x > 300) x = 300;
    // x is already <= 300 at this point, so as written this bound never triggers.
    if (x > 3600) x = 3600;
    return x;
}

@@ -3352,19 +3352,6 @@ void ClusterMediaBackend::sync_from_cluster_now() {
        return;
    }

    uint64_t index_gid = cluster_group_id("index");
    bool has_index = false;
    for (const auto& pg : cluster_.list_peer_groups()) {
        if (std::find(pg.groups.begin(), pg.groups.end(), index_gid) != pg.groups.end()) {
            has_index = true;
            break;
        }
    }

    // Warm up read connections before fetching (QUIC idle timeout may have
    // closed them since start() or last sync cycle)
    cluster_.warmup_read_clients();

    // Fetch index from cluster (contains store/album/ACL metadata, no media data)
    std::vector<uint8_t> index_data;
    bool fetch_ok = cluster_.fetch("index", index_data);
@@ -3377,8 +3364,7 @@ void ClusterMediaBackend::sync_from_cluster_now() {
            std::cerr << "[CLUSTER-SYNC] import active, skipping index fetch retry\n";
            return;
        }
        // Retry once after warmup — connections may be stale after heavy
        // import replication traffic on peers.
        // Retry once after warmup — connections may be stale
        cluster_.warmup_read_clients();
        std::this_thread::sleep_for(std::chrono::milliseconds(500));
        fetch_ok = cluster_.fetch("index", index_data);
@@ -3392,13 +3378,12 @@ void ClusterMediaBackend::sync_from_cluster_now() {
            std::cerr << "[CLUSTER-SYNC] import active after index fetch failure, suppressing sync error\n";
            return;
        }
        if (!has_index) {
            std::cerr << "[CLUSTER-SYNC] no index exists on any peer — fresh/empty cluster\n";
        if (fetch_ok) {
            // fetch succeeded but returned empty data — fresh/empty cluster
            std::cerr << "[CLUSTER-SYNC] index returned empty data — fresh/empty cluster\n";
            initial_sync_ok_.store(true);
        } else {
            std::cerr << "[CLUSTER-SYNC] index fetch "
                      << (fetch_ok ? "returned empty data" : "failed (exception in cluster layer)")
                      << " (has_index=true, peers report having group)\n";
            std::cerr << "[CLUSTER-SYNC] index fetch failed (exception in cluster layer)\n";
        }
        return;
    }
@@ -3450,8 +3435,7 @@ void ClusterMediaBackend::sync_from_cluster_now() {
        std::vector<uint8_t> store_data;
        bool ok = cluster_.fetch("store:" + sid, store_data);
        if (!ok || store_data.empty()) {
            // Retry once after warmup — peer connections may be stale
            cluster_.warmup_read_clients();
            // Retry once — peer connections may be stale
            std::this_thread::sleep_for(std::chrono::milliseconds(300));
            ok = cluster_.fetch("store:" + sid, store_data);
        }
@@ -3835,7 +3819,7 @@ void ClusterMediaBackend::sync_loop() {
            const auto now = std::chrono::steady_clock::now();
            if (now >= next_sync) {
                sync_requested_ = true;
                if (++cycle >= 6) {
                if (++cycle >= 10) {
                    cycle = 0;
                    repair_requested_ = true;
                }
+3 −10
Original line number Diff line number Diff line
@@ -549,7 +549,7 @@ void Cluster::health_loop() {
    size_t k = cfg_.data_blocks;
    size_t n = cfg_.data_blocks + cfg_.parity_blocks;
    int healthy_cycles = 0;
    static constexpr int REBALANCE_INTERVAL = 10; // rebalance every ~5min (10 × 30s)
    static constexpr int REBALANCE_INTERVAL = 10; // rebalance every ~10min (10 × 60s)
    while (running_) {
        if (pclient_) {
            uint32_t online = 0;
@@ -564,13 +564,6 @@ void Cluster::health_loop() {
                std::vector<uint64_t> dummy;
                if (pclient_->list_groups_on_node(it->second, dummy)) {
                    ++online;
                } else {
                    // Retry once after short delay — peers may be busy
                    // during import or scrub and miss the first probe
                    std::this_thread::sleep_for(std::chrono::milliseconds(500));
                    dummy.clear();
                    if (pclient_->list_groups_on_node(it->second, dummy))
                        ++online;
                }
            }
            server_->set_peer_status_callback([online, total]() -> std::pair<uint32_t, uint32_t> {
@@ -682,8 +675,8 @@ void Cluster::health_loop() {
                critical_ = true;
            }
        }
        // Probe faster when degraded/critical (5s) vs normal (30s)
        int interval = (degraded_ || critical_) ? 5 : 30;
        // Probe faster when degraded/critical (10s) vs normal (60s)
        int interval = (degraded_ || critical_) ? 10 : 60;
        for (int i = 0; i < interval && running_; ++i)
            std::this_thread::sleep_for(std::chrono::seconds(1));
    }