Commit 20226c65 authored by jan.koester's avatar jan.koester
Browse files

tools

parent 7d67350a
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -82,6 +82,12 @@ if(NOT CLIENT_ONLY)
    add_executable(mds_convert src/mds_convert.cpp)
    target_compile_features(mds_convert PRIVATE cxx_std_20)

    add_executable(mde_check utils/mde_check.cpp)
    target_compile_features(mde_check PRIVATE cxx_std_20)

    add_executable(mde_extract utils/mde_extract.cpp)
    target_compile_features(mde_extract PRIVATE cxx_std_20)

    add_executable(mediadb_test
        test/mediadb_test.cpp
        src/backend.cpp

utils/mde_check.cpp

0 → 100644
+307 −0
Original line number Diff line number Diff line
// mde_check — validate MDE1/MDE2 export files
//
// Usage: mde_check <file.mdb>
//   Parses the MDE export file and reports structure, entry counts,
//   and any format errors (truncated strings, size mismatches, etc.).

#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// ---- format constants ----

// Four-byte file signatures compared against the first four bytes of the
// input. They are plain byte arrays (no NUL terminator), so always compare
// them with an explicit length of 4.
static constexpr char EXPORT_MAGIC_V1[4] = { 'M', 'D', 'E', '1' };  // version 1
static constexpr char EXPORT_MAGIC_V2[4] = { 'M', 'D', 'E', '2' };  // version 2

// ---- binary read helpers ----

// Reads `n` raw bytes from `in` into the buffer at `dest`.
//
// Returns true only when the stream delivered exactly `n` bytes; a short
// read (end of file or stream error) yields false. `dest` must point to at
// least `n` writable bytes.
static bool read_bytes(std::istream& in, void* dest, std::size_t n) {
    char* const target = static_cast<char*>(dest);
    const auto wanted = static_cast<std::streamsize>(n);
    in.read(target, wanted);
    const auto delivered = static_cast<std::size_t>(in.gcount());
    return delivered == n;
}

// Reads a 32-bit unsigned integer from `in` into `out`.
//
// The four bytes are copied straight into `out`, so the value is interpreted
// in the byte order of the machine running the tool. Returns false when
// fewer than four bytes could be read.
static bool read_u32(std::istream& in, std::uint32_t& out) {
    void* const raw = &out;
    return read_bytes(in, raw, sizeof(std::uint32_t));
}

// Reads a 64-bit unsigned integer from `in` into `out`.
//
// The eight bytes are copied straight into `out`, so the value is
// interpreted in the byte order of the machine running the tool. Returns
// false when fewer than eight bytes could be read.
static bool read_u64(std::istream& in, std::uint64_t& out) {
    void* const raw = &out;
    return read_bytes(in, raw, sizeof(std::uint64_t));
}

// Reads a length-prefixed string: a 32-bit byte count followed by that many
// bytes of payload.
//
// Parameters:
//   in          stream positioned at the length prefix
//   out         receives the payload (cleared for a zero-length string)
//   field_name  label used in diagnostics
//   file_size   total size of the input; any length above it is rejected
//               before memory is allocated for the payload
//
// Returns true on success. On failure a diagnostic naming the field and the
// file offset is written to stderr and false is returned.
static bool read_str(std::istream& in, std::string& out, const char* field_name,
                     std::size_t file_size) {
    // Remember where the length prefix starts *before* reading. Once a read
    // fails the stream is in a failed state and tellg() reports -1, so the
    // offset cannot be reconstructed afterwards.
    const std::streamoff length_offset = in.tellg();

    std::uint32_t len = 0;
    if (!read_u32(in, len)) {
        std::cerr << "  ERROR: truncated string length for " << field_name
                  << " at offset " << length_offset << "\n";
        return false;
    }

    // The stream is still good here, so this is the real payload offset.
    const std::streamoff data_offset = in.tellg();

    if (len > file_size) {
        std::cerr << "  ERROR: string length " << len << " for " << field_name
                  << " exceeds file size (" << file_size << ") at offset "
                  << data_offset << "\n";
        return false;
    }
    if (len == 0) { out.clear(); return true; }
    out.resize(len);
    if (!read_bytes(in, out.data(), len)) {
        std::cerr << "  ERROR: truncated string data for " << field_name
                  << " (expected " << len << " bytes) at offset "
                  << data_offset << "\n";
        return false;
    }
    return true;
}

// ---- check that a string contains no ASCII control characters (tab, LF and CR are allowed) ----

// Returns true when `s` contains no byte below 0x20 other than tab, line
// feed and carriage return. Bytes at 0x20 and above (including 0x7F and all
// non-ASCII bytes) are accepted without further inspection.
static bool looks_printable(const std::string& s) {
    for (std::size_t i = 0; i < s.size(); ++i) {
        const auto byte = static_cast<unsigned char>(s[i]);
        if (byte >= 0x20) continue;
        switch (byte) {
            case '\t':
            case '\n':
            case '\r':
                break;          // permitted whitespace controls
            default:
                return false;   // any other control character
        }
    }
    return true;
}

int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cerr << "Usage: mde_check <file.mdb>\n";
        return 1;
    }

    const char* path = argv[1];
    if (!fs::exists(path)) {
        std::cerr << "File not found: " << path << "\n";
        return 1;
    }

    auto file_size = fs::file_size(path);
    std::cout << "File: " << path << "\n";
    std::cout << "Size: " << file_size << " bytes\n";

    std::ifstream in(path, std::ios::binary);
    if (!in.is_open()) {
        std::cerr << "Cannot open file\n";
        return 1;
    }

    // ---- magic ----
    char magic[4]{};
    if (!read_bytes(in, magic, 4)) {
        std::cerr << "ERROR: file too small for magic bytes\n";
        return 1;
    }

    bool is_v2 = (std::memcmp(magic, EXPORT_MAGIC_V2, 4) == 0);
    bool is_v1 = (std::memcmp(magic, EXPORT_MAGIC_V1, 4) == 0);

    if (!is_v1 && !is_v2) {
        std::cerr << "ERROR: unknown magic: "
                  << magic[0] << magic[1] << magic[2] << magic[3]
                  << " (0x" << std::hex
                  << (unsigned)(unsigned char)magic[0]
                  << (unsigned)(unsigned char)magic[1]
                  << (unsigned)(unsigned char)magic[2]
                  << (unsigned)(unsigned char)magic[3]
                  << std::dec << ")\n";
        return 1;
    }

    std::cout << "Format: " << (is_v2 ? "MDE2" : "MDE1") << "\n";

    // ---- num stores ----
    std::uint32_t num_stores;
    if (!read_u32(in, num_stores)) {
        std::cerr << "ERROR: truncated store count\n";
        return 1;
    }
    std::cout << "Stores: " << num_stores << "\n\n";

    std::size_t total_albums = 0;
    std::size_t total_media = 0;
    std::size_t total_data_bytes = 0;
    std::size_t zero_size_media = 0;
    std::size_t errors = 0;

    for (std::uint32_t si = 0; si < num_stores; ++si) {
        std::string store_id, store_name, store_created;
        if (!read_str(in, store_id, "store_id", file_size) ||
            !read_str(in, store_name, "store_name", file_size) ||
            !read_str(in, store_created, "store_created", file_size)) {
            std::cerr << "ERROR: truncated store header at store #" << si << "\n";
            ++errors;
            break;
        }

        std::cout << "Store #" << si << ": id=" << store_id
                  << " name=\"" << store_name << "\""
                  << " created=" << store_created << "\n";

        if (!looks_printable(store_id) || !looks_printable(store_name)) {
            std::cerr << "  WARNING: store fields contain non-printable characters\n";
        }

        std::uint32_t num_albums;
        if (!read_u32(in, num_albums)) {
            std::cerr << "  ERROR: truncated album count\n";
            ++errors;
            break;
        }
        std::cout << "  Albums: " << num_albums << "\n";

        for (std::uint32_t ai = 0; ai < num_albums; ++ai) {
            std::string album_id, album_name, album_created;
            if (!read_str(in, album_id, "album_id", file_size) ||
                !read_str(in, album_name, "album_name", file_size) ||
                !read_str(in, album_created, "album_created", file_size)) {
                std::cerr << "  ERROR: truncated album header at album #" << ai
                          << " in store #" << si << "\n";
                ++errors;
                goto done;
            }

            bool album_public = false;
            if (is_v2) {
                std::uint8_t pub;
                if (!read_bytes(in, &pub, 1)) {
                    std::cerr << "  ERROR: truncated is_public byte\n";
                    ++errors;
                    goto done;
                }
                album_public = (pub != 0);
                if (pub > 1) {
                    std::cerr << "  WARNING: is_public=" << (int)pub
                              << " (expected 0 or 1)\n";
                }
            }

            std::cout << "  Album #" << ai << ": id=" << album_id
                      << " name=\"" << album_name << "\""
                      << " created=" << album_created;
            if (is_v2) std::cout << " public=" << (album_public ? "yes" : "no");
            std::cout << "\n";

            if (!looks_printable(album_id) || !looks_printable(album_name)) {
                std::cerr << "    WARNING: album fields contain non-printable characters\n";
            }

            std::uint32_t num_media;
            if (!read_u32(in, num_media)) {
                std::cerr << "    ERROR: truncated media count\n";
                ++errors;
                goto done;
            }
            std::cout << "    Media: " << num_media << "\n";

            for (std::uint32_t mi = 0; mi < num_media; ++mi) {
                auto entry_offset = static_cast<std::size_t>(in.tellg());

                std::string id, filename, kind, content_type, created;
                if (!read_str(in, id, "media_id", file_size) ||
                    !read_str(in, filename, "original_filename", file_size) ||
                    !read_str(in, kind, "media_kind", file_size) ||
                    !read_str(in, content_type, "content_type", file_size) ||
                    !read_str(in, created, "created_at", file_size)) {
                    std::cerr << "    ERROR: truncated media header at media #"
                              << mi << " offset=" << entry_offset << "\n";
                    ++errors;
                    goto done;
                }

                std::uint64_t size_bytes;
                if (!read_u64(in, size_bytes)) {
                    std::cerr << "    ERROR: truncated size_bytes at media #"
                              << mi << "\n";
                    ++errors;
                    goto done;
                }

                auto data_offset = static_cast<std::size_t>(in.tellg());

                // Validate size
                bool size_ok = true;
                if (size_bytes > file_size) {
                    std::cerr << "    ERROR: media #" << mi << " id=" << id
                              << " size_bytes=" << size_bytes
                              << " exceeds file size!\n";
                    ++errors;
                    size_ok = false;
                }
                if (data_offset + size_bytes > file_size) {
                    std::cerr << "    ERROR: media #" << mi << " id=" << id
                              << " data would extend past EOF"
                              << " (data_offset=" << data_offset
                              << " + size=" << size_bytes
                              << " > file_size=" << file_size << ")\n";
                    ++errors;
                    size_ok = false;
                }

                if (size_bytes == 0) {
                    ++zero_size_media;
                }

                // Print entry summary
                std::cout << "    [" << mi << "] id=" << id
                          << " file=\"" << filename << "\""
                          << " kind=" << kind
                          << " type=" << content_type
                          << " size=" << size_bytes
                          << " @offset=" << data_offset;

                if (!size_ok) {
                    std::cout << " CORRUPT";
                    goto done;
                }

                // Skip over media data
                if (size_bytes > 0) {
                    in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur);
                    if (!in.good()) {
                        std::cout << " TRUNCATED\n";
                        std::cerr << "    ERROR: could not seek past media data\n";
                        ++errors;
                        goto done;
                    }
                }

                std::cout << " OK\n";

                total_data_bytes += size_bytes;
                ++total_media;
            }
            ++total_albums;
        }
    }

done:
    auto end_pos = static_cast<std::size_t>(in.tellg());
    std::size_t remaining = 0;
    if (end_pos < file_size && in.good()) {
        remaining = file_size - end_pos;
    }

    std::cout << "\n=== Summary ===\n";
    std::cout << "Format:         " << (is_v2 ? "MDE2" : "MDE1") << "\n";
    std::cout << "File size:      " << file_size << " bytes\n";
    std::cout << "Stores:         " << num_stores << "\n";
    std::cout << "Albums:         " << total_albums << "\n";
    std::cout << "Media entries:  " << total_media << "\n";
    std::cout << "Total data:     " << total_data_bytes << " bytes\n";
    if (zero_size_media > 0) {
        std::cout << "Zero-size:      " << zero_size_media << " entries (missing data)\n";
    }
    if (remaining > 0) {
        std::cout << "Trailing bytes: " << remaining << " (unexpected)\n";
        ++errors;
    }
    std::cout << "Errors:         " << errors << "\n";
    std::cout << "Result:         " << (errors == 0 ? "OK" : "ERRORS FOUND") << "\n";

    return errors == 0 ? 0 : 1;
}

utils/mde_extract.cpp

0 → 100644
+279 −0
Original line number Diff line number Diff line
// mde_extract — extract media files from MDE1/MDE2 export files
//
// Usage:
//   mde_extract <file.mdb> <output_dir>              — extract all media
//   mde_extract <file.mdb> <output_dir> --id <id>    — extract single entry
//   mde_extract <file.mdb> <output_dir> --store <id> — extract one store
//   mde_extract <file.mdb> <output_dir> --album <id> — extract one album
//   mde_extract <file.mdb> <output_dir> --list        — list entries only

#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// ---- format constants ----

// Four-byte file signatures compared against the first four bytes of the
// input. They are plain byte arrays (no NUL terminator), so always compare
// them with an explicit length of 4.
static constexpr char EXPORT_MAGIC_V1[4] = { 'M', 'D', 'E', '1' };  // version 1
static constexpr char EXPORT_MAGIC_V2[4] = { 'M', 'D', 'E', '2' };  // version 2

// ---- binary read helpers ----

// Reads `n` raw bytes from `in` into the buffer at `dest`.
//
// Returns true only when the stream delivered exactly `n` bytes; a short
// read (end of file or stream error) yields false. `dest` must point to at
// least `n` writable bytes.
static bool read_bytes(std::istream& in, void* dest, std::size_t n) {
    char* const target = static_cast<char*>(dest);
    const auto wanted = static_cast<std::streamsize>(n);
    in.read(target, wanted);
    const auto delivered = static_cast<std::size_t>(in.gcount());
    return delivered == n;
}

// Reads a 32-bit unsigned integer from `in` into `out`.
//
// The four bytes are copied straight into `out`, so the value is interpreted
// in the byte order of the machine running the tool. Returns false when
// fewer than four bytes could be read.
static bool read_u32(std::istream& in, std::uint32_t& out) {
    void* const raw = &out;
    return read_bytes(in, raw, sizeof(std::uint32_t));
}

// Reads a 64-bit unsigned integer from `in` into `out`.
//
// The eight bytes are copied straight into `out`, so the value is
// interpreted in the byte order of the machine running the tool. Returns
// false when fewer than eight bytes could be read.
static bool read_u64(std::istream& in, std::uint64_t& out) {
    void* const raw = &out;
    return read_bytes(in, raw, sizeof(std::uint64_t));
}

// Reads a length-prefixed string: a 32-bit byte count followed by that many
// bytes of payload.
//
// The length comes from the input file and is therefore untrusted. Instead
// of allocating the full declared length up front (up to 4 GiB for a corrupt
// prefix), the payload is read in bounded chunks so a truncated or corrupt
// file fails after at most one chunk of extra allocation.
//
// Returns true on success; `out` then holds exactly the payload (empty for a
// zero length). Returns false when the prefix or the payload is truncated, in
// which case the contents of `out` are unspecified.
static bool read_str(std::istream& in, std::string& out) {
    std::uint32_t len = 0;
    if (!read_u32(in, len)) return false;

    out.clear();

    constexpr std::size_t kChunkSize = 64 * 1024;
    std::size_t remaining = len;
    while (remaining > 0) {
        const std::size_t step = remaining < kChunkSize ? remaining : kChunkSize;
        const std::size_t filled = out.size();
        out.resize(filled + step);
        if (!read_bytes(in, out.data() + filled, step)) return false;
        remaining -= step;
    }
    return true;
}

// ---- sanitise filename for filesystem ----

// Turns an arbitrary string taken from the export file into a single, safe
// path component.
//
// - Path separators, characters reserved on common filesystems
//   (: * ? " < > |) and ASCII control characters (including NUL) are each
//   replaced by '_'.
// - An empty name becomes "unnamed".
// - The special names "." and ".." become "_" and "__", so the result can
//   never refer to the current or parent directory when joined onto a path.
//
// All other bytes are passed through unchanged.
static std::string safe_filename(const std::string& name) {
    std::string out;
    out.reserve(name.size());
    for (const char c : name) {
        const bool is_control = static_cast<unsigned char>(c) < 0x20;
        const bool is_reserved =
            c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' ||
            c == '"' || c == '<' || c == '>' || c == '|';
        out += (is_control || is_reserved) ? '_' : c;
    }

    if (out.empty()) return "unnamed";

    // "." and ".." are valid characters but special path components; using
    // them as a directory or file name would escape the intended directory.
    if (out == "." || out == "..") {
        for (char& c : out) c = '_';
    }
    return out;
}

// Writes the command-line synopsis and the list of supported options to
// stderr.
static void usage() {
    static constexpr char kHelpText[] =
        "Usage: mde_extract <file.mdb> <output_dir> [options]\n"
        "  --list           List entries without extracting\n"
        "  --id <media_id>  Extract only this media entry\n"
        "  --store <id>     Extract only media from this store\n"
        "  --album <id>     Extract only media from this album\n";
    std::cerr << kHelpText;
}

int main(int argc, char* argv[]) {
    if (argc < 3) { usage(); return 1; }

    const std::string input_path = argv[1];
    const std::string output_dir = argv[2];

    bool list_only = false;
    std::string filter_media_id;
    std::string filter_store_id;
    std::string filter_album_id;

    for (int i = 3; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--list") { list_only = true; }
        else if (arg == "--id" && i + 1 < argc) { filter_media_id = argv[++i]; }
        else if (arg == "--store" && i + 1 < argc) { filter_store_id = argv[++i]; }
        else if (arg == "--album" && i + 1 < argc) { filter_album_id = argv[++i]; }
        else { std::cerr << "Unknown option: " << arg << "\n"; usage(); return 1; }
    }

    if (!fs::exists(input_path)) {
        std::cerr << "File not found: " << input_path << "\n";
        return 1;
    }

    std::ifstream in(input_path, std::ios::binary);
    if (!in.is_open()) {
        std::cerr << "Cannot open file: " << input_path << "\n";
        return 1;
    }

    // ---- magic ----
    char magic[4]{};
    if (!read_bytes(in, magic, 4)) {
        std::cerr << "ERROR: file too small\n";
        return 1;
    }

    bool is_v2 = (std::memcmp(magic, EXPORT_MAGIC_V2, 4) == 0);
    bool is_v1 = (std::memcmp(magic, EXPORT_MAGIC_V1, 4) == 0);
    if (!is_v1 && !is_v2) {
        std::cerr << "ERROR: not an MDE file (bad magic)\n";
        return 1;
    }

    if (!list_only) {
        std::error_code ec;
        fs::create_directories(output_dir, ec);
        if (ec) {
            std::cerr << "Cannot create output dir: " << ec.message() << "\n";
            return 1;
        }
    }

    std::uint32_t num_stores;
    if (!read_u32(in, num_stores)) {
        std::cerr << "ERROR: truncated store count\n";
        return 1;
    }

    std::size_t extracted = 0;
    std::size_t skipped = 0;

    for (std::uint32_t si = 0; si < num_stores && in.good(); ++si) {
        std::string store_id, store_name, store_created;
        if (!read_str(in, store_id) || !read_str(in, store_name) ||
            !read_str(in, store_created)) {
            std::cerr << "ERROR: truncated store header\n";
            return 1;
        }

        bool store_match = filter_store_id.empty() || filter_store_id == store_id;

        std::uint32_t num_albums;
        if (!read_u32(in, num_albums)) {
            std::cerr << "ERROR: truncated album count\n";
            return 1;
        }

        for (std::uint32_t ai = 0; ai < num_albums && in.good(); ++ai) {
            std::string album_id, album_name, album_created;
            if (!read_str(in, album_id) || !read_str(in, album_name) ||
                !read_str(in, album_created)) {
                std::cerr << "ERROR: truncated album header\n";
                return 1;
            }

            if (is_v2) {
                std::uint8_t pub;
                if (!read_bytes(in, &pub, 1)) {
                    std::cerr << "ERROR: truncated is_public\n";
                    return 1;
                }
            }

            bool album_match = filter_album_id.empty() || filter_album_id == album_id;

            std::uint32_t num_media;
            if (!read_u32(in, num_media)) {
                std::cerr << "ERROR: truncated media count\n";
                return 1;
            }

            for (std::uint32_t mi = 0; mi < num_media && in.good(); ++mi) {
                std::string id, filename, kind, content_type, created;
                if (!read_str(in, id) || !read_str(in, filename) ||
                    !read_str(in, kind) || !read_str(in, content_type) ||
                    !read_str(in, created)) {
                    std::cerr << "ERROR: truncated media entry\n";
                    return 1;
                }

                std::uint64_t size_bytes;
                if (!read_u64(in, size_bytes)) {
                    std::cerr << "ERROR: truncated size_bytes\n";
                    return 1;
                }

                bool media_match = filter_media_id.empty() || filter_media_id == id;
                bool want = store_match && album_match && media_match;

                if (list_only) {
                    if (want) {
                        std::cout << "store=" << store_id
                                  << " album=" << album_id
                                  << " id=" << id
                                  << " file=\"" << filename << "\""
                                  << " kind=" << kind
                                  << " type=" << content_type
                                  << " size=" << size_bytes << "\n";
                    }
                    // Skip data
                    if (size_bytes > 0)
                        in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur);
                    continue;
                }

                if (!want || size_bytes == 0) {
                    // Skip data
                    if (size_bytes > 0)
                        in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur);
                    if (want && size_bytes == 0)
                        std::cerr << "  SKIP (zero size): " << id << " " << filename << "\n";
                    ++skipped;
                    continue;
                }

                // Build output path: output_dir/store_name/album_name/filename
                fs::path out_dir = fs::path(output_dir)
                    / safe_filename(store_name)
                    / safe_filename(album_name);
                std::error_code ec;
                fs::create_directories(out_dir, ec);

                // Use original filename, append id if collision
                std::string out_name = safe_filename(filename);
                fs::path out_path = out_dir / out_name;
                if (fs::exists(out_path)) {
                    // Deduplicate: prepend media id
                    out_path = out_dir / (id + "_" + out_name);
                }

                std::ofstream out(out_path, std::ios::binary | std::ios::trunc);
                if (!out.is_open()) {
                    std::cerr << "ERROR: cannot create " << out_path << "\n";
                    in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur);
                    ++skipped;
                    continue;
                }

                // Stream copy in 1MB chunks
                constexpr std::size_t BUF_SIZE = 1024 * 1024;
                std::vector<char> buf(BUF_SIZE);
                std::uint64_t remaining = size_bytes;
                bool ok = true;
                while (remaining > 0) {
                    auto chunk = static_cast<std::streamsize>(
                        std::min<std::uint64_t>(remaining, BUF_SIZE));
                    in.read(buf.data(), chunk);
                    auto got = in.gcount();
                    if (got <= 0) {
                        std::cerr << "ERROR: truncated data for " << id << "\n";
                        ok = false;
                        break;
                    }
                    out.write(buf.data(), got);
                    remaining -= static_cast<std::uint64_t>(got);
                }
                out.close();

                if (ok) {
                    std::cout << out_path.string() << "  (" << size_bytes << " bytes)\n";
                    ++extracted;
                } else {
                    ++skipped;
                }
            }
        }
    }

    if (!list_only) {
        std::cout << "\nExtracted: " << extracted << " files\n";
        if (skipped > 0)
            std::cout << "Skipped:   " << skipped << "\n";
    }

    return 0;
}