Loading CMakeLists.txt +6 −0 Original line number Diff line number Diff line Loading @@ -82,6 +82,12 @@ if(NOT CLIENT_ONLY) add_executable(mds_convert src/mds_convert.cpp) target_compile_features(mds_convert PRIVATE cxx_std_20) add_executable(mde_check utils/mde_check.cpp) target_compile_features(mde_check PRIVATE cxx_std_20) add_executable(mde_extract utils/mde_extract.cpp) target_compile_features(mde_extract PRIVATE cxx_std_20) add_executable(mediadb_test test/mediadb_test.cpp src/backend.cpp Loading utils/mde_check.cpp 0 → 100644 +307 −0 Original line number Diff line number Diff line // mde_check — validate MDE1/MDE2 export files // // Usage: mde_check <file.mdb> // Parses the MDE export file and reports structure, entry counts, // and any format errors (truncated strings, size mismatches, etc.). #include <cstdint> #include <cstring> #include <filesystem> #include <fstream> #include <iostream> #include <string> #include <vector> namespace fs = std::filesystem; // ---- format constants ---- static constexpr char EXPORT_MAGIC_V1[4] = {'M','D','E','1'}; static constexpr char EXPORT_MAGIC_V2[4] = {'M','D','E','2'}; // ---- binary read helpers ---- static bool read_bytes(std::istream& in, void* dest, std::size_t n) { in.read(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(n)); return static_cast<std::size_t>(in.gcount()) == n; } static bool read_u32(std::istream& in, std::uint32_t& out) { return read_bytes(in, &out, 4); } static bool read_u64(std::istream& in, std::uint64_t& out) { return read_bytes(in, &out, 8); } static bool read_str(std::istream& in, std::string& out, const char* field_name, std::size_t file_size) { std::uint32_t len; if (!read_u32(in, len)) { std::cerr << " ERROR: truncated string length for " << field_name << " at offset " << (static_cast<std::size_t>(in.tellg()) - in.gcount()) << "\n"; return false; } if (len > file_size) { std::cerr << " ERROR: string length " << len << " for " << field_name << " exceeds file size (" << file_size 
<< ") at offset " << (static_cast<std::size_t>(in.tellg())) << "\n"; return false; } if (len == 0) { out.clear(); return true; } out.resize(len); if (!read_bytes(in, out.data(), len)) { std::cerr << " ERROR: truncated string data for " << field_name << " (expected " << len << " bytes) at offset " << (static_cast<std::size_t>(in.tellg()) - in.gcount()) << "\n"; return false; } return true; } // ---- check if string looks like valid UTF-8 text ---- static bool looks_printable(const std::string& s) { for (unsigned char c : s) { if (c < 0x20 && c != '\t' && c != '\n' && c != '\r') return false; } return true; } int main(int argc, char* argv[]) { if (argc < 2) { std::cerr << "Usage: mde_check <file.mdb>\n"; return 1; } const char* path = argv[1]; if (!fs::exists(path)) { std::cerr << "File not found: " << path << "\n"; return 1; } auto file_size = fs::file_size(path); std::cout << "File: " << path << "\n"; std::cout << "Size: " << file_size << " bytes\n"; std::ifstream in(path, std::ios::binary); if (!in.is_open()) { std::cerr << "Cannot open file\n"; return 1; } // ---- magic ---- char magic[4]{}; if (!read_bytes(in, magic, 4)) { std::cerr << "ERROR: file too small for magic bytes\n"; return 1; } bool is_v2 = (std::memcmp(magic, EXPORT_MAGIC_V2, 4) == 0); bool is_v1 = (std::memcmp(magic, EXPORT_MAGIC_V1, 4) == 0); if (!is_v1 && !is_v2) { std::cerr << "ERROR: unknown magic: " << magic[0] << magic[1] << magic[2] << magic[3] << " (0x" << std::hex << (unsigned)(unsigned char)magic[0] << (unsigned)(unsigned char)magic[1] << (unsigned)(unsigned char)magic[2] << (unsigned)(unsigned char)magic[3] << std::dec << ")\n"; return 1; } std::cout << "Format: " << (is_v2 ? 
"MDE2" : "MDE1") << "\n"; // ---- num stores ---- std::uint32_t num_stores; if (!read_u32(in, num_stores)) { std::cerr << "ERROR: truncated store count\n"; return 1; } std::cout << "Stores: " << num_stores << "\n\n"; std::size_t total_albums = 0; std::size_t total_media = 0; std::size_t total_data_bytes = 0; std::size_t zero_size_media = 0; std::size_t errors = 0; for (std::uint32_t si = 0; si < num_stores; ++si) { std::string store_id, store_name, store_created; if (!read_str(in, store_id, "store_id", file_size) || !read_str(in, store_name, "store_name", file_size) || !read_str(in, store_created, "store_created", file_size)) { std::cerr << "ERROR: truncated store header at store #" << si << "\n"; ++errors; break; } std::cout << "Store #" << si << ": id=" << store_id << " name=\"" << store_name << "\"" << " created=" << store_created << "\n"; if (!looks_printable(store_id) || !looks_printable(store_name)) { std::cerr << " WARNING: store fields contain non-printable characters\n"; } std::uint32_t num_albums; if (!read_u32(in, num_albums)) { std::cerr << " ERROR: truncated album count\n"; ++errors; break; } std::cout << " Albums: " << num_albums << "\n"; for (std::uint32_t ai = 0; ai < num_albums; ++ai) { std::string album_id, album_name, album_created; if (!read_str(in, album_id, "album_id", file_size) || !read_str(in, album_name, "album_name", file_size) || !read_str(in, album_created, "album_created", file_size)) { std::cerr << " ERROR: truncated album header at album #" << ai << " in store #" << si << "\n"; ++errors; goto done; } bool album_public = false; if (is_v2) { std::uint8_t pub; if (!read_bytes(in, &pub, 1)) { std::cerr << " ERROR: truncated is_public byte\n"; ++errors; goto done; } album_public = (pub != 0); if (pub > 1) { std::cerr << " WARNING: is_public=" << (int)pub << " (expected 0 or 1)\n"; } } std::cout << " Album #" << ai << ": id=" << album_id << " name=\"" << album_name << "\"" << " created=" << album_created; if (is_v2) std::cout << " public=" 
<< (album_public ? "yes" : "no"); std::cout << "\n"; if (!looks_printable(album_id) || !looks_printable(album_name)) { std::cerr << " WARNING: album fields contain non-printable characters\n"; } std::uint32_t num_media; if (!read_u32(in, num_media)) { std::cerr << " ERROR: truncated media count\n"; ++errors; goto done; } std::cout << " Media: " << num_media << "\n"; for (std::uint32_t mi = 0; mi < num_media; ++mi) { auto entry_offset = static_cast<std::size_t>(in.tellg()); std::string id, filename, kind, content_type, created; if (!read_str(in, id, "media_id", file_size) || !read_str(in, filename, "original_filename", file_size) || !read_str(in, kind, "media_kind", file_size) || !read_str(in, content_type, "content_type", file_size) || !read_str(in, created, "created_at", file_size)) { std::cerr << " ERROR: truncated media header at media #" << mi << " offset=" << entry_offset << "\n"; ++errors; goto done; } std::uint64_t size_bytes; if (!read_u64(in, size_bytes)) { std::cerr << " ERROR: truncated size_bytes at media #" << mi << "\n"; ++errors; goto done; } auto data_offset = static_cast<std::size_t>(in.tellg()); // Validate size bool size_ok = true; if (size_bytes > file_size) { std::cerr << " ERROR: media #" << mi << " id=" << id << " size_bytes=" << size_bytes << " exceeds file size!\n"; ++errors; size_ok = false; } if (data_offset + size_bytes > file_size) { std::cerr << " ERROR: media #" << mi << " id=" << id << " data would extend past EOF" << " (data_offset=" << data_offset << " + size=" << size_bytes << " > file_size=" << file_size << ")\n"; ++errors; size_ok = false; } if (size_bytes == 0) { ++zero_size_media; } // Print entry summary std::cout << " [" << mi << "] id=" << id << " file=\"" << filename << "\"" << " kind=" << kind << " type=" << content_type << " size=" << size_bytes << " @offset=" << data_offset; if (!size_ok) { std::cout << " CORRUPT"; goto done; } // Skip over media data if (size_bytes > 0) { 
in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur); if (!in.good()) { std::cout << " TRUNCATED\n"; std::cerr << " ERROR: could not seek past media data\n"; ++errors; goto done; } } std::cout << " OK\n"; total_data_bytes += size_bytes; ++total_media; } ++total_albums; } } done: auto end_pos = static_cast<std::size_t>(in.tellg()); std::size_t remaining = 0; if (end_pos < file_size && in.good()) { remaining = file_size - end_pos; } std::cout << "\n=== Summary ===\n"; std::cout << "Format: " << (is_v2 ? "MDE2" : "MDE1") << "\n"; std::cout << "File size: " << file_size << " bytes\n"; std::cout << "Stores: " << num_stores << "\n"; std::cout << "Albums: " << total_albums << "\n"; std::cout << "Media entries: " << total_media << "\n"; std::cout << "Total data: " << total_data_bytes << " bytes\n"; if (zero_size_media > 0) { std::cout << "Zero-size: " << zero_size_media << " entries (missing data)\n"; } if (remaining > 0) { std::cout << "Trailing bytes: " << remaining << " (unexpected)\n"; ++errors; } std::cout << "Errors: " << errors << "\n"; std::cout << "Result: " << (errors == 0 ? "OK" : "ERRORS FOUND") << "\n"; return errors == 0 ? 
0 : 1; } utils/mde_extract.cpp 0 → 100644 +279 −0 Original line number Diff line number Diff line // mde_extract — extract media files from MDE1/MDE2 export files // // Usage: // mde_extract <file.mdb> <output_dir> — extract all media // mde_extract <file.mdb> <output_dir> --id <id> — extract single entry // mde_extract <file.mdb> <output_dir> --store <id> — extract one store // mde_extract <file.mdb> <output_dir> --album <id> — extract one album // mde_extract <file.mdb> <output_dir> --list — list entries only #include <cstdint> #include <cstring> #include <filesystem> #include <fstream> #include <iostream> #include <string> #include <vector> namespace fs = std::filesystem; // ---- format constants ---- static constexpr char EXPORT_MAGIC_V1[4] = {'M','D','E','1'}; static constexpr char EXPORT_MAGIC_V2[4] = {'M','D','E','2'}; // ---- binary read helpers ---- static bool read_bytes(std::istream& in, void* dest, std::size_t n) { in.read(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(n)); return static_cast<std::size_t>(in.gcount()) == n; } static bool read_u32(std::istream& in, std::uint32_t& out) { return read_bytes(in, &out, 4); } static bool read_u64(std::istream& in, std::uint64_t& out) { return read_bytes(in, &out, 8); } static bool read_str(std::istream& in, std::string& out) { std::uint32_t len; if (!read_u32(in, len)) return false; if (len == 0) { out.clear(); return true; } out.resize(len); return read_bytes(in, out.data(), len); } // ---- sanitise filename for filesystem ---- static std::string safe_filename(const std::string& name) { std::string out; out.reserve(name.size()); for (char c : name) { if (c == '/' || c == '\\' || c == '\0' || c == ':' || c == '*' || c == '?' 
|| c == '"' || c == '<' || c == '>' || c == '|') out += '_'; else out += c; } if (out.empty()) out = "unnamed"; return out; } static void usage() { std::cerr << "Usage: mde_extract <file.mdb> <output_dir> [options]\n" << " --list List entries without extracting\n" << " --id <media_id> Extract only this media entry\n" << " --store <id> Extract only media from this store\n" << " --album <id> Extract only media from this album\n"; } int main(int argc, char* argv[]) { if (argc < 3) { usage(); return 1; } const std::string input_path = argv[1]; const std::string output_dir = argv[2]; bool list_only = false; std::string filter_media_id; std::string filter_store_id; std::string filter_album_id; for (int i = 3; i < argc; ++i) { std::string arg = argv[i]; if (arg == "--list") { list_only = true; } else if (arg == "--id" && i + 1 < argc) { filter_media_id = argv[++i]; } else if (arg == "--store" && i + 1 < argc) { filter_store_id = argv[++i]; } else if (arg == "--album" && i + 1 < argc) { filter_album_id = argv[++i]; } else { std::cerr << "Unknown option: " << arg << "\n"; usage(); return 1; } } if (!fs::exists(input_path)) { std::cerr << "File not found: " << input_path << "\n"; return 1; } std::ifstream in(input_path, std::ios::binary); if (!in.is_open()) { std::cerr << "Cannot open file: " << input_path << "\n"; return 1; } // ---- magic ---- char magic[4]{}; if (!read_bytes(in, magic, 4)) { std::cerr << "ERROR: file too small\n"; return 1; } bool is_v2 = (std::memcmp(magic, EXPORT_MAGIC_V2, 4) == 0); bool is_v1 = (std::memcmp(magic, EXPORT_MAGIC_V1, 4) == 0); if (!is_v1 && !is_v2) { std::cerr << "ERROR: not an MDE file (bad magic)\n"; return 1; } if (!list_only) { std::error_code ec; fs::create_directories(output_dir, ec); if (ec) { std::cerr << "Cannot create output dir: " << ec.message() << "\n"; return 1; } } std::uint32_t num_stores; if (!read_u32(in, num_stores)) { std::cerr << "ERROR: truncated store count\n"; return 1; } std::size_t extracted = 0; std::size_t 
skipped = 0; for (std::uint32_t si = 0; si < num_stores && in.good(); ++si) { std::string store_id, store_name, store_created; if (!read_str(in, store_id) || !read_str(in, store_name) || !read_str(in, store_created)) { std::cerr << "ERROR: truncated store header\n"; return 1; } bool store_match = filter_store_id.empty() || filter_store_id == store_id; std::uint32_t num_albums; if (!read_u32(in, num_albums)) { std::cerr << "ERROR: truncated album count\n"; return 1; } for (std::uint32_t ai = 0; ai < num_albums && in.good(); ++ai) { std::string album_id, album_name, album_created; if (!read_str(in, album_id) || !read_str(in, album_name) || !read_str(in, album_created)) { std::cerr << "ERROR: truncated album header\n"; return 1; } if (is_v2) { std::uint8_t pub; if (!read_bytes(in, &pub, 1)) { std::cerr << "ERROR: truncated is_public\n"; return 1; } } bool album_match = filter_album_id.empty() || filter_album_id == album_id; std::uint32_t num_media; if (!read_u32(in, num_media)) { std::cerr << "ERROR: truncated media count\n"; return 1; } for (std::uint32_t mi = 0; mi < num_media && in.good(); ++mi) { std::string id, filename, kind, content_type, created; if (!read_str(in, id) || !read_str(in, filename) || !read_str(in, kind) || !read_str(in, content_type) || !read_str(in, created)) { std::cerr << "ERROR: truncated media entry\n"; return 1; } std::uint64_t size_bytes; if (!read_u64(in, size_bytes)) { std::cerr << "ERROR: truncated size_bytes\n"; return 1; } bool media_match = filter_media_id.empty() || filter_media_id == id; bool want = store_match && album_match && media_match; if (list_only) { if (want) { std::cout << "store=" << store_id << " album=" << album_id << " id=" << id << " file=\"" << filename << "\"" << " kind=" << kind << " type=" << content_type << " size=" << size_bytes << "\n"; } // Skip data if (size_bytes > 0) in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur); continue; } if (!want || size_bytes == 0) { // Skip data if (size_bytes > 
0) in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur); if (want && size_bytes == 0) std::cerr << " SKIP (zero size): " << id << " " << filename << "\n"; ++skipped; continue; } // Build output path: output_dir/store_name/album_name/filename fs::path out_dir = fs::path(output_dir) / safe_filename(store_name) / safe_filename(album_name); std::error_code ec; fs::create_directories(out_dir, ec); // Use original filename, append id if collision std::string out_name = safe_filename(filename); fs::path out_path = out_dir / out_name; if (fs::exists(out_path)) { // Deduplicate: prepend media id out_path = out_dir / (id + "_" + out_name); } std::ofstream out(out_path, std::ios::binary | std::ios::trunc); if (!out.is_open()) { std::cerr << "ERROR: cannot create " << out_path << "\n"; in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur); ++skipped; continue; } // Stream copy in 1MB chunks constexpr std::size_t BUF_SIZE = 1024 * 1024; std::vector<char> buf(BUF_SIZE); std::uint64_t remaining = size_bytes; bool ok = true; while (remaining > 0) { auto chunk = static_cast<std::streamsize>( std::min<std::uint64_t>(remaining, BUF_SIZE)); in.read(buf.data(), chunk); auto got = in.gcount(); if (got <= 0) { std::cerr << "ERROR: truncated data for " << id << "\n"; ok = false; break; } out.write(buf.data(), got); remaining -= static_cast<std::uint64_t>(got); } out.close(); if (ok) { std::cout << out_path.string() << " (" << size_bytes << " bytes)\n"; ++extracted; } else { ++skipped; } } } } if (!list_only) { std::cout << "\nExtracted: " << extracted << " files\n"; if (skipped > 0) std::cout << "Skipped: " << skipped << "\n"; } return 0; } Loading
CMakeLists.txt +6 −0 Original line number Diff line number Diff line Loading @@ -82,6 +82,12 @@ if(NOT CLIENT_ONLY) add_executable(mds_convert src/mds_convert.cpp) target_compile_features(mds_convert PRIVATE cxx_std_20) add_executable(mde_check utils/mde_check.cpp) target_compile_features(mde_check PRIVATE cxx_std_20) add_executable(mde_extract utils/mde_extract.cpp) target_compile_features(mde_extract PRIVATE cxx_std_20) add_executable(mediadb_test test/mediadb_test.cpp src/backend.cpp Loading
// utils/mde_check.cpp
// mde_check — validate MDE1/MDE2 export files
//
// Usage: mde_check <file.mdb>
//   Parses the MDE export file and reports structure, entry counts,
//   and any format errors (truncated strings, size mismatches, etc.).
//
// Exit status: 0 when the file parsed without errors, 1 otherwise.
//
// NOTE(review): integers are read as raw bytes into host-order variables, so
// the tool assumes the export was written with the same byte order as the
// machine running it — confirm against the exporter.

#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

namespace fs = std::filesystem;

// ---- format constants ----
static constexpr char EXPORT_MAGIC_V1[4] = {'M','D','E','1'};
static constexpr char EXPORT_MAGIC_V2[4] = {'M','D','E','2'};

// Running totals collected while walking the file; printed in the summary.
struct CheckTotals {
    std::size_t albums = 0;            // albums parsed completely
    std::size_t media = 0;             // media entries parsed completely
    std::uint64_t data_bytes = 0;      // sum of size_bytes over parsed media
    std::size_t zero_size_media = 0;   // entries declaring size_bytes == 0
    std::size_t errors = 0;            // format errors encountered
};

// ---- binary read helpers ----

// Reads exactly n bytes into dest; returns false on a short read.
static bool read_bytes(std::istream& in, void* dest, std::size_t n) {
    in.read(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(n));
    return static_cast<std::size_t>(in.gcount()) == n;
}

// Reads a 4-byte unsigned integer (host byte order).
static bool read_u32(std::istream& in, std::uint32_t& out) {
    return read_bytes(in, &out, 4);
}

// Reads an 8-byte unsigned integer (host byte order).
static bool read_u64(std::istream& in, std::uint64_t& out) {
    return read_bytes(in, &out, 8);
}

// Returns the current read offset, or 0 if the stream has already failed.
// tellg() yields -1 once failbit is set, so offsets used in diagnostics must
// be captured with this helper *before* the read they describe.
static std::uint64_t current_offset(std::istream& in) {
    const std::streamoff pos = in.tellg();
    return pos < 0 ? 0 : static_cast<std::uint64_t>(pos);
}

// Reads a length-prefixed string (u32 length followed by that many bytes).
//
// in          stream positioned at the length prefix
// out         receives the string contents (cleared for a zero length)
// field_name  label used in diagnostics
// file_size   total size of the file; the declared length is rejected when it
//             exceeds the bytes remaining after the prefix, so a corrupt
//             length can never trigger an oversized allocation
//
// Returns false (after printing a diagnostic to stderr) on any error.
static bool read_str(std::istream& in, std::string& out,
                     const char* field_name, std::uintmax_t file_size) {
    const std::uint64_t len_offset = current_offset(in);

    std::uint32_t len = 0;
    if (!read_u32(in, len)) {
        std::cerr << " ERROR: truncated string length for " << field_name
                  << " at offset " << len_offset << "\n";
        return false;
    }

    const std::uint64_t data_offset = len_offset + sizeof(len);
    const std::uint64_t available =
        data_offset < file_size ? file_size - data_offset : 0;
    if (len > available) {
        std::cerr << " ERROR: string length " << len << " for " << field_name
                  << " exceeds remaining file data (" << available
                  << " bytes, file size " << file_size
                  << ") at offset " << data_offset << "\n";
        return false;
    }

    out.resize(len);
    if (len == 0) {
        return true;
    }
    if (!read_bytes(in, out.data(), len)) {
        std::cerr << " ERROR: truncated string data for " << field_name
                  << " (expected " << len << " bytes) at offset "
                  << data_offset << "\n";
        return false;
    }
    return true;
}

// ---- check for control characters ----
// Returns false when the string contains an ASCII control character other
// than tab, newline or carriage return. This is a cheap plausibility check
// only; it does not validate UTF-8.
static bool looks_printable(const std::string& s) {
    for (const unsigned char c : s) {
        if (c < 0x20 && c != '\t' && c != '\n' && c != '\r') {
            return false;
        }
    }
    return true;
}

// Parses and validates one media entry, then skips over its payload.
// Returns false when parsing cannot continue (the error is already counted).
static bool check_media_entry(std::istream& in, std::uint32_t mi,
                              std::uintmax_t file_size, CheckTotals& totals) {
    const std::uint64_t entry_offset = current_offset(in);

    std::string id, filename, kind, content_type, created;
    if (!read_str(in, id, "media_id", file_size) ||
        !read_str(in, filename, "original_filename", file_size) ||
        !read_str(in, kind, "media_kind", file_size) ||
        !read_str(in, content_type, "content_type", file_size) ||
        !read_str(in, created, "created_at", file_size)) {
        std::cerr << " ERROR: truncated media header at media #" << mi
                  << " offset=" << entry_offset << "\n";
        ++totals.errors;
        return false;
    }

    std::uint64_t size_bytes = 0;
    if (!read_u64(in, size_bytes)) {
        std::cerr << " ERROR: truncated size_bytes at media #" << mi << "\n";
        ++totals.errors;
        return false;
    }

    const std::uint64_t data_offset = current_offset(in);
    // Compare against the bytes that remain instead of computing
    // data_offset + size_bytes, which could wrap for a corrupt size.
    const std::uint64_t available =
        data_offset < file_size ? file_size - data_offset : 0;

    // Validate size. An oversized entry is one defect, so count it once.
    bool size_ok = true;
    if (size_bytes > file_size) {
        std::cerr << " ERROR: media #" << mi << " id=" << id
                  << " size_bytes=" << size_bytes << " exceeds file size!\n";
        ++totals.errors;
        size_ok = false;
    } else if (size_bytes > available) {
        std::cerr << " ERROR: media #" << mi << " id=" << id
                  << " data would extend past EOF"
                  << " (data_offset=" << data_offset
                  << " + size=" << size_bytes
                  << " > file_size=" << file_size << ")\n";
        ++totals.errors;
        size_ok = false;
    }

    if (size_bytes == 0) {
        ++totals.zero_size_media;
    }

    // Print entry summary
    std::cout << " [" << mi << "] id=" << id
              << " file=\"" << filename << "\""
              << " kind=" << kind
              << " type=" << content_type
              << " size=" << size_bytes
              << " @offset=" << data_offset;

    if (!size_ok) {
        // The payload length cannot be trusted, so the next entry cannot be located.
        std::cout << " CORRUPT\n";
        return false;
    }

    // Skip over media data
    if (size_bytes > 0) {
        in.seekg(static_cast<std::streamoff>(size_bytes), std::ios::cur);
        if (!in.good()) {
            std::cout << " TRUNCATED\n";
            std::cerr << " ERROR: could not seek past media data\n";
            ++totals.errors;
            return false;
        }
    }

    std::cout << " OK\n";
    totals.data_bytes += size_bytes;
    ++totals.media;
    return true;
}

// Parses one album header and all of its media entries.
// Returns false when parsing cannot continue (the error is already counted).
static bool check_album(std::istream& in, std::uint32_t si, std::uint32_t ai,
                        bool is_v2, std::uintmax_t file_size,
                        CheckTotals& totals) {
    std::string album_id, album_name, album_created;
    if (!read_str(in, album_id, "album_id", file_size) ||
        !read_str(in, album_name, "album_name", file_size) ||
        !read_str(in, album_created, "album_created", file_size)) {
        std::cerr << " ERROR: truncated album header at album #" << ai
                  << " in store #" << si << "\n";
        ++totals.errors;
        return false;
    }

    // Only the MDE2 layout carries the is_public byte.
    bool album_public = false;
    if (is_v2) {
        std::uint8_t pub = 0;
        if (!read_bytes(in, &pub, 1)) {
            std::cerr << " ERROR: truncated is_public byte\n";
            ++totals.errors;
            return false;
        }
        album_public = (pub != 0);
        if (pub > 1) {
            std::cerr << " WARNING: is_public=" << static_cast<int>(pub)
                      << " (expected 0 or 1)\n";
        }
    }

    std::cout << " Album #" << ai << ": id=" << album_id
              << " name=\"" << album_name << "\""
              << " created=" << album_created;
    if (is_v2) {
        std::cout << " public=" << (album_public ? "yes" : "no");
    }
    std::cout << "\n";

    if (!looks_printable(album_id) || !looks_printable(album_name)) {
        std::cerr << " WARNING: album fields contain non-printable characters\n";
    }

    std::uint32_t num_media = 0;
    if (!read_u32(in, num_media)) {
        std::cerr << " ERROR: truncated media count\n";
        ++totals.errors;
        return false;
    }
    std::cout << " Media: " << num_media << "\n";

    for (std::uint32_t mi = 0; mi < num_media; ++mi) {
        if (!check_media_entry(in, mi, file_size, totals)) {
            return false;
        }
    }

    ++totals.albums;
    return true;
}

// Parses one store header and all of its albums.
// Returns false when parsing cannot continue (the error is already counted).
static bool check_store(std::istream& in, std::uint32_t si, bool is_v2,
                        std::uintmax_t file_size, CheckTotals& totals) {
    std::string store_id, store_name, store_created;
    if (!read_str(in, store_id, "store_id", file_size) ||
        !read_str(in, store_name, "store_name", file_size) ||
        !read_str(in, store_created, "store_created", file_size)) {
        std::cerr << "ERROR: truncated store header at store #" << si << "\n";
        ++totals.errors;
        return false;
    }

    std::cout << "Store #" << si << ": id=" << store_id
              << " name=\"" << store_name << "\""
              << " created=" << store_created << "\n";

    if (!looks_printable(store_id) || !looks_printable(store_name)) {
        std::cerr << " WARNING: store fields contain non-printable characters\n";
    }

    std::uint32_t num_albums = 0;
    if (!read_u32(in, num_albums)) {
        std::cerr << " ERROR: truncated album count\n";
        ++totals.errors;
        return false;
    }
    std::cout << " Albums: " << num_albums << "\n";

    for (std::uint32_t ai = 0; ai < num_albums; ++ai) {
        if (!check_album(in, si, ai, is_v2, file_size, totals)) {
            return false;
        }
    }
    return true;
}

int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cerr << "Usage: mde_check <file.mdb>\n";
        return 1;
    }

    const char* path = argv[1];

    // Use the non-throwing overloads: the throwing file_size() would abort
    // the program with an uncaught exception for e.g. a directory argument.
    std::error_code ec;
    if (!fs::exists(path, ec)) {
        std::cerr << "File not found: " << path << "\n";
        return 1;
    }
    const std::uintmax_t file_size = fs::file_size(path, ec);
    if (ec) {
        std::cerr << "Cannot determine file size: " << ec.message() << "\n";
        return 1;
    }

    std::cout << "File: " << path << "\n";
    std::cout << "Size: " << file_size << " bytes\n";

    std::ifstream in(path, std::ios::binary);
    if (!in.is_open()) {
        std::cerr << "Cannot open file\n";
        return 1;
    }

    // ---- magic ----
    char magic[4]{};
    if (!read_bytes(in, magic, 4)) {
        std::cerr << "ERROR: file too small for magic bytes\n";
        return 1;
    }

    const bool is_v2 = (std::memcmp(magic, EXPORT_MAGIC_V2, 4) == 0);
    const bool is_v1 = (std::memcmp(magic, EXPORT_MAGIC_V1, 4) == 0);

    if (!is_v1 && !is_v2) {
        std::cerr << "ERROR: unknown magic: "
                  << magic[0] << magic[1] << magic[2] << magic[3]
                  << " (0x" << std::hex << std::setfill('0');
        // Two digits per byte; without padding 0x05 0x10 and 0x51 0x00
        // would both start "51...".
        for (const char byte : magic) {
            std::cerr << std::setw(2)
                      << static_cast<unsigned>(static_cast<unsigned char>(byte));
        }
        std::cerr << std::dec << std::setfill(' ') << ")\n";
        return 1;
    }

    std::cout << "Format: " << (is_v2 ? "MDE2" : "MDE1") << "\n";

    // ---- num stores ----
    std::uint32_t num_stores = 0;
    if (!read_u32(in, num_stores)) {
        std::cerr << "ERROR: truncated store count\n";
        return 1;
    }
    std::cout << "Stores: " << num_stores << "\n\n";

    CheckTotals totals;
    for (std::uint32_t si = 0; si < num_stores; ++si) {
        if (!check_store(in, si, is_v2, file_size, totals)) {
            break;  // the first structural error ends parsing
        }
    }

    // Bytes left after a clean parse are unexpected. After a failed parse the
    // stream position is meaningless, so nothing is reported in that case.
    std::uint64_t remaining = 0;
    if (in.good()) {
        const std::uint64_t end_pos = current_offset(in);
        if (end_pos < file_size) {
            remaining = file_size - end_pos;
        }
    }

    std::cout << "\n=== Summary ===\n";
    std::cout << "Format: " << (is_v2 ? "MDE2" : "MDE1") << "\n";
    std::cout << "File size: " << file_size << " bytes\n";
    std::cout << "Stores: " << num_stores << "\n";
    std::cout << "Albums: " << totals.albums << "\n";
    std::cout << "Media entries: " << totals.media << "\n";
    std::cout << "Total data: " << totals.data_bytes << " bytes\n";
    if (totals.zero_size_media > 0) {
        std::cout << "Zero-size: " << totals.zero_size_media
                  << " entries (missing data)\n";
    }
    if (remaining > 0) {
        std::cout << "Trailing bytes: " << remaining << " (unexpected)\n";
        ++totals.errors;
    }
    std::cout << "Errors: " << totals.errors << "\n";
    std::cout << "Result: " << (totals.errors == 0 ? "OK" : "ERRORS FOUND") << "\n";

    return totals.errors == 0 ? 0 : 1;
}
// utils/mde_extract.cpp
// mde_extract — extract media files from MDE1/MDE2 export files
//
// Usage:
//   mde_extract <file.mdb> <output_dir>                 — extract all media
//   mde_extract <file.mdb> <output_dir> --id <id>       — extract single entry
//   mde_extract <file.mdb> <output_dir> --store <id>    — extract one store
//   mde_extract <file.mdb> <output_dir> --album <id>    — extract one album
//   mde_extract <file.mdb> <output_dir> --list          — list entries only
//
// Exit status: 0 on success, 1 on usage errors, format errors, truncated data
// or write failures.
//
// The export file is treated as untrusted input: every length field is
// checked against the bytes remaining in the file, and every name taken from
// the file is sanitised before it becomes part of an output path.
//
// NOTE(review): integers are read as raw bytes into host-order variables, so
// the tool assumes the export was written with the same byte order as the
// machine running it — confirm against the exporter.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <system_error>
#include <vector>

namespace fs = std::filesystem;

// ---- format constants ----
static constexpr char EXPORT_MAGIC_V1[4] = {'M','D','E','1'};
static constexpr char EXPORT_MAGIC_V2[4] = {'M','D','E','2'};

// Size of the buffer used to stream media payloads to disk.
static constexpr std::size_t COPY_BUF_SIZE = 1024 * 1024;

// Parsed command line.
struct Options {
    std::string input_path;
    std::string output_dir;
    bool list_only = false;
    std::string filter_media_id;   // empty = no filter
    std::string filter_store_id;   // empty = no filter
    std::string filter_album_id;   // empty = no filter
};

// Counters reported in the final summary.
struct Counters {
    std::size_t extracted = 0;
    std::size_t skipped = 0;   // filtered out, zero-sized, or failed entries
};

// ---- binary read helpers ----

// Reads exactly n bytes into dest; returns false on a short read.
static bool read_bytes(std::istream& in, void* dest, std::size_t n) {
    in.read(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(n));
    return static_cast<std::size_t>(in.gcount()) == n;
}

// Reads a 4-byte unsigned integer (host byte order).
static bool read_u32(std::istream& in, std::uint32_t& out) {
    return read_bytes(in, &out, 4);
}

// Reads an 8-byte unsigned integer (host byte order).
static bool read_u64(std::istream& in, std::uint64_t& out) {
    return read_bytes(in, &out, 8);
}

// Returns the current read offset, or 0 if the stream has already failed
// (tellg() yields -1 once failbit is set).
static std::uint64_t current_offset(std::istream& in) {
    const std::streamoff pos = in.tellg();
    return pos < 0 ? 0 : static_cast<std::uint64_t>(pos);
}

// Number of bytes between the current read offset and file_size.
static std::uint64_t bytes_available(std::istream& in, std::uint64_t file_size) {
    const std::uint64_t offset = current_offset(in);
    return offset < file_size ? file_size - offset : 0;
}

// Reads a length-prefixed string (u32 length followed by that many bytes).
//
// file_size bounds the declared length by the bytes left in the file, so a
// corrupt length fails cleanly instead of allocating up to 4 GiB. The default
// disables the bound for callers that do not know the file size.
// Returns false on a short read or an implausible length.
static bool read_str(std::istream& in, std::string& out,
                     std::uint64_t file_size = std::numeric_limits<std::uint64_t>::max()) {
    std::uint32_t len = 0;
    if (!read_u32(in, len)) {
        return false;
    }
    if (len > bytes_available(in, file_size)) {
        return false;
    }
    out.resize(len);
    return len == 0 || read_bytes(in, out.data(), len);
}

// Advances the stream by n bytes. The caller must already have verified that
// n does not exceed the bytes remaining, which also guarantees that n fits in
// std::streamoff. Prints a diagnostic and returns false if the seek fails.
static bool skip_bytes(std::istream& in, std::uint64_t n) {
    if (n == 0) {
        return true;
    }
    in.seekg(static_cast<std::streamoff>(n), std::ios::cur);
    if (!in.good()) {
        std::cerr << "ERROR: could not seek past media data\n";
        return false;
    }
    return true;
}

// ---- sanitise filename for filesystem ----
// Turns an arbitrary string from the export into a single safe path
// component: path separators, characters reserved on common filesystems and
// control characters become '_', an empty name becomes "unnamed", and the
// directory references "." and ".." are rewritten so the result can never
// move the output path outside its parent directory.
static std::string safe_filename(const std::string& name) {
    std::string out;
    out.reserve(name.size());
    for (const char c : name) {
        const bool is_control = static_cast<unsigned char>(c) < 0x20;
        const bool is_reserved = c == '/' || c == '\\' || c == ':' || c == '*' ||
                                 c == '?' || c == '"' || c == '<' || c == '>' ||
                                 c == '|';
        out += (is_control || is_reserved) ? '_' : c;
    }
    if (out.empty()) {
        out = "unnamed";
    }
    if (out == "." || out == "..") {
        std::replace(out.begin(), out.end(), '.', '_');
    }
    return out;
}

// Picks an output path inside dir that does not exist yet.
// Prefers `name`; on collision prepends the (sanitised) media id, and if that
// is taken as well, an increasing counter. `name` must already be sanitised.
static fs::path unique_output_path(const fs::path& dir, const std::string& id,
                                   const std::string& name) {
    std::error_code ec;
    fs::path candidate = dir / name;
    if (!fs::exists(candidate, ec)) {
        return candidate;
    }
    // The id comes from the file as well; unsanitised it could contain
    // separators or be an absolute path and redirect the write elsewhere.
    const std::string prefixed = safe_filename(id) + "_" + name;
    candidate = dir / prefixed;
    for (unsigned n = 1; fs::exists(candidate, ec); ++n) {
        candidate = dir / (std::to_string(n) + "_" + prefixed);
    }
    return candidate;
}

// Copies count bytes from in to out through buf.
// Returns false on a short read or a failed write; the caller tells the two
// apart by inspecting out.
static bool copy_bytes(std::istream& in, std::ostream& out, std::uint64_t count,
                       std::vector<char>& buf) {
    while (count > 0) {
        const auto chunk = static_cast<std::streamsize>(
            std::min<std::uint64_t>(count, buf.size()));
        in.read(buf.data(), chunk);
        const std::streamsize got = in.gcount();
        if (got <= 0) {
            return false;
        }
        out.write(buf.data(), got);
        if (!out) {
            return false;
        }
        count -= static_cast<std::uint64_t>(got);
    }
    return true;
}

static void usage() {
    std::cerr << "Usage: mde_extract <file.mdb> <output_dir> [options]\n"
              << " --list List entries without extracting\n"
              << " --id <media_id> Extract only this media entry\n"
              << " --store <id> Extract only media from this store\n"
              << " --album <id> Extract only media from this album\n";
}

// Fills opts from the command line. Prints a diagnostic plus the usage text
// and returns false when the arguments are invalid.
static bool parse_args(int argc, char* argv[], Options& opts) {
    if (argc < 3) {
        usage();
        return false;
    }
    opts.input_path = argv[1];
    opts.output_dir = argv[2];

    for (int i = 3; i < argc; ++i) {
        const std::string arg = argv[i];
        if (arg == "--list") {
            opts.list_only = true;
            continue;
        }

        std::string* target = nullptr;
        if (arg == "--id") {
            target = &opts.filter_media_id;
        } else if (arg == "--store") {
            target = &opts.filter_store_id;
        } else if (arg == "--album") {
            target = &opts.filter_album_id;
        }

        if (target == nullptr) {
            std::cerr << "Unknown option: " << arg << "\n";
            usage();
            return false;
        }
        if (i + 1 >= argc) {
            std::cerr << "Missing value for option: " << arg << "\n";
            usage();
            return false;
        }
        *target = argv[++i];
    }
    return true;
}

// Writes one media payload to output_dir/store_name/album_name/filename.
// The stream must be positioned at the payload, whose size has already been
// validated against the file size.
// Returns false only when parsing cannot continue (truncated input or a
// failed write); a file that merely cannot be created is skipped.
static bool extract_entry(std::istream& in, const Options& opts,
                          const std::string& store_name,
                          const std::string& album_name,
                          const std::string& id, const std::string& filename,
                          std::uint64_t size_bytes, std::vector<char>& buf,
                          Counters& counters) {
    const fs::path out_dir = fs::path(opts.output_dir) /
                             safe_filename(store_name) /
                             safe_filename(album_name);

    std::error_code ec;
    fs::create_directories(out_dir, ec);
    if (ec) {
        std::cerr << "ERROR: cannot create " << out_dir << ": "
                  << ec.message() << "\n";
        ++counters.skipped;
        return skip_bytes(in, size_bytes);
    }

    const fs::path out_path =
        unique_output_path(out_dir, id, safe_filename(filename));

    std::ofstream out(out_path, std::ios::binary | std::ios::trunc);
    if (!out.is_open()) {
        std::cerr << "ERROR: cannot create " << out_path << "\n";
        ++counters.skipped;
        return skip_bytes(in, size_bytes);
    }

    const bool copied = copy_bytes(in, out, size_bytes, buf);
    out.close();  // flushes; sets failbit if the final write fails

    if (copied && !out.fail()) {
        std::cout << out_path.string() << " (" << size_bytes << " bytes)\n";
        ++counters.extracted;
        return true;
    }

    if (out.fail()) {
        std::cerr << "ERROR: write failed for " << out_path << "\n";
    } else {
        std::cerr << "ERROR: truncated data for " << id << "\n";
    }
    // Do not leave a partial file behind that looks like a valid extraction.
    fs::remove(out_path, ec);
    ++counters.skipped;
    return false;
}

// Walks stores -> albums -> media entries and lists or extracts each entry
// that passes the filters in opts. The stream must be positioned just after
// the magic bytes. Returns false (after printing a diagnostic) on the first
// format or I/O error.
static bool process_export(std::istream& in, bool is_v2, std::uint64_t file_size,
                           const Options& opts, Counters& counters) {
    std::uint32_t num_stores = 0;
    if (!read_u32(in, num_stores)) {
        std::cerr << "ERROR: truncated store count\n";
        return false;
    }

    // One copy buffer shared by all entries; not needed when only listing.
    std::vector<char> buf(opts.list_only ? 0 : COPY_BUF_SIZE);

    for (std::uint32_t si = 0; si < num_stores; ++si) {
        std::string store_id, store_name, store_created;
        if (!read_str(in, store_id, file_size) ||
            !read_str(in, store_name, file_size) ||
            !read_str(in, store_created, file_size)) {
            std::cerr << "ERROR: truncated store header\n";
            return false;
        }
        const bool store_match =
            opts.filter_store_id.empty() || opts.filter_store_id == store_id;

        std::uint32_t num_albums = 0;
        if (!read_u32(in, num_albums)) {
            std::cerr << "ERROR: truncated album count\n";
            return false;
        }

        for (std::uint32_t ai = 0; ai < num_albums; ++ai) {
            std::string album_id, album_name, album_created;
            if (!read_str(in, album_id, file_size) ||
                !read_str(in, album_name, file_size) ||
                !read_str(in, album_created, file_size)) {
                std::cerr << "ERROR: truncated album header\n";
                return false;
            }

            // Only the MDE2 layout carries the is_public byte; it is read to
            // stay in sync with the stream but is not otherwise used here.
            if (is_v2) {
                std::uint8_t pub = 0;
                if (!read_bytes(in, &pub, 1)) {
                    std::cerr << "ERROR: truncated is_public\n";
                    return false;
                }
            }
            const bool album_match =
                opts.filter_album_id.empty() || opts.filter_album_id == album_id;

            std::uint32_t num_media = 0;
            if (!read_u32(in, num_media)) {
                std::cerr << "ERROR: truncated media count\n";
                return false;
            }

            for (std::uint32_t mi = 0; mi < num_media; ++mi) {
                std::string id, filename, kind, content_type, created;
                if (!read_str(in, id, file_size) ||
                    !read_str(in, filename, file_size) ||
                    !read_str(in, kind, file_size) ||
                    !read_str(in, content_type, file_size) ||
                    !read_str(in, created, file_size)) {
                    std::cerr << "ERROR: truncated media entry\n";
                    return false;
                }

                std::uint64_t size_bytes = 0;
                if (!read_u64(in, size_bytes)) {
                    std::cerr << "ERROR: truncated size_bytes\n";
                    return false;
                }

                // Reject sizes that run past EOF before seeking or copying:
                // an unchecked value cast to streamoff can turn negative and
                // send the parser backwards through the file.
                const std::uint64_t available = bytes_available(in, file_size);
                if (size_bytes > available) {
                    std::cerr << "ERROR: truncated data for " << id
                              << " (declared " << size_bytes << " bytes, "
                              << available << " available)\n";
                    return false;
                }

                const bool media_match =
                    opts.filter_media_id.empty() || opts.filter_media_id == id;
                const bool want = store_match && album_match && media_match;

                if (opts.list_only) {
                    if (want) {
                        std::cout << "store=" << store_id
                                  << " album=" << album_id
                                  << " id=" << id
                                  << " file=\"" << filename << "\""
                                  << " kind=" << kind
                                  << " type=" << content_type
                                  << " size=" << size_bytes << "\n";
                    }
                    if (!skip_bytes(in, size_bytes)) {
                        return false;
                    }
                    continue;
                }

                if (!want || size_bytes == 0) {
                    if (want) {
                        std::cerr << " SKIP (zero size): " << id << " "
                                  << filename << "\n";
                    }
                    ++counters.skipped;
                    if (!skip_bytes(in, size_bytes)) {
                        return false;
                    }
                    continue;
                }

                if (!extract_entry(in, opts, store_name, album_name, id,
                                   filename, size_bytes, buf, counters)) {
                    return false;
                }
            }
        }
    }
    return true;
}

int main(int argc, char* argv[]) {
    Options opts;
    if (!parse_args(argc, argv, opts)) {
        return 1;
    }

    std::error_code ec;
    if (!fs::exists(opts.input_path, ec)) {
        std::cerr << "File not found: " << opts.input_path << "\n";
        return 1;
    }
    const std::uintmax_t file_size = fs::file_size(opts.input_path, ec);
    if (ec) {
        std::cerr << "Cannot determine size of " << opts.input_path << ": "
                  << ec.message() << "\n";
        return 1;
    }

    std::ifstream in(opts.input_path, std::ios::binary);
    if (!in.is_open()) {
        std::cerr << "Cannot open file: " << opts.input_path << "\n";
        return 1;
    }

    // ---- magic ----
    char magic[4]{};
    if (!read_bytes(in, magic, 4)) {
        std::cerr << "ERROR: file too small\n";
        return 1;
    }
    const bool is_v2 = (std::memcmp(magic, EXPORT_MAGIC_V2, 4) == 0);
    const bool is_v1 = (std::memcmp(magic, EXPORT_MAGIC_V1, 4) == 0);
    if (!is_v1 && !is_v2) {
        std::cerr << "ERROR: not an MDE file (bad magic)\n";
        return 1;
    }

    if (!opts.list_only) {
        fs::create_directories(opts.output_dir, ec);
        if (ec) {
            std::cerr << "Cannot create output dir: " << ec.message() << "\n";
            return 1;
        }
    }

    Counters counters;
    const bool ok = process_export(in, is_v2, file_size, opts, counters);

    // Report what was done even when parsing stopped early, so the user knows
    // how much of the export made it to disk.
    if (!opts.list_only) {
        std::cout << "\nExtracted: " << counters.extracted << " files\n";
        if (counters.skipped > 0) {
            std::cout << "Skipped: " << counters.skipped << "\n";
        }
    }
    return ok ? 0 : 1;
}