Commit ed103f8d authored by jan.koester
Browse files

Script (JS) elements can have attributes

parent f5535978
Loading
Loading
Loading
Loading
+83 −30
Original line number Diff line number Diff line
@@ -1470,65 +1470,118 @@ size_t libhtmlpp::ScriptElement::parseElement(
    bool& termination
){
termination = false;

    el = std::make_unique<ScriptElement>();
    auto* self = static_cast<ScriptElement*>(el.get()); // _Script ist hier gültig
    auto* self = static_cast<ScriptElement*>(el.get());

    size_t i = start;
    if (i >= in.size() || in[i] != '<') {
        // If it doesn't start with '<', we can't parse a tag here.
        return start;
    }

    // Helper to perform case-insensitive comparison
    auto iequals = [](char a, char b) {
        return std::tolower(static_cast<unsigned char>(a)) ==
               std::tolower(static_cast<unsigned char>(b));
    };

    // Helper to perform case-insensitive match for a keyword starting at 'pos'
    auto match_ci = [&](size_t pos, const char* k) -> bool {
        for (size_t i = 0; k[i]; ++i) {
            if (pos + i >= in.size()) return false;
            if (!iequals(in[pos + i], k[i])) return false;
        for (size_t j = 0; k[j]; ++j) {
            if (pos + j >= in.size() || !iequals(in[pos + j], k[j])) {
                return false;
            }
        }
        return true;
    };

    size_t i = start;
    // --- 1. Validate Opening Tag Name (<script) ---
    ++i; // Consume '<'

    if (i >= in.size() || in[i] != '<') return i;
    // Skip leading whitespace after '<'
    while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) {
        ++i;
    }

    ++i; // '<'
    while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i;
    const char* kw = "script";
    for (size_t k = 0; kw[k] && i < in.size(); ++k, ++i) {
        if (!iequals(in[i], kw[k])) {
            while (i < in.size() && in[i] != '>') ++i;
            if (i < in.size()) ++i; // '>'
    const char* tag_keyword = "script";
    size_t keyword_len = std::char_traits<char>::length(tag_keyword);

    if (i + keyword_len >= in.size() || !match_ci(i, tag_keyword)) {
        // Tag name doesn't match "script"
        // Skip till next '>' and return the position after it.
        while (i < in.size() && in[i] != '>') {
            ++i;
        }
        if (i < in.size()) {
            ++i; // Consume '>'
        }
        return i;
    }
    i += keyword_len; // Consume "script"

    // --- 2. Extract Opening Tag and Attributes ---
    // Find the closing '>' of the opening tag.
    size_t tag_end = i;
    while (i < in.size() && in[i] != '>') {
        ++i;
    }

    while (i < in.size() && in[i] != '>') ++i;
    if (i < in.size() && in[i] == '>') ++i; // '>' konsumieren
    // Capture the raw opening tag data (including '<script' and attributes) for serialization.
    if (i > start && in[i] == '>') {
        // Copy data from '<' (start) up to and including '>' (i)
        std::vector<char> raw_tag_data(in.begin() + start, in.begin() + i + 1);
        self->_serialelize(raw_tag_data);
    }

    if (i >= in.size() || in[i] != '>') {
        // The tag was never closed (e.g., '<script src="..." EOF')
        return i;
    }

    ++i; // Consume '>' and move to content start
    size_t content_begin = i;

    // --- 3. Extract Script Content (CDATA-like section) ---
    for (; i < in.size(); ++i) {
        // Look for the start of the closing tag sequence: </script
        if (in[i] == '<' && match_ci(i, "</script")) {
            size_t j = i + 8; // strlen("</script") == 8
            while (j < in.size() && in[j] != '>') ++j;
            size_t content_end = i;

            if (i > content_begin) {
                self->_Script.reserve(self->_Script.size() + (i - content_begin));
                std::copy(in.begin() + content_begin, in.begin() + i,
                          std::back_inserter(self->_Script));
            // 3a. Extract content preceding the closing tag
            if (content_end > content_begin) {
                // FIX: Use std::vector::insert instead of non-existent append
                self->_Script.insert(self->_Script.end(),
                                     in.begin() + content_begin,
                                     in.begin() + content_end);
            }

            if (j < in.size() && in[j] == '>') {
                i = j + 1;
            // 3b. Find the end of the closing tag: </script>
            size_t closing_tag_end_pos = i + keyword_len + 2; // +2 for '</'

            // Skip any characters/whitespace between </script and the final '>'
            while (closing_tag_end_pos < in.size() && in[closing_tag_end_pos] != '>') {
                ++closing_tag_end_pos;
            }

            if (closing_tag_end_pos < in.size() && in[closing_tag_end_pos] == '>') {
                i = closing_tag_end_pos + 1; // Position after '>'
                return i;
            }

            // If we found "</script" but not the final ">", return the last processed position.
            return closing_tag_end_pos;
        }
    }

    // --- 4. End of Input Reached ---
    // If the input ends without a closing </script> tag, capture the remaining content.
    if (in.size() > content_begin) {
        self->_Script.reserve(self->_Script.size() + (in.size() - content_begin));
        std::copy(in.begin() + content_begin, in.end(),
                  std::back_inserter(self->_Script));
        // FIX: Use std::vector::insert instead of non-existent append
        self->_Script.insert(self->_Script.end(),
                             in.begin() + content_begin,
                             in.end());
    }

    return i;
}