Loading src/html.cpp +83 −30 Original line number Diff line number Diff line Loading @@ -1470,65 +1470,118 @@ size_t libhtmlpp::ScriptElement::parseElement( bool& termination ){ termination = false; el = std::make_unique<ScriptElement>(); auto* self = static_cast<ScriptElement*>(el.get()); // _Script ist hier gültig auto* self = static_cast<ScriptElement*>(el.get()); size_t i = start; if (i >= in.size() || in[i] != '<') { // If it doesn't start with '<', we can't parse a tag here. return start; } // Helper to perform case-insensitive comparison auto iequals = [](char a, char b) { return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b)); }; // Helper to perform case-insensitive match for a keyword starting at 'pos' auto match_ci = [&](size_t pos, const char* k) -> bool { for (size_t i = 0; k[i]; ++i) { if (pos + i >= in.size()) return false; if (!iequals(in[pos + i], k[i])) return false; for (size_t j = 0; k[j]; ++j) { if (pos + j >= in.size() || !iequals(in[pos + j], k[j])) { return false; } } return true; }; size_t i = start; // --- 1. Validate Opening Tag Name (<script) --- ++i; // Consume '<' if (i >= in.size() || in[i] != '<') return i; // Skip leading whitespace after '<' while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) { ++i; } ++i; // '<' while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; const char* kw = "script"; for (size_t k = 0; kw[k] && i < in.size(); ++k, ++i) { if (!iequals(in[i], kw[k])) { while (i < in.size() && in[i] != '>') ++i; if (i < in.size()) ++i; // '>' const char* tag_keyword = "script"; size_t keyword_len = std::char_traits<char>::length(tag_keyword); if (i + keyword_len >= in.size() || !match_ci(i, tag_keyword)) { // Tag name doesn't match "script" // Skip till next '>' and return the position after it. while (i < in.size() && in[i] != '>') { ++i; } if (i < in.size()) { ++i; // Consume '>' } return i; } i += keyword_len; // Consume "script" // --- 2. Extract Opening Tag and Attributes --- // Find the closing '>' of the opening tag. size_t tag_end = i; while (i < in.size() && in[i] != '>') { ++i; } while (i < in.size() && in[i] != '>') ++i; if (i < in.size() && in[i] == '>') ++i; // '>' konsumieren // Capture the raw opening tag data (including '<script' and attributes) for serialization. if (i > start && in[i] == '>') { // Copy data from '<' (start) up to and including '>' (i) std::vector<char> raw_tag_data(in.begin() + start, in.begin() + i + 1); self->_serialelize(raw_tag_data); } if (i >= in.size() || in[i] != '>') { // The tag was never closed (e.g., '<script src="..." EOF') return i; } ++i; // Consume '>' and move to content start size_t content_begin = i; // --- 3. Extract Script Content (CDATA-like section) --- for (; i < in.size(); ++i) { // Look for the start of the closing tag sequence: </script if (in[i] == '<' && match_ci(i, "</script")) { size_t j = i + 8; // strlen("</script") == 8 while (j < in.size() && in[j] != '>') ++j; size_t content_end = i; if (i > content_begin) { self->_Script.reserve(self->_Script.size() + (i - content_begin)); std::copy(in.begin() + content_begin, in.begin() + i, std::back_inserter(self->_Script)); // 3a. Extract content preceding the closing tag if (content_end > content_begin) { // FIX: Use std::vector::insert instead of non-existent append self->_Script.insert(self->_Script.end(), in.begin() + content_begin, in.begin() + content_end); } if (j < in.size() && in[j] == '>') { i = j + 1; // 3b. Find the end of the closing tag: </script> size_t closing_tag_end_pos = i + keyword_len + 2; // +2 for '</' // Skip any characters/whitespace between </script and the final '>' while (closing_tag_end_pos < in.size() && in[closing_tag_end_pos] != '>') { ++closing_tag_end_pos; } if (closing_tag_end_pos < in.size() && in[closing_tag_end_pos] == '>') { i = closing_tag_end_pos + 1; // Position after '>' return i; } // If we found "</script" but not the final ">", return the last processed position. return closing_tag_end_pos; } } // --- 4. End of Input Reached --- // If the input ends without a closing </script> tag, capture the remaining content. if (in.size() > content_begin) { self->_Script.reserve(self->_Script.size() + (in.size() - content_begin)); std::copy(in.begin() + content_begin, in.end(), std::back_inserter(self->_Script)); // FIX: Use std::vector::insert instead of non-existent append self->_Script.insert(self->_Script.end(), in.begin() + content_begin, in.end()); } return i; } Loading Loading
src/html.cpp +83 −30 Original line number Diff line number Diff line Loading @@ -1470,65 +1470,118 @@ size_t libhtmlpp::ScriptElement::parseElement( bool& termination ){ termination = false; el = std::make_unique<ScriptElement>(); auto* self = static_cast<ScriptElement*>(el.get()); // _Script ist hier gültig auto* self = static_cast<ScriptElement*>(el.get()); size_t i = start; if (i >= in.size() || in[i] != '<') { // If it doesn't start with '<', we can't parse a tag here. return start; } // Helper to perform case-insensitive comparison auto iequals = [](char a, char b) { return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b)); }; // Helper to perform case-insensitive match for a keyword starting at 'pos' auto match_ci = [&](size_t pos, const char* k) -> bool { for (size_t i = 0; k[i]; ++i) { if (pos + i >= in.size()) return false; if (!iequals(in[pos + i], k[i])) return false; for (size_t j = 0; k[j]; ++j) { if (pos + j >= in.size() || !iequals(in[pos + j], k[j])) { return false; } } return true; }; size_t i = start; // --- 1. Validate Opening Tag Name (<script) --- ++i; // Consume '<' if (i >= in.size() || in[i] != '<') return i; // Skip leading whitespace after '<' while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) { ++i; } ++i; // '<' while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; const char* kw = "script"; for (size_t k = 0; kw[k] && i < in.size(); ++k, ++i) { if (!iequals(in[i], kw[k])) { while (i < in.size() && in[i] != '>') ++i; if (i < in.size()) ++i; // '>' const char* tag_keyword = "script"; size_t keyword_len = std::char_traits<char>::length(tag_keyword); if (i + keyword_len >= in.size() || !match_ci(i, tag_keyword)) { // Tag name doesn't match "script" // Skip till next '>' and return the position after it. while (i < in.size() && in[i] != '>') { ++i; } if (i < in.size()) { ++i; // Consume '>' } return i; } i += keyword_len; // Consume "script" // --- 2. Extract Opening Tag and Attributes --- // Find the closing '>' of the opening tag. size_t tag_end = i; while (i < in.size() && in[i] != '>') { ++i; } while (i < in.size() && in[i] != '>') ++i; if (i < in.size() && in[i] == '>') ++i; // '>' konsumieren // Capture the raw opening tag data (including '<script' and attributes) for serialization. if (i > start && in[i] == '>') { // Copy data from '<' (start) up to and including '>' (i) std::vector<char> raw_tag_data(in.begin() + start, in.begin() + i + 1); self->_serialelize(raw_tag_data); } if (i >= in.size() || in[i] != '>') { // The tag was never closed (e.g., '<script src="..." EOF') return i; } ++i; // Consume '>' and move to content start size_t content_begin = i; // --- 3. Extract Script Content (CDATA-like section) --- for (; i < in.size(); ++i) { // Look for the start of the closing tag sequence: </script if (in[i] == '<' && match_ci(i, "</script")) { size_t j = i + 8; // strlen("</script") == 8 while (j < in.size() && in[j] != '>') ++j; size_t content_end = i; if (i > content_begin) { self->_Script.reserve(self->_Script.size() + (i - content_begin)); std::copy(in.begin() + content_begin, in.begin() + i, std::back_inserter(self->_Script)); // 3a. Extract content preceding the closing tag if (content_end > content_begin) { // FIX: Use std::vector::insert instead of non-existent append self->_Script.insert(self->_Script.end(), in.begin() + content_begin, in.begin() + content_end); } if (j < in.size() && in[j] == '>') { i = j + 1; // 3b. Find the end of the closing tag: </script> size_t closing_tag_end_pos = i + keyword_len + 2; // +2 for '</' // Skip any characters/whitespace between </script and the final '>' while (closing_tag_end_pos < in.size() && in[closing_tag_end_pos] != '>') { ++closing_tag_end_pos; } if (closing_tag_end_pos < in.size() && in[closing_tag_end_pos] == '>') { i = closing_tag_end_pos + 1; // Position after '>' return i; } // If we found "</script" but not the final ">", return the last processed position. return closing_tag_end_pos; } } // --- 4. End of Input Reached --- // If the input ends without a closing </script> tag, capture the remaining content. if (in.size() > content_begin) { self->_Script.reserve(self->_Script.size() + (in.size() - content_begin)); std::copy(in.begin() + content_begin, in.end(), std::back_inserter(self->_Script)); // FIX: Use std::vector::insert instead of non-existent append self->_Script.insert(self->_Script.end(), in.begin() + content_begin, in.end()); } return i; } Loading