Loading src/html.cpp +119 −86 Original line number Diff line number Diff line Loading @@ -277,9 +277,7 @@ void libhtmlpp::HtmlString::_buildtreenode( auto checkContainer = [&](const std::string &tag) { for (size_t i = 0; i < ContainerTypes.size(); ++i) { if (tag == ContainerTypes[i]) { return true; } if (tag == ContainerTypes[i]) return true; } return false; }; Loading @@ -293,8 +291,7 @@ void libhtmlpp::HtmlString::_buildtreenode( auto find_terminator = [&skip_empty, checkContainer](DocElements *open, const DocElements *bound) -> DocElements* { if (!open || !open->element || open->terminator || open->element->getType() != HtmlEl) return nullptr; open->element->getType() != HtmlEl) return nullptr; const std::string &tag = static_cast<HtmlElement*>(open->element.get())->getTagname(); int nest = 0; Loading @@ -302,15 +299,13 @@ void libhtmlpp::HtmlString::_buildtreenode( while (cur && cur != bound) { cur = skip_empty(cur, bound); if (!cur || cur == bound) break; if (cur->element && cur->element->getType() == HtmlEl) { const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname(); if (curtag == tag) { if (cur->terminator) { if (nest == 0) return cur; // Found! if (nest == 0) return cur; // Found matching end tag --nest; } else { ++nest; Loading @@ -332,6 +327,15 @@ void libhtmlpp::HtmlString::_buildtreenode( for (;;) { start = skip_empty(start, end); // --- NEU: Terminator-Knoten auf dieser Ebene einfach überspringen --- // (End-Tags werden nur benutzt, um in find_terminator/Stack zu schließen, // aber niemals als eigene Tree-Knoten eingebaut.) if (start && start != end && start->terminator) { start = start->nextel.get(); continue; } // -------------------------------------------------------------------- if (!start || start == end) { if (stack.empty()) { if (firstel->element) { Loading @@ -343,7 +347,8 @@ void libhtmlpp::HtmlString::_buildtreenode( Frame fr = stack.top(); stack.pop(); HtmlElement *opener_el = static_cast<HtmlElement*>(fr.open->element.get()); if (fr.open->nextel->element ) { // SAFETY: open->nextel kann null sein if (fr.open->nextel && fr.open->nextel->element) { opener_el->_childElement = std::move(fr.open->nextel->element); } Loading @@ -356,7 +361,9 @@ void libhtmlpp::HtmlString::_buildtreenode( } prev_el_in_tree = opener_el; start = fr.close->nextel.get(); // SAFETY: close kann null sein (sollte nicht vorkommen, aber robust sein) start = (fr.close ? fr.close->nextel.get() : nullptr); end = fr.outer_end; continue; Loading @@ -364,19 +371,16 @@ void libhtmlpp::HtmlString::_buildtreenode( if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { stack.push(Frame{start, close, end, prev_el_in_tree}); prev_el_in_tree = nullptr; start = start->nextel.get(); end = close; continue; } } if (start->element && !start->terminator) { Element *current_el = start->element.get(); if (prev_el_in_tree) { Loading @@ -387,9 +391,11 @@ void libhtmlpp::HtmlString::_buildtreenode( prev_el_in_tree = current_el; } } start = start->nextel.get(); } } /** * @brief Tokenizes the buffer and constructs the intermediate DocElements list, * then converts it into a tree stored in @_rootEl. Loading @@ -399,6 +405,10 @@ void libhtmlpp::HtmlString::_buildTree() { DocElements *lastEl = nullptr; std::unique_ptr<DocElements> firstEl = nullptr; auto is_ignore_sign = [](unsigned char ch){ return ch == '\r' || ch == '\t' || ch == '\n' || ch == ' '; }; auto starts_with_ci = [&] (const std::vector<char> &s, size_t pos,const std::string_view k) { if (pos + k.size() > s.size()) return false; for (size_t i = 0; i < k.size(); ++i) { Loading Loading @@ -441,7 +451,7 @@ void libhtmlpp::HtmlString::_buildTree() { addelement(&lastEl); ii = HtmlElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } } else if (!std::isspace(static_cast<unsigned char>(_Data[ii]))) { } else if (!is_ignore_sign(static_cast<unsigned char>(_Data[ii]))) { addelement(&lastEl); ii = TextElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else { Loading Loading @@ -770,19 +780,12 @@ void libhtmlpp::HtmlElement::_serialelize(std::vector<char> in) { if(vend!=std::string::npos && vstart!=std::string::npos){ std::copy(in.begin()+vstart,in.begin()+vend,std::back_inserter(val)); } std::cout << _TagName.data() << "->" << key << ": " << val << std::endl; setAttribute(key,val); } } } } #include <cctype> #include <vector> #include <memory> size_t libhtmlpp::HtmlElement::parseElement( const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, Loading @@ -791,47 +794,39 @@ size_t libhtmlpp::HtmlElement::parseElement( ){ el = std::make_unique<HtmlElement>(); termination = false; bool selfClosing = false; size_t i = start; if (i >= in.size() || in[i] != HTMLTAG_OPEN) { return i; } if (i >= in.size() || in[i] != HTMLTAG_OPEN) return i; ++i; while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; if (i < in.size() && in[i] == HTMLTAG_TERMINATE) { // '/' termination = true; ++i; // '/' konsumieren while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; } std::vector<char> tel; while (i < in.size() && in[i] != HTMLTAG_CLOSE) { // '>' tel.emplace_back(in[i]); ++i; size_t close = i; while (close < in.size() && in[close] != HTMLTAG_CLOSE) { // '>' ++close; } while (!tel.empty() && std::isspace(static_cast<unsigned char>(tel.back()))) tel.pop_back(); size_t k = i; while (k < close && std::isspace(static_cast<unsigned char>(in[k]))) ++k; if (!termination && !tel.empty() && tel.back() == HTMLTAG_TERMINATE) { selfClosing = true; tel.pop_back(); // den '/' entfernen while (!tel.empty() && std::isspace(static_cast<unsigned char>(tel.back()))) tel.pop_back(); std::vector<char> tel; if (k < close && in[k] == HTMLTAG_TERMINATE) { // '/' termination = true; ++k; while (k < close && std::isspace(static_cast<unsigned char>(in[k]))) ++k; } reinterpret_cast<HtmlElement*>(el.get())->_serialelize(tel); tel.insert(tel.end(), in.begin() + k, in.begin() + close); if (i < in.size() && in[i] == HTMLTAG_CLOSE) ++i; reinterpret_cast<HtmlElement*>(el.get())->_serialelize(tel); return i; return ++close; } namespace libhtmlpp { void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src){ Loading Loading @@ -1245,28 +1240,34 @@ int libhtmlpp::CommentElement::getType() const{ return ElementType::CommentEl; } size_t libhtmlpp::CommentElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){ size_t startel=start+3; while(start<in.size()){ if(std::equal(in.begin()+start,in.begin()+(start+3),"-->")) { size_t libhtmlpp::CommentElement::parseElement( const std::vector<char>& in, std::unique_ptr<Element>& el, size_t start, bool& termination ){ termination = false; el = std::make_unique<CommentElement>(); std::copy(in.begin()+startel,in.begin()+start, std::back_inserter(static_cast<CommentElement*>(el.get())->_Comment)); start+=2; size_t i = start; if (i + 3 >= in.size()) return i; if (!(in[i] == '<' && in[i+1] == '!' && in[i+2] == '-' && in[i+3] == '-')) return i; i += 4; while (i + 2 < in.size()) { if (in[i] == '-' && in[i+1] == '-' && in[i+2] == '>') { i += 3; // nach "-->" break; } ++start; static_cast<CommentElement*>(el.get())->_Comment.push_back(in[i]); ++i; } if (start == in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing --> closing tag."; return i; } return start; } libhtmlpp::ScriptElement::ScriptElement() : HtmlElement("script"){ } Loading Loading @@ -1301,43 +1302,75 @@ int libhtmlpp::ScriptElement::getType() const{ return ElementType::ScriptEL; } size_t libhtmlpp::ScriptElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){ size_t startel=start; size_t libhtmlpp::ScriptElement::parseElement( const std::vector<char>& in, std::unique_ptr<Element>& el, size_t start, bool& termination ){ termination = false; while(start<in.size()){ if(in[start]==HTMLTAG_CLOSE) { el = std::make_unique<ScriptElement>(); std::vector<char> tel; auto* self = static_cast<ScriptElement*>(el.get()); // _Script ist hier gültig std::copy(in.begin()+startel,in.begin()+start,std::back_inserter(tel)); auto iequals = [](char a, char b) { return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b)); }; auto match_ci = [&](size_t pos, const char* k) -> bool { for (size_t i = 0; k[i]; ++i) { if (pos + i >= in.size()) return false; if (!iequals(in[pos + i], k[i])) return false; } return true; }; static_cast<ScriptElement*>(el.get())->_serialelize(tel); size_t i = start; break; if (i >= in.size() || in[i] != '<') return i; ++i; // '<' while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; const char* kw = "script"; for (size_t k = 0; kw[k] && i < in.size(); ++k, ++i) { if (!iequals(in[i], kw[k])) { while (i < in.size() && in[i] != '>') ++i; if (i < in.size()) ++i; // '>' return i; } ++start; } size_t close=++start; while (i < in.size() && in[i] != '>') ++i; if (i < in.size() && in[i] == '>') ++i; // '>' konsumieren while(start<in.size()){ if( std::equal(in.begin()+start,in.begin()+(start+7),"</script")) { size_t content_begin = i; std::copy(in.begin()+close,in.begin()+start, std::back_inserter(static_cast<ScriptElement*>(el.get())->_Script)); start+=8; break; } ++start; for (; i < in.size(); ++i) { if (in[i] == '<' && match_ci(i, "</script")) { size_t j = i + 8; // strlen("</script") == 8 while (j < in.size() && in[j] != '>') ++j; if (i > content_begin) { self->_Script.reserve(self->_Script.size() + (i - content_begin)); std::copy(in.begin() + content_begin, in.begin() + i, std::back_inserter(self->_Script)); } if (start == in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </script closing tag."; if (j < in.size() && in[j] == '>') { i = j + 1; } return i; } } return start; if (in.size() > content_begin) { self->_Script.reserve(self->_Script.size() + (in.size() - content_begin)); std::copy(in.begin() + content_begin, in.end(), std::back_inserter(self->_Script)); } return i; } libhtmlpp::SvgElement::SvgElement() : HtmlElement("svg"){ Loading Loading
src/html.cpp +119 −86 Original line number Diff line number Diff line Loading @@ -277,9 +277,7 @@ void libhtmlpp::HtmlString::_buildtreenode( auto checkContainer = [&](const std::string &tag) { for (size_t i = 0; i < ContainerTypes.size(); ++i) { if (tag == ContainerTypes[i]) { return true; } if (tag == ContainerTypes[i]) return true; } return false; }; Loading @@ -293,8 +291,7 @@ void libhtmlpp::HtmlString::_buildtreenode( auto find_terminator = [&skip_empty, checkContainer](DocElements *open, const DocElements *bound) -> DocElements* { if (!open || !open->element || open->terminator || open->element->getType() != HtmlEl) return nullptr; open->element->getType() != HtmlEl) return nullptr; const std::string &tag = static_cast<HtmlElement*>(open->element.get())->getTagname(); int nest = 0; Loading @@ -302,15 +299,13 @@ void libhtmlpp::HtmlString::_buildtreenode( while (cur && cur != bound) { cur = skip_empty(cur, bound); if (!cur || cur == bound) break; if (cur->element && cur->element->getType() == HtmlEl) { const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname(); if (curtag == tag) { if (cur->terminator) { if (nest == 0) return cur; // Found! if (nest == 0) return cur; // Found matching end tag --nest; } else { ++nest; Loading @@ -332,6 +327,15 @@ void libhtmlpp::HtmlString::_buildtreenode( for (;;) { start = skip_empty(start, end); // --- NEU: Terminator-Knoten auf dieser Ebene einfach überspringen --- // (End-Tags werden nur benutzt, um in find_terminator/Stack zu schließen, // aber niemals als eigene Tree-Knoten eingebaut.) if (start && start != end && start->terminator) { start = start->nextel.get(); continue; } // -------------------------------------------------------------------- if (!start || start == end) { if (stack.empty()) { if (firstel->element) { Loading @@ -343,7 +347,8 @@ void libhtmlpp::HtmlString::_buildtreenode( Frame fr = stack.top(); stack.pop(); HtmlElement *opener_el = static_cast<HtmlElement*>(fr.open->element.get()); if (fr.open->nextel->element ) { // SAFETY: open->nextel kann null sein if (fr.open->nextel && fr.open->nextel->element) { opener_el->_childElement = std::move(fr.open->nextel->element); } Loading @@ -356,7 +361,9 @@ void libhtmlpp::HtmlString::_buildtreenode( } prev_el_in_tree = opener_el; start = fr.close->nextel.get(); // SAFETY: close kann null sein (sollte nicht vorkommen, aber robust sein) start = (fr.close ? fr.close->nextel.get() : nullptr); end = fr.outer_end; continue; Loading @@ -364,19 +371,16 @@ void libhtmlpp::HtmlString::_buildtreenode( if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { stack.push(Frame{start, close, end, prev_el_in_tree}); prev_el_in_tree = nullptr; start = start->nextel.get(); end = close; continue; } } if (start->element && !start->terminator) { Element *current_el = start->element.get(); if (prev_el_in_tree) { Loading @@ -387,9 +391,11 @@ void libhtmlpp::HtmlString::_buildtreenode( prev_el_in_tree = current_el; } } start = start->nextel.get(); } } /** * @brief Tokenizes the buffer and constructs the intermediate DocElements list, * then converts it into a tree stored in @_rootEl. Loading @@ -399,6 +405,10 @@ void libhtmlpp::HtmlString::_buildTree() { DocElements *lastEl = nullptr; std::unique_ptr<DocElements> firstEl = nullptr; auto is_ignore_sign = [](unsigned char ch){ return ch == '\r' || ch == '\t' || ch == '\n' || ch == ' '; }; auto starts_with_ci = [&] (const std::vector<char> &s, size_t pos,const std::string_view k) { if (pos + k.size() > s.size()) return false; for (size_t i = 0; i < k.size(); ++i) { Loading Loading @@ -441,7 +451,7 @@ void libhtmlpp::HtmlString::_buildTree() { addelement(&lastEl); ii = HtmlElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } } else if (!std::isspace(static_cast<unsigned char>(_Data[ii]))) { } else if (!is_ignore_sign(static_cast<unsigned char>(_Data[ii]))) { addelement(&lastEl); ii = TextElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else { Loading Loading @@ -770,19 +780,12 @@ void libhtmlpp::HtmlElement::_serialelize(std::vector<char> in) { if(vend!=std::string::npos && vstart!=std::string::npos){ std::copy(in.begin()+vstart,in.begin()+vend,std::back_inserter(val)); } std::cout << _TagName.data() << "->" << key << ": " << val << std::endl; setAttribute(key,val); } } } } #include <cctype> #include <vector> #include <memory> size_t libhtmlpp::HtmlElement::parseElement( const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, Loading @@ -791,47 +794,39 @@ size_t libhtmlpp::HtmlElement::parseElement( ){ el = std::make_unique<HtmlElement>(); termination = false; bool selfClosing = false; size_t i = start; if (i >= in.size() || in[i] != HTMLTAG_OPEN) { return i; } if (i >= in.size() || in[i] != HTMLTAG_OPEN) return i; ++i; while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; if (i < in.size() && in[i] == HTMLTAG_TERMINATE) { // '/' termination = true; ++i; // '/' konsumieren while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; } std::vector<char> tel; while (i < in.size() && in[i] != HTMLTAG_CLOSE) { // '>' tel.emplace_back(in[i]); ++i; size_t close = i; while (close < in.size() && in[close] != HTMLTAG_CLOSE) { // '>' ++close; } while (!tel.empty() && std::isspace(static_cast<unsigned char>(tel.back()))) tel.pop_back(); size_t k = i; while (k < close && std::isspace(static_cast<unsigned char>(in[k]))) ++k; if (!termination && !tel.empty() && tel.back() == HTMLTAG_TERMINATE) { selfClosing = true; tel.pop_back(); // den '/' entfernen while (!tel.empty() && std::isspace(static_cast<unsigned char>(tel.back()))) tel.pop_back(); std::vector<char> tel; if (k < close && in[k] == HTMLTAG_TERMINATE) { // '/' termination = true; ++k; while (k < close && std::isspace(static_cast<unsigned char>(in[k]))) ++k; } reinterpret_cast<HtmlElement*>(el.get())->_serialelize(tel); tel.insert(tel.end(), in.begin() + k, in.begin() + close); if (i < in.size() && in[i] == HTMLTAG_CLOSE) ++i; reinterpret_cast<HtmlElement*>(el.get())->_serialelize(tel); return i; return ++close; } namespace libhtmlpp { void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src){ Loading Loading @@ -1245,28 +1240,34 @@ int libhtmlpp::CommentElement::getType() const{ return ElementType::CommentEl; } size_t libhtmlpp::CommentElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){ size_t startel=start+3; while(start<in.size()){ if(std::equal(in.begin()+start,in.begin()+(start+3),"-->")) { size_t libhtmlpp::CommentElement::parseElement( const std::vector<char>& in, std::unique_ptr<Element>& el, size_t start, bool& termination ){ termination = false; el = std::make_unique<CommentElement>(); std::copy(in.begin()+startel,in.begin()+start, std::back_inserter(static_cast<CommentElement*>(el.get())->_Comment)); start+=2; size_t i = start; if (i + 3 >= in.size()) return i; if (!(in[i] == '<' && in[i+1] == '!' && in[i+2] == '-' && in[i+3] == '-')) return i; i += 4; while (i + 2 < in.size()) { if (in[i] == '-' && in[i+1] == '-' && in[i+2] == '>') { i += 3; // nach "-->" break; } ++start; static_cast<CommentElement*>(el.get())->_Comment.push_back(in[i]); ++i; } if (start == in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing --> closing tag."; return i; } return start; } libhtmlpp::ScriptElement::ScriptElement() : HtmlElement("script"){ } Loading Loading @@ -1301,43 +1302,75 @@ int libhtmlpp::ScriptElement::getType() const{ return ElementType::ScriptEL; } size_t libhtmlpp::ScriptElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){ size_t startel=start; size_t libhtmlpp::ScriptElement::parseElement( const std::vector<char>& in, std::unique_ptr<Element>& el, size_t start, bool& termination ){ termination = false; while(start<in.size()){ if(in[start]==HTMLTAG_CLOSE) { el = std::make_unique<ScriptElement>(); std::vector<char> tel; auto* self = static_cast<ScriptElement*>(el.get()); // _Script ist hier gültig std::copy(in.begin()+startel,in.begin()+start,std::back_inserter(tel)); auto iequals = [](char a, char b) { return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b)); }; auto match_ci = [&](size_t pos, const char* k) -> bool { for (size_t i = 0; k[i]; ++i) { if (pos + i >= in.size()) return false; if (!iequals(in[pos + i], k[i])) return false; } return true; }; static_cast<ScriptElement*>(el.get())->_serialelize(tel); size_t i = start; break; if (i >= in.size() || in[i] != '<') return i; ++i; // '<' while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i; const char* kw = "script"; for (size_t k = 0; kw[k] && i < in.size(); ++k, ++i) { if (!iequals(in[i], kw[k])) { while (i < in.size() && in[i] != '>') ++i; if (i < in.size()) ++i; // '>' return i; } ++start; } size_t close=++start; while (i < in.size() && in[i] != '>') ++i; if (i < in.size() && in[i] == '>') ++i; // '>' konsumieren while(start<in.size()){ if( std::equal(in.begin()+start,in.begin()+(start+7),"</script")) { size_t content_begin = i; std::copy(in.begin()+close,in.begin()+start, std::back_inserter(static_cast<ScriptElement*>(el.get())->_Script)); start+=8; break; } ++start; for (; i < in.size(); ++i) { if (in[i] == '<' && match_ci(i, "</script")) { size_t j = i + 8; // strlen("</script") == 8 while (j < in.size() && in[j] != '>') ++j; if (i > content_begin) { self->_Script.reserve(self->_Script.size() + (i - content_begin)); std::copy(in.begin() + content_begin, in.begin() + i, std::back_inserter(self->_Script)); } if (start == in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </script closing tag."; if (j < in.size() && in[j] == '>') { i = j + 1; } return i; } } return start; if (in.size() > content_begin) { self->_Script.reserve(self->_Script.size() + (in.size() - content_begin)); std::copy(in.begin() + content_begin, in.end(), std::back_inserter(self->_Script)); } return i; } libhtmlpp::SvgElement::SvgElement() : HtmlElement("svg"){ Loading