Loading src/html.cpp +174 −94 Original line number Diff line number Diff line Loading @@ -440,21 +440,27 @@ void libhtmlpp::HtmlString::_buildTree() { DocElements* lastEl = nullptr; std::unique_ptr<DocElements> firstEl = nullptr; auto is_ignore_sign = [](unsigned char ch){ return ch == '\r' || ch == '\t' || ch == '\n' || ch == ' '; auto is_ws = [](unsigned char ch) -> bool { // space, \t, \n, \r return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'; }; auto starts_with_ci = [&] (const std::vector<char> &s, size_t pos,const std::string_view k) { if (pos + k.size() > s.size()) return false; for (size_t i = 0; i < k.size(); ++i) { unsigned char a = static_cast<unsigned char>(s[pos + i]); auto ascii_tolower = [](unsigned char c) -> unsigned char { return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + 32) : c; }; auto starts_with_ci = [&](const char* s, const char* e, const char* k) -> bool { const size_t klen = std::char_traits<char>::length(k); if (static_cast<size_t>(e - s) < klen) return false; for (size_t i = 0; i < klen; ++i) { unsigned char a = static_cast<unsigned char>(s[i]); unsigned char b = static_cast<unsigned char>(k[i]); if (std::tolower(a) != std::tolower(b)) return false; if (ascii_tolower(a) != ascii_tolower(b)) return false; } return true; }; auto addelement = [&firstEl](DocElements **last){ auto add_element_node = [&](DocElements** last) { if (!firstEl) { firstEl = std::make_unique<DocElements>(); *last = firstEl.get(); Loading @@ -465,37 +471,75 @@ void libhtmlpp::HtmlString::_buildTree() { } }; HTMLException excp; const char* const base = _Data.data(); const size_t n = _Data.size(); const char* const end = base + n; size_t ii = 0; while (ii < _Data.size()) { if (_Data[ii] == HTMLTAG_OPEN) { if (starts_with_ci(_Data, ii, "<!--")) { addelement(&lastEl); ii = CommentElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else if (starts_with_ci(_Data, ii, "<script")) { addelement(&lastEl); ii = ScriptElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else if (starts_with_ci(_Data, ii, "<svg")) { addelement(&lastEl); ii = SvgElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else if (starts_with_ci(_Data, ii, "<textarea")) { addelement(&lastEl); ii = TextArea::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else { addelement(&lastEl); ii = HtmlElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); const char* p = base; while (p < end) { while (p < end && is_ws(static_cast<unsigned char>(*p))) ++p; if (p >= end) break; if (*p == HTMLTAG_OPEN) { // '<' const char* const remain_end = end; const size_t remain = static_cast<size_t>(remain_end - p); if (remain >= 2) { const unsigned char c1 = ascii_tolower(static_cast<unsigned char>(p[1])); if (p[1] == '!') { // <!-- ... --> if (starts_with_ci(p, end, "<!--")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = CommentElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } } else if (c1 == 's') { if (starts_with_ci(p, end, "<script")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = ScriptElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } if (starts_with_ci(p, end, "<svg")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = SvgElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } } else if (c1 == 't') { if (starts_with_ci(p, end, "<textarea")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = TextArea::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } } } { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = HtmlElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; } } else if (!is_ignore_sign(static_cast<unsigned char>(_Data[ii]))) { addelement(&lastEl); ii = TextElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else { ++ii; // nur Whitespace „verbrauchen“ add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = TextElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; } } _buildtreenode(firstEl.get(), nullptr, _rootEl); } /** * @brief Streams an HtmlString to an output stream using its underlying string. * @param os Output stream. Loading Loading @@ -1479,44 +1523,53 @@ int libhtmlpp::SvgElement::getType() const{ return ElementType::SvgEL; } size_t libhtmlpp::SvgElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){ size_t startel=start; while(start<in.size()){ if(in[start]==HTMLTAG_CLOSE) { el=std::make_unique<SvgElement>(); std::vector<char> tel; std::copy(in.begin()+startel,in.begin()+start,std::back_inserter(tel)); static_cast<SvgElement*>(el.get())->_serialelize(tel); size_t libhtmlpp::SvgElement::parseElement(const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination) { const size_t startel = start; termination = false; break; const auto begin = in.begin(); if (start >= in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: start offset beyond buffer."; } ++start; auto it_close_angle = std::find(begin + start, in.end(), HTMLTAG_CLOSE); if (it_close_angle == in.end()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing '>' for <svg> open tag."; } size_t close=++start; el = std::make_unique<SvgElement>(); auto* svgEl = static_cast<SvgElement*>(el.get()); while(start<in.size()){ if( std::equal(in.begin()+start,in.begin()+(start+4),"</svg")) { std::copy(in.begin()+close,in.begin()+start, std::back_inserter(static_cast<SvgElement*>(el.get())->_Svg)); start+=5; break; } ++start; { std::vector<char> tel; tel.assign(begin + startel, it_close_angle); svgEl->_serialelize(tel); } if (start == in.size()) { auto it_content_begin = it_close_angle; if (it_content_begin != in.end()) ++it_content_begin; // safe increment static constexpr char kEndTag[] = "</svg>"; auto it_end = std::search(it_content_begin, in.end(), std::begin(kEndTag), std::end(kEndTag) - 1 /* no '\0' */); if (it_end == in.end()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </svg closing tag."; throw excp[HTMLException::Error] << "Parsing error: Missing </svg> closing tag."; } return start; svgEl->_Svg.insert(svgEl->_Svg.end(), it_content_begin, it_end); const size_t consumed = static_cast<size_t>((it_end - begin) + (std::size(kEndTag) - 1)); return consumed; } libhtmlpp::TextArea::TextArea() : HtmlElement("svg"){ } Loading Loading @@ -1551,56 +1604,83 @@ int libhtmlpp::TextArea::getType() const{ return ElementType::TextAreaEL; } size_t libhtmlpp::TextArea::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el,size_t start, bool& termination){ size_t startel = start; while(start < in.size()){ if(in[start] == HTMLTAG_CLOSE) { el = std::make_unique<TextArea>(); std::vector<char> tel; std::copy(in.begin() + startel, in.begin() + start, std::back_inserter(tel)); static_cast<TextArea*>(el.get())->_serialelize(tel); size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination) { termination = false; const auto begin = in.begin(); const auto end = in.end(); break; } ++start; if (start >= in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: start offset beyond buffer."; } if (start == in.size()) { auto it_gt = std::find(begin + start, end, HTMLTAG_CLOSE); if (it_gt == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Unclosed <svg tag."; throw excp[HTMLException::Error] << "Parsing error: Unclosed <textarea> tag."; } size_t close = ++start; el = std::make_unique<TextArea>(); auto* ta = static_cast<TextArea*>(el.get()); { std::vector<char> tel; tel.assign(begin + start, it_gt); ta->_serialelize(tel); } auto it = it_gt; if (it != end) ++it; auto content_begin = it; size_t lvl = 0; while(start < in.size()){ if( (start + 9) <= in.size() && std::equal(in.begin()+start,in.begin()+(start+9),"<textarea")){ ++lvl; for (;;) { auto lt = std::find(it, end, '<'); if (lt == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </textarea> closing tag."; } if( (start + 10) <= in.size() && std::equal(in.begin()+start,in.begin()+(start+10),"</textarea")) { static constexpr char kCloseHead[] = "</textarea"; const ptrdiff_t remain_after_lt = end - lt; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kCloseHead) - 1) && std::equal(lt, lt + (sizeof(kCloseHead) - 1), kCloseHead)) { if (lvl == 0) { std::copy(in.begin() + close, in.begin() + start, std::back_inserter(static_cast<TextArea*>(el.get())->_Text)); auto it_close_gt = std::find(lt, end, '>'); if (it_close_gt == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Unclosed </textarea> end tag."; } start += 10; break; ta->_Text.insert(ta->_Text.end(), content_begin, lt); const size_t consumed = static_cast<size_t>((it_close_gt - begin) + 1); return consumed; } else { --lvl; it = lt + (sizeof(kCloseHead) - 1); continue; } } ++start; } if (start == in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </textarea closing tag."; static constexpr char kOpenHead[] = "<textarea"; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kOpenHead) - 1) && std::equal(lt, lt + (sizeof(kOpenHead) - 1), kOpenHead)) { ++lvl; it = lt + (sizeof(kOpenHead) - 1); continue; } return start; it = lt + 1; } } libhtmlpp::HtmlPage::HtmlPage(){ Loading Loading
src/html.cpp +174 −94 Original line number Diff line number Diff line Loading @@ -440,21 +440,27 @@ void libhtmlpp::HtmlString::_buildTree() { DocElements* lastEl = nullptr; std::unique_ptr<DocElements> firstEl = nullptr; auto is_ignore_sign = [](unsigned char ch){ return ch == '\r' || ch == '\t' || ch == '\n' || ch == ' '; auto is_ws = [](unsigned char ch) -> bool { // space, \t, \n, \r return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'; }; auto starts_with_ci = [&] (const std::vector<char> &s, size_t pos,const std::string_view k) { if (pos + k.size() > s.size()) return false; for (size_t i = 0; i < k.size(); ++i) { unsigned char a = static_cast<unsigned char>(s[pos + i]); auto ascii_tolower = [](unsigned char c) -> unsigned char { return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + 32) : c; }; auto starts_with_ci = [&](const char* s, const char* e, const char* k) -> bool { const size_t klen = std::char_traits<char>::length(k); if (static_cast<size_t>(e - s) < klen) return false; for (size_t i = 0; i < klen; ++i) { unsigned char a = static_cast<unsigned char>(s[i]); unsigned char b = static_cast<unsigned char>(k[i]); if (std::tolower(a) != std::tolower(b)) return false; if (ascii_tolower(a) != ascii_tolower(b)) return false; } return true; }; auto addelement = [&firstEl](DocElements **last){ auto add_element_node = [&](DocElements** last) { if (!firstEl) { firstEl = std::make_unique<DocElements>(); *last = firstEl.get(); Loading @@ -465,37 +471,75 @@ void libhtmlpp::HtmlString::_buildTree() { } }; HTMLException excp; const char* const base = _Data.data(); const size_t n = _Data.size(); const char* const end = base + n; size_t ii = 0; while (ii < _Data.size()) { if (_Data[ii] == HTMLTAG_OPEN) { if (starts_with_ci(_Data, ii, "<!--")) { addelement(&lastEl); ii = CommentElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else if (starts_with_ci(_Data, ii, "<script")) { addelement(&lastEl); ii = ScriptElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else if (starts_with_ci(_Data, ii, "<svg")) { addelement(&lastEl); ii = SvgElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else if (starts_with_ci(_Data, ii, "<textarea")) { addelement(&lastEl); ii = TextArea::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else { addelement(&lastEl); ii = HtmlElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); const char* p = base; while (p < end) { while (p < end && is_ws(static_cast<unsigned char>(*p))) ++p; if (p >= end) break; if (*p == HTMLTAG_OPEN) { // '<' const char* const remain_end = end; const size_t remain = static_cast<size_t>(remain_end - p); if (remain >= 2) { const unsigned char c1 = ascii_tolower(static_cast<unsigned char>(p[1])); if (p[1] == '!') { // <!-- ... --> if (starts_with_ci(p, end, "<!--")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = CommentElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } } else if (c1 == 's') { if (starts_with_ci(p, end, "<script")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = ScriptElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } if (starts_with_ci(p, end, "<svg")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = SvgElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } } else if (c1 == 't') { if (starts_with_ci(p, end, "<textarea")) { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = TextArea::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; continue; } } } { add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = HtmlElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; } } else if (!is_ignore_sign(static_cast<unsigned char>(_Data[ii]))) { addelement(&lastEl); ii = TextElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator); } else { ++ii; // nur Whitespace „verbrauchen“ add_element_node(&lastEl); size_t i = static_cast<size_t>(p - base); i = TextElement::parseElement(_Data, lastEl->element, i, lastEl->terminator); p = base + i; } } _buildtreenode(firstEl.get(), nullptr, _rootEl); } /** * @brief Streams an HtmlString to an output stream using its underlying string. * @param os Output stream. Loading Loading @@ -1479,44 +1523,53 @@ int libhtmlpp::SvgElement::getType() const{ return ElementType::SvgEL; } size_t libhtmlpp::SvgElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){ size_t startel=start; while(start<in.size()){ if(in[start]==HTMLTAG_CLOSE) { el=std::make_unique<SvgElement>(); std::vector<char> tel; std::copy(in.begin()+startel,in.begin()+start,std::back_inserter(tel)); static_cast<SvgElement*>(el.get())->_serialelize(tel); size_t libhtmlpp::SvgElement::parseElement(const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination) { const size_t startel = start; termination = false; break; const auto begin = in.begin(); if (start >= in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: start offset beyond buffer."; } ++start; auto it_close_angle = std::find(begin + start, in.end(), HTMLTAG_CLOSE); if (it_close_angle == in.end()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing '>' for <svg> open tag."; } size_t close=++start; el = std::make_unique<SvgElement>(); auto* svgEl = static_cast<SvgElement*>(el.get()); while(start<in.size()){ if( std::equal(in.begin()+start,in.begin()+(start+4),"</svg")) { std::copy(in.begin()+close,in.begin()+start, std::back_inserter(static_cast<SvgElement*>(el.get())->_Svg)); start+=5; break; } ++start; { std::vector<char> tel; tel.assign(begin + startel, it_close_angle); svgEl->_serialelize(tel); } if (start == in.size()) { auto it_content_begin = it_close_angle; if (it_content_begin != in.end()) ++it_content_begin; // safe increment static constexpr char kEndTag[] = "</svg>"; auto it_end = std::search(it_content_begin, in.end(), std::begin(kEndTag), std::end(kEndTag) - 1 /* no '\0' */); if (it_end == in.end()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </svg closing tag."; throw excp[HTMLException::Error] << "Parsing error: Missing </svg> closing tag."; } return start; svgEl->_Svg.insert(svgEl->_Svg.end(), it_content_begin, it_end); const size_t consumed = static_cast<size_t>((it_end - begin) + (std::size(kEndTag) - 1)); return consumed; } libhtmlpp::TextArea::TextArea() : HtmlElement("svg"){ } Loading Loading @@ -1551,56 +1604,83 @@ int libhtmlpp::TextArea::getType() const{ return ElementType::TextAreaEL; } size_t libhtmlpp::TextArea::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el,size_t start, bool& termination){ size_t startel = start; while(start < in.size()){ if(in[start] == HTMLTAG_CLOSE) { el = std::make_unique<TextArea>(); std::vector<char> tel; std::copy(in.begin() + startel, in.begin() + start, std::back_inserter(tel)); static_cast<TextArea*>(el.get())->_serialelize(tel); size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination) { termination = false; const auto begin = in.begin(); const auto end = in.end(); break; } ++start; if (start >= in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: start offset beyond buffer."; } if (start == in.size()) { auto it_gt = std::find(begin + start, end, HTMLTAG_CLOSE); if (it_gt == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Unclosed <svg tag."; throw excp[HTMLException::Error] << "Parsing error: Unclosed <textarea> tag."; } size_t close = ++start; el = std::make_unique<TextArea>(); auto* ta = static_cast<TextArea*>(el.get()); { std::vector<char> tel; tel.assign(begin + start, it_gt); ta->_serialelize(tel); } auto it = it_gt; if (it != end) ++it; auto content_begin = it; size_t lvl = 0; while(start < in.size()){ if( (start + 9) <= in.size() && std::equal(in.begin()+start,in.begin()+(start+9),"<textarea")){ ++lvl; for (;;) { auto lt = std::find(it, end, '<'); if (lt == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </textarea> closing tag."; } if( (start + 10) <= in.size() && std::equal(in.begin()+start,in.begin()+(start+10),"</textarea")) { static constexpr char kCloseHead[] = "</textarea"; const ptrdiff_t remain_after_lt = end - lt; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kCloseHead) - 1) && std::equal(lt, lt + (sizeof(kCloseHead) - 1), kCloseHead)) { if (lvl == 0) { std::copy(in.begin() + close, in.begin() + start, std::back_inserter(static_cast<TextArea*>(el.get())->_Text)); auto it_close_gt = std::find(lt, end, '>'); if (it_close_gt == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Unclosed </textarea> end tag."; } start += 10; break; ta->_Text.insert(ta->_Text.end(), content_begin, lt); const size_t consumed = static_cast<size_t>((it_close_gt - begin) + 1); return consumed; } else { --lvl; it = lt + (sizeof(kCloseHead) - 1); continue; } } ++start; } if (start == in.size()) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Missing </textarea closing tag."; static constexpr char kOpenHead[] = "<textarea"; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kOpenHead) - 1) && std::equal(lt, lt + (sizeof(kOpenHead) - 1), kOpenHead)) { ++lvl; it = lt + (sizeof(kOpenHead) - 1); continue; } return start; it = lt + 1; } } libhtmlpp::HtmlPage::HtmlPage(){ Loading