Loading src/html.cpp +99 −67 Original line number Diff line number Diff line Loading @@ -783,95 +783,119 @@ void libhtmlpp::HtmlElement::remove(libhtmlpp::Element* el){ * @throws HTMLException if no tag could be determined. */ void libhtmlpp::HtmlElement::_serialelize(std::vector<char> in) { void libhtmlpp::HtmlElement::_serialelize(const std::vector<char>& in) { _TagName.clear(); bool end_tag = false; auto is_space = [](unsigned char c) -> bool { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; }; auto tolower_ascii = [](unsigned char c) -> unsigned char { return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + 32) : c; }; size_t i = 0, n = in.size(); while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; if (i < n && in[i] == '<') ++i; while (i < n && is_space(static_cast<unsigned char>(in[i]))) ++i; bool end_tag = false; if (i < n && in[i] == '/') { end_tag = true; ++i; while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; while (i < n && is_space(static_cast<unsigned char>(in[i]))) ++i; } size_t st = i; while (i < n) { char c = in[i]; if (std::isspace(static_cast<unsigned char>(c)) || c == '/' || c == '>') break; size_t r = n; while (r > i && is_space(static_cast<unsigned char>(in[r - 1]))) --r; if (r > i && in[r - 1] == '>') --r; // ignore '>' if present while (r > i && is_space(static_cast<unsigned char>(in[r - 1]))) --r; if (i >= r) { HTMLException excp; throw excp[HTMLException::Critical] << "no tag in element found!"; } const size_t name_start = i; while (i < r) { unsigned char c = static_cast<unsigned char>(in[i]); if (is_space(c) || c == '/' || c == '>') break; ++i; } size_t et = i; if (st == et) { const size_t name_end = i; if (name_start == name_end) { HTMLException excp; throw excp[HTMLException::Critical] << "no tag in element found!"; } _TagName.assign(in.begin() + st, in.begin() + et); for (auto &ch : _TagName) ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch))); _TagName.assign(in.begin() + name_start, in.begin() + name_end); for (char& ch : _TagName) ch = static_cast<char>(tolower_ascii(static_cast<unsigned char>(ch))); if (end_tag) { return; } while (i < n) { while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; if (i >= n) break; bool maybe_self_closing = false; while (i < r) { while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; if (i >= r) break; if (in[i] == '/') { maybe_self_closing = true; ++i; while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; continue; } if (in[i] == '>') { while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; break; } size_t kstart = i; while (i < n) { char c = in[i]; if (std::isspace(static_cast<unsigned char>(c)) || c == '=' || c == '/' || c == '>') break; const size_t kstart = i; while (i < r) { unsigned char c = static_cast<unsigned char>(in[i]); if (is_space(c) || c == '=' || c == '/' || c == '>') break; ++i; } size_t kend = i; const size_t kend = i; if (kstart == kend) { ++i; continue; } std::string key(in.begin() + kstart, in.begin() + kend); for (char& ch : key) ch = static_cast<char>(tolower_ascii(static_cast<unsigned char>(ch))); while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; std::string val; if (i < n && in[i] == '=') { if (i < r && in[i] == '=') { ++i; while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; if (i < n && (in[i] == '"' || in[i] == '\'')) { if (i < r && (in[i] == '"' || in[i] == '\'')) { char quote = in[i++]; size_t vstart = i; while (i < n && in[i] != quote) ++i; size_t vend = i; const size_t vstart = i; while (i < r && in[i] != quote) ++i; const size_t vend = i; val.assign(in.begin() + vstart, in.begin() + vend); if (i < n && in[i] == quote) ++i; if (i < r && in[i] == quote) ++i; } else { size_t vstart = i; while (i < n) { char c = in[i]; if (std::isspace(static_cast<unsigned char>(c)) || c == '/' || c == '>') break; const size_t vstart = i; while (i < r) { unsigned char c = static_cast<unsigned char>(in[i]); if (is_space(c) || c == '/' || c == '>') break; ++i; } size_t vend = i; const size_t vend = i; val.assign(in.begin() + vstart, in.begin() + vend); } } else { val.clear(); } setAttribute(key, val); } } size_t libhtmlpp::HtmlElement::parseElement( const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, Loading Loading @@ -1610,6 +1634,7 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, bool& termination) { termination = false; const auto begin = in.begin(); const auto end = in.end(); Loading @@ -1618,6 +1643,27 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, throw excp[HTMLException::Error] << "Parsing error: start offset beyond buffer."; } // ---- helpers ----------------------------------------------------------- auto is_ws = [](unsigned char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; }; auto tolower_ascii = [](unsigned char c) -> unsigned char { return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + 32) : c; }; auto ieq_prefix = [&](std::vector<char>::const_iterator it, std::vector<char>::const_iterator it_end, const char* lit) -> bool { for (; *lit; ++lit, ++it) { if (it == it_end) return false; if (tolower_ascii(static_cast<unsigned char>(*it)) != tolower_ascii(static_cast<unsigned char>(*lit))) { return false; } } return true; }; auto it_gt = std::find(begin + start, end, HTMLTAG_CLOSE); if (it_gt == end) { HTMLException excp; Loading @@ -1628,15 +1674,13 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, auto* ta = static_cast<TextArea*>(el.get()); { std::vector<char> tel; tel.assign(begin + start, it_gt); tel.assign(begin + start, it_gt); // opening tag without '>' ta->_serialelize(tel); } auto it = it_gt; if (it != end) ++it; auto content_begin = it; size_t lvl = 0; const auto content_begin = it; for (;;) { auto lt = std::find(it, end, '<'); Loading @@ -1645,35 +1689,22 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, throw excp[HTMLException::Error] << "Parsing error: Missing </textarea> closing tag."; } static constexpr char kCloseHead[] = "</textarea"; const ptrdiff_t remain_after_lt = end - lt; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kCloseHead) - 1) && std::equal(lt, lt + (sizeof(kCloseHead) - 1), kCloseHead)) { if (lvl == 0) { auto it_close_gt = std::find(lt, end, '>'); if (it_close_gt == end) { if (ieq_prefix(lt, end, "</textarea")) { auto after_head = lt + std::strlen("</textarea"); while (after_head != end && is_ws(static_cast<unsigned char>(*after_head))) { ++after_head; } if (after_head == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Unclosed </textarea> end tag."; } if (*after_head == '>') { ta->_Text.insert(ta->_Text.end(), content_begin, lt); const size_t consumed = static_cast<size_t>((it_close_gt - begin) + 1); const size_t consumed = static_cast<size_t>((after_head - begin) + 1); return consumed; } else { --lvl; it = lt + (sizeof(kCloseHead) - 1); continue; } } static constexpr char kOpenHead[] = "<textarea"; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kOpenHead) - 1) && std::equal(lt, lt + (sizeof(kOpenHead) - 1), kOpenHead)) { ++lvl; it = lt + (sizeof(kOpenHead) - 1); it = lt + 1; continue; } Loading @@ -1682,6 +1713,7 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, } libhtmlpp::HtmlPage::HtmlPage(){ } Loading src/html.h +1 −1 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ namespace libhtmlpp { std::unique_ptr<Attributes> _nextAttr; }; void _serialelize(std::vector<char> in); void _serialelize(const std::vector<char> &in); private: //if text tagname must be zero std::vector<char> _TagName; Loading Loading
src/html.cpp +99 −67 Original line number Diff line number Diff line Loading @@ -783,95 +783,119 @@ void libhtmlpp::HtmlElement::remove(libhtmlpp::Element* el){ * @throws HTMLException if no tag could be determined. */ void libhtmlpp::HtmlElement::_serialelize(std::vector<char> in) { void libhtmlpp::HtmlElement::_serialelize(const std::vector<char>& in) { _TagName.clear(); bool end_tag = false; auto is_space = [](unsigned char c) -> bool { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; }; auto tolower_ascii = [](unsigned char c) -> unsigned char { return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + 32) : c; }; size_t i = 0, n = in.size(); while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; if (i < n && in[i] == '<') ++i; while (i < n && is_space(static_cast<unsigned char>(in[i]))) ++i; bool end_tag = false; if (i < n && in[i] == '/') { end_tag = true; ++i; while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; while (i < n && is_space(static_cast<unsigned char>(in[i]))) ++i; } size_t st = i; while (i < n) { char c = in[i]; if (std::isspace(static_cast<unsigned char>(c)) || c == '/' || c == '>') break; size_t r = n; while (r > i && is_space(static_cast<unsigned char>(in[r - 1]))) --r; if (r > i && in[r - 1] == '>') --r; // ignore '>' if present while (r > i && is_space(static_cast<unsigned char>(in[r - 1]))) --r; if (i >= r) { HTMLException excp; throw excp[HTMLException::Critical] << "no tag in element found!"; } const size_t name_start = i; while (i < r) { unsigned char c = static_cast<unsigned char>(in[i]); if (is_space(c) || c == '/' || c == '>') break; ++i; } size_t et = i; if (st == et) { const size_t name_end = i; if (name_start == name_end) { HTMLException excp; throw excp[HTMLException::Critical] << "no tag in element found!"; } _TagName.assign(in.begin() + st, in.begin() + et); for (auto &ch : _TagName) ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch))); _TagName.assign(in.begin() + name_start, in.begin() + name_end); for (char& ch : _TagName) ch = static_cast<char>(tolower_ascii(static_cast<unsigned char>(ch))); if (end_tag) { return; } while (i < n) { while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; if (i >= n) break; bool maybe_self_closing = false; while (i < r) { while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; if (i >= r) break; if (in[i] == '/') { maybe_self_closing = true; ++i; while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; continue; } if (in[i] == '>') { while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; break; } size_t kstart = i; while (i < n) { char c = in[i]; if (std::isspace(static_cast<unsigned char>(c)) || c == '=' || c == '/' || c == '>') break; const size_t kstart = i; while (i < r) { unsigned char c = static_cast<unsigned char>(in[i]); if (is_space(c) || c == '=' || c == '/' || c == '>') break; ++i; } size_t kend = i; const size_t kend = i; if (kstart == kend) { ++i; continue; } std::string key(in.begin() + kstart, in.begin() + kend); for (char& ch : key) ch = static_cast<char>(tolower_ascii(static_cast<unsigned char>(ch))); while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; std::string val; if (i < n && in[i] == '=') { if (i < r && in[i] == '=') { ++i; while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i; while (i < r && is_space(static_cast<unsigned char>(in[i]))) ++i; if (i < n && (in[i] == '"' || in[i] == '\'')) { if (i < r && (in[i] == '"' || in[i] == '\'')) { char quote = in[i++]; size_t vstart = i; while (i < n && in[i] != quote) ++i; size_t vend = i; const size_t vstart = i; while (i < r && in[i] != quote) ++i; const size_t vend = i; val.assign(in.begin() + vstart, in.begin() + vend); if (i < n && in[i] == quote) ++i; if (i < r && in[i] == quote) ++i; } else { size_t vstart = i; while (i < n) { char c = in[i]; if (std::isspace(static_cast<unsigned char>(c)) || c == '/' || c == '>') break; const size_t vstart = i; while (i < r) { unsigned char c = static_cast<unsigned char>(in[i]); if (is_space(c) || c == '/' || c == '>') break; ++i; } size_t vend = i; const size_t vend = i; val.assign(in.begin() + vstart, in.begin() + vend); } } else { val.clear(); } setAttribute(key, val); } } size_t libhtmlpp::HtmlElement::parseElement( const std::vector<char>& in, std::unique_ptr<libhtmlpp::Element>& el, Loading Loading @@ -1610,6 +1634,7 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, bool& termination) { termination = false; const auto begin = in.begin(); const auto end = in.end(); Loading @@ -1618,6 +1643,27 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, throw excp[HTMLException::Error] << "Parsing error: start offset beyond buffer."; } // ---- helpers ----------------------------------------------------------- auto is_ws = [](unsigned char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; }; auto tolower_ascii = [](unsigned char c) -> unsigned char { return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + 32) : c; }; auto ieq_prefix = [&](std::vector<char>::const_iterator it, std::vector<char>::const_iterator it_end, const char* lit) -> bool { for (; *lit; ++lit, ++it) { if (it == it_end) return false; if (tolower_ascii(static_cast<unsigned char>(*it)) != tolower_ascii(static_cast<unsigned char>(*lit))) { return false; } } return true; }; auto it_gt = std::find(begin + start, end, HTMLTAG_CLOSE); if (it_gt == end) { HTMLException excp; Loading @@ -1628,15 +1674,13 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, auto* ta = static_cast<TextArea*>(el.get()); { std::vector<char> tel; tel.assign(begin + start, it_gt); tel.assign(begin + start, it_gt); // opening tag without '>' ta->_serialelize(tel); } auto it = it_gt; if (it != end) ++it; auto content_begin = it; size_t lvl = 0; const auto content_begin = it; for (;;) { auto lt = std::find(it, end, '<'); Loading @@ -1645,35 +1689,22 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, throw excp[HTMLException::Error] << "Parsing error: Missing </textarea> closing tag."; } static constexpr char kCloseHead[] = "</textarea"; const ptrdiff_t remain_after_lt = end - lt; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kCloseHead) - 1) && std::equal(lt, lt + (sizeof(kCloseHead) - 1), kCloseHead)) { if (lvl == 0) { auto it_close_gt = std::find(lt, end, '>'); if (it_close_gt == end) { if (ieq_prefix(lt, end, "</textarea")) { auto after_head = lt + std::strlen("</textarea"); while (after_head != end && is_ws(static_cast<unsigned char>(*after_head))) { ++after_head; } if (after_head == end) { HTMLException excp; throw excp[HTMLException::Error] << "Parsing error: Unclosed </textarea> end tag."; } if (*after_head == '>') { ta->_Text.insert(ta->_Text.end(), content_begin, lt); const size_t consumed = static_cast<size_t>((it_close_gt - begin) + 1); const size_t consumed = static_cast<size_t>((after_head - begin) + 1); return consumed; } else { --lvl; it = lt + (sizeof(kCloseHead) - 1); continue; } } static constexpr char kOpenHead[] = "<textarea"; if (remain_after_lt >= static_cast<ptrdiff_t>(sizeof(kOpenHead) - 1) && std::equal(lt, lt + (sizeof(kOpenHead) - 1), kOpenHead)) { ++lvl; it = lt + (sizeof(kOpenHead) - 1); it = lt + 1; continue; } Loading @@ -1682,6 +1713,7 @@ size_t libhtmlpp::TextArea::parseElement(const std::vector<char>& in, } libhtmlpp::HtmlPage::HtmlPage(){ } Loading
src/html.h +1 −1 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ namespace libhtmlpp { std::unique_ptr<Attributes> _nextAttr; }; void _serialelize(std::vector<char> in); void _serialelize(const std::vector<char> &in); private: //if text tagname must be zero std::vector<char> _TagName; Loading