Loading src/html.cpp +52 −0 Original line number Diff line number Diff line /** * @file html.cpp * @brief Definitions for libhtmlpp HTML element types and utilities. * @date 2025-10-30 * * This file is part of libhtmlpp and provides HTML parsing, DOM-like element * types, serialization helpers, and encoding utilities. */ /******************************************************************************* * Copyright (c) 2021, Jan Koester jan.koester@gmx.net * All rights reserved. Loading Loading @@ -43,6 +51,10 @@ #define HTMLTAG_TERMINATE '/' #define HTMLTAG_CLOSE '>' #define HTMLTAG_COMMENT '!' /** * @namespace libhtmlpp * @brief Core namespace for the libhtmlpp HTML parsing and printing library. */ namespace libhtmlpp { Loading Loading @@ -374,6 +386,10 @@ void libhtmlpp::HtmlString::_buildtreenode( start = start->nextel.get(); } } /** * @brief Tokenizes the buffer and constructs the intermediate DocElements list, * then converts it into a tree stored in @c _rootEl. */ void libhtmlpp::HtmlString::_buildTree() { DocElements *lastEl = nullptr; Loading Loading @@ -527,11 +543,23 @@ void libhtmlpp::HtmlString::_buildTree() { _buildtreenode(firstEl.get(),nullptr,_rootEl); } /** * @brief Streams an HtmlString to an output stream using its underlying string. * @param os Output stream. * @param p HtmlString to stream. * @return Reference to @p os. */ std::ostream& operator<<(std::ostream& os, const libhtmlpp::HtmlString& p) { os << p.str(); return os; } /** * @brief Extracts tag name and attributes from a token vector into an HtmlElement. * @param in Token buffer containing a single open tag. * @param out Destination HtmlElement to populate. * @throws HTMLException if no tag could be determined. 
*/ void libhtmlpp::HtmlString::_serialelize(std::vector<char> in, libhtmlpp::HtmlElement *out) { size_t st=0,et=0; Loading Loading @@ -1241,6 +1269,12 @@ libhtmlpp::HtmlPage::HtmlPage(){ libhtmlpp::HtmlPage::~HtmlPage(){ } /** * @brief Loads an HTML file from disk into a given HtmlElement root. * @param html Output root element to populate. * @param path Filesystem path to the HTML file. * @throws HTMLException if the file cannot be opened or parsed. */ void libhtmlpp::HtmlPage::loadFile(libhtmlpp::HtmlElement &html,const std::string& path){ std::string data; Loading @@ -1264,6 +1298,12 @@ void libhtmlpp::HtmlPage::loadFile(libhtmlpp::HtmlElement &html,const std::strin _CheckHeader(data); loadString(html,data); } /** * @brief Parses an HTML source string and copies the result into @p html. * @param html Output root element to populate. * @param src Input HTML source. * @throws HTMLException on malformed input. */ void libhtmlpp::HtmlPage::loadString(libhtmlpp::HtmlElement &html,const std::string &src){ HtmlString buf=src; Loading @@ -1285,6 +1325,12 @@ void loadString(libhtmlpp::HtmlElement &html,const libhtmlpp::HtmlString *node){ libhtmlpp::HtmlString buf=*node; html=(libhtmlpp::HtmlElement&)buf.parse(); } /** * @brief Serializes an HtmlElement subtree and writes it to a file. * @param html Element to serialize. * @param path Destination file path. * @throws HTMLException on I/O errors. */ void libhtmlpp::HtmlPage::saveFile(libhtmlpp::HtmlElement &html,const std::string& path){ HtmlString data; Loading Loading @@ -1389,6 +1435,12 @@ void libhtmlpp::HtmlPage::_CheckHeader(const HtmlString &page){ _Html5=!html4; } /** * @brief Serializes an element (and its subtree) into an HtmlString (definition). * @param element Root element to print. * @param output Output buffer to append serialized HTML to. * @param formated If true, pretty-prints the output with line breaks/indentation. 
*/ void libhtmlpp::print(const Element &element, HtmlString &output,bool formated) { Loading src/html.h +63 −0 Original line number Diff line number Diff line /** * @file html.h * @brief Public declarations for libhtmlpp HTML element types and utilities. * @date 2025-10-30 * * This file is part of libhtmlpp and provides HTML parsing, DOM-like element * types, serialization helpers, and encoding utilities. */ /******************************************************************************* Copyright (c) 2014, Jan Koester jan.koester@gmx.net All rights reserved. Loading Loading @@ -34,10 +42,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <memory> #pragma once /** * @namespace libhtmlpp * @brief Core namespace for the libhtmlpp HTML parsing and printing library. */ namespace libhtmlpp { /** * @brief Internal helper frame used while building the tree during parsing. */ class DocElements; /** * @brief Concrete element representing a generic HTML tag with attributes and child/next links. */ class HtmlElement; /** * @brief Mutable string buffer for HTML input/output with parsing facilities. */ class HtmlString; enum ElementType{ Loading @@ -47,6 +68,9 @@ namespace libhtmlpp { ScriptEL=3, SvgEL=4 }; /** * @brief Abstract base class for all nodes in the HTML tree. Provides linkage and common operations. */ class Element { public: Loading Loading @@ -140,6 +164,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Leaf node representing plain text content of an HTML document. */ class TextElement : public Element { public: Loading @@ -161,6 +188,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Leaf node representing an HTML comment (\<!\-- \--\>). 
*/ class CommentElement : public Element{ public: Loading @@ -181,6 +211,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Element representing a \<script\> tag and its text content. */ class ScriptElement : public HtmlElement{ public: Loading Loading @@ -210,6 +243,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Element representing an embedded \<svg\> tag and its attributes/content. */ class SvgElement : public HtmlElement{ public: Loading Loading @@ -239,6 +275,13 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Serializes an element (and its subtree) into an HtmlString. * @param element Root element to print. * @param output Output buffer to append serialized HTML to. * @param formated If true, pretty-prints the output with line breaks/indentation. */ void print(const Element& element, HtmlString &output,bool formated=false); Loading Loading @@ -284,6 +327,11 @@ namespace libhtmlpp { const std::vector<char>& data() const; const std::string str() const; const char *c_str() const; /** * @brief Parses the current buffer into a DOM-like tree and returns the root element. * @return Reference to the root Element stored internally. * @throws HTMLException on malformed input. */ libhtmlpp::Element &parse(); Loading @@ -296,9 +344,24 @@ namespace libhtmlpp { friend void HtmlEncode(const std::string &input,HtmlString *output); friend class HtmlPage; }; /** * @brief Encodes special HTML characters in a string and appends the result to an HtmlString. * @param input Plain input string. * @param output Destination HtmlString that receives encoded characters. 
*/ void HtmlEncode(const std::string &input,HtmlString *output); /** * @brief Encodes special HTML characters in a string and writes the result into a std::string. * @param input Plain input string. * @param output Receives encoded HTML string. */ void HtmlEncode(const std::string &input,std::string &output); /** * @brief High-level loader/saver for HTML documents (files and strings). */ class HtmlPage { public: Loading Loading
src/html.cpp +52 −0 Original line number Diff line number Diff line /** * @file html.cpp * @brief Definitions for libhtmlpp HTML element types and utilities. * @date 2025-10-30 * * This file is part of libhtmlpp and provides HTML parsing, DOM-like element * types, serialization helpers, and encoding utilities. */ /******************************************************************************* * Copyright (c) 2021, Jan Koester jan.koester@gmx.net * All rights reserved. Loading Loading @@ -43,6 +51,10 @@ #define HTMLTAG_TERMINATE '/' #define HTMLTAG_CLOSE '>' #define HTMLTAG_COMMENT '!' /** * @namespace libhtmlpp * @brief Core namespace for the libhtmlpp HTML parsing and printing library. */ namespace libhtmlpp { Loading Loading @@ -374,6 +386,10 @@ void libhtmlpp::HtmlString::_buildtreenode( start = start->nextel.get(); } } /** * @brief Tokenizes the buffer and constructs the intermediate DocElements list, * then converts it into a tree stored in @c _rootEl. */ void libhtmlpp::HtmlString::_buildTree() { DocElements *lastEl = nullptr; Loading Loading @@ -527,11 +543,23 @@ void libhtmlpp::HtmlString::_buildTree() { _buildtreenode(firstEl.get(),nullptr,_rootEl); } /** * @brief Streams an HtmlString to an output stream using its underlying string. * @param os Output stream. * @param p HtmlString to stream. * @return Reference to @p os. */ std::ostream& operator<<(std::ostream& os, const libhtmlpp::HtmlString& p) { os << p.str(); return os; } /** * @brief Extracts tag name and attributes from a token vector into an HtmlElement. * @param in Token buffer containing a single open tag. * @param out Destination HtmlElement to populate. * @throws HTMLException if no tag could be determined. 
*/ void libhtmlpp::HtmlString::_serialelize(std::vector<char> in, libhtmlpp::HtmlElement *out) { size_t st=0,et=0; Loading Loading @@ -1241,6 +1269,12 @@ libhtmlpp::HtmlPage::HtmlPage(){ libhtmlpp::HtmlPage::~HtmlPage(){ } /** * @brief Loads an HTML file from disk into a given HtmlElement root. * @param html Output root element to populate. * @param path Filesystem path to the HTML file. * @throws HTMLException if the file cannot be opened or parsed. */ void libhtmlpp::HtmlPage::loadFile(libhtmlpp::HtmlElement &html,const std::string& path){ std::string data; Loading @@ -1264,6 +1298,12 @@ void libhtmlpp::HtmlPage::loadFile(libhtmlpp::HtmlElement &html,const std::strin _CheckHeader(data); loadString(html,data); } /** * @brief Parses an HTML source string and copies the result into @p html. * @param html Output root element to populate. * @param src Input HTML source. * @throws HTMLException on malformed input. */ void libhtmlpp::HtmlPage::loadString(libhtmlpp::HtmlElement &html,const std::string &src){ HtmlString buf=src; Loading @@ -1285,6 +1325,12 @@ void loadString(libhtmlpp::HtmlElement &html,const libhtmlpp::HtmlString *node){ libhtmlpp::HtmlString buf=*node; html=(libhtmlpp::HtmlElement&)buf.parse(); } /** * @brief Serializes an HtmlElement subtree and writes it to a file. * @param html Element to serialize. * @param path Destination file path. * @throws HTMLException on I/O errors. */ void libhtmlpp::HtmlPage::saveFile(libhtmlpp::HtmlElement &html,const std::string& path){ HtmlString data; Loading Loading @@ -1389,6 +1435,12 @@ void libhtmlpp::HtmlPage::_CheckHeader(const HtmlString &page){ _Html5=!html4; } /** * @brief Serializes an element (and its subtree) into an HtmlString (definition). * @param element Root element to print. * @param output Output buffer to append serialized HTML to. * @param formated If true, pretty-prints the output with line breaks/indentation. 
*/ void libhtmlpp::print(const Element &element, HtmlString &output,bool formated) { Loading
src/html.h +63 −0 Original line number Diff line number Diff line /** * @file html.h * @brief Public declarations for libhtmlpp HTML element types and utilities. * @date 2025-10-30 * * This file is part of libhtmlpp and provides HTML parsing, DOM-like element * types, serialization helpers, and encoding utilities. */ /******************************************************************************* Copyright (c) 2014, Jan Koester jan.koester@gmx.net All rights reserved. Loading Loading @@ -34,10 +42,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <memory> #pragma once /** * @namespace libhtmlpp * @brief Core namespace for the libhtmlpp HTML parsing and printing library. */ namespace libhtmlpp { /** * @brief Internal helper frame used while building the tree during parsing. */ class DocElements; /** * @brief Concrete element representing a generic HTML tag with attributes and child/next links. */ class HtmlElement; /** * @brief Mutable string buffer for HTML input/output with parsing facilities. */ class HtmlString; enum ElementType{ Loading @@ -47,6 +68,9 @@ namespace libhtmlpp { ScriptEL=3, SvgEL=4 }; /** * @brief Abstract base class for all nodes in the HTML tree. Provides linkage and common operations. */ class Element { public: Loading Loading @@ -140,6 +164,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Leaf node representing plain text content of an HTML document. */ class TextElement : public Element { public: Loading @@ -161,6 +188,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Leaf node representing an HTML comment (\<!\-- \--\>). 
*/ class CommentElement : public Element{ public: Loading @@ -181,6 +211,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Element representing a \<script\> tag and its text content. */ class ScriptElement : public HtmlElement{ public: Loading Loading @@ -210,6 +243,9 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Element representing an embedded \<svg\> tag and its attributes/content. */ class SvgElement : public HtmlElement{ public: Loading Loading @@ -239,6 +275,13 @@ namespace libhtmlpp { friend void print(const Element& element, HtmlString &output,bool formated); friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src); }; /** * @brief Serializes an element (and its subtree) into an HtmlString. * @param element Root element to print. * @param output Output buffer to append serialized HTML to. * @param formated If true, pretty-prints the output with line breaks/indentation. */ void print(const Element& element, HtmlString &output,bool formated=false); Loading Loading @@ -284,6 +327,11 @@ namespace libhtmlpp { const std::vector<char>& data() const; const std::string str() const; const char *c_str() const; /** * @brief Parses the current buffer into a DOM-like tree and returns the root element. * @return Reference to the root Element stored internally. * @throws HTMLException on malformed input. */ libhtmlpp::Element &parse(); Loading @@ -296,9 +344,24 @@ namespace libhtmlpp { friend void HtmlEncode(const std::string &input,HtmlString *output); friend class HtmlPage; }; /** * @brief Encodes special HTML characters in a string and appends the result to an HtmlString. * @param input Plain input string. * @param output Destination HtmlString that receives encoded characters. 
*/ void HtmlEncode(const std::string &input,HtmlString *output); /** * @brief Encodes special HTML characters in a string and writes the result into a std::string. * @param input Plain input string. * @param output Receives encoded HTML string. */ void HtmlEncode(const std::string &input,std::string &output); /** * @brief High-level loader/saver for HTML documents (files and strings). */ class HtmlPage { public: Loading