Commit a766a939 authored by jan.koester's avatar jan.koester
Browse files

documented

parent 4ee155f6
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
/**
 * @file html.cpp
 * @brief Definitions for libhtmlpp HTML element types and utilities.
 * @date 2025-10-30
 *
 * This file is part of libhtmlpp and provides HTML parsing, DOM-like element
 * types, serialization helpers, and encoding utilities.
 */
/*******************************************************************************
 * Copyright (c) 2021, Jan Koester jan.koester@gmx.net
 * All rights reserved.
@@ -43,6 +51,10 @@
// Single-character constants for HTML tag syntax; each macro expands to the
// character literal shown.
// NOTE(review): the roles described below are inferred from the macro names
// only - confirm against the parser code that uses them.
// '/' - presumably the slash that marks a terminating (closing) tag.
#define HTMLTAG_TERMINATE '/'
// '>' - presumably the character that ends a tag.
#define HTMLTAG_CLOSE '>'
// '!' - presumably the character that introduces a comment/declaration tag.
#define HTMLTAG_COMMENT '!'
/**
 * @namespace libhtmlpp
 * @brief Core namespace for the libhtmlpp HTML parsing and printing library.
 */

namespace libhtmlpp {

@@ -374,6 +386,10 @@ void libhtmlpp::HtmlString::_buildtreenode(
        start = start->nextel.get();
    }
}
/**
 * @brief Tokenizes the buffer and constructs the intermediate DocElements list,
 * then converts it into a tree stored in @c _rootEl.
 */

void libhtmlpp::HtmlString::_buildTree() {
    DocElements *lastEl = nullptr;
@@ -527,11 +543,23 @@ void libhtmlpp::HtmlString::_buildTree() {

    _buildtreenode(firstEl.get(),nullptr,_rootEl);
}
/**
 * @brief Stream-insertion operator for libhtmlpp::HtmlString.
 *
 * Writes the std::string returned by @c p.str() to the given stream.
 *
 * @param os Output stream to write to.
 * @param p  HtmlString whose contents are written.
 * @return Reference to @p os, so that insertions can be chained.
 */

std::ostream& operator<<(std::ostream& os, const libhtmlpp::HtmlString& p) {
    // Inserting a std::string into an ostream yields the same stream back.
    return os << p.str();
}
/**
 * @brief Extracts tag name and attributes from a token vector into an HtmlElement.
 * @param in Token buffer containing a single open tag.
 * @param out Destination HtmlElement to populate.
 * @throws HTMLException if no tag could be determined.
 */

void libhtmlpp::HtmlString::_serialelize(std::vector<char> in, libhtmlpp::HtmlElement *out) {
    size_t st=0,et=0;
@@ -1241,6 +1269,12 @@ libhtmlpp::HtmlPage::HtmlPage(){

libhtmlpp::HtmlPage::~HtmlPage(){
}
/**
 * @brief Loads an HTML file from disk into a given HtmlElement root.
 * @param html Output root element to populate.
 * @param path Filesystem path to the HTML file.
 * @throws HTMLException if the file cannot be opened or parsed.
 */

void libhtmlpp::HtmlPage::loadFile(libhtmlpp::HtmlElement &html,const std::string& path){
    std::string data;
@@ -1264,6 +1298,12 @@ void libhtmlpp::HtmlPage::loadFile(libhtmlpp::HtmlElement &html,const std::strin
    _CheckHeader(data);
    loadString(html,data);
}
/**
 * @brief Parses an HTML source string and copies the result into @p html.
 * @param html Output root element to populate.
 * @param src Input HTML source.
 * @throws HTMLException on malformed input.
 */

void libhtmlpp::HtmlPage::loadString(libhtmlpp::HtmlElement &html,const std::string &src){
    HtmlString buf=src;
@@ -1285,6 +1325,12 @@ void loadString(libhtmlpp::HtmlElement &html,const libhtmlpp::HtmlString *node){
    libhtmlpp::HtmlString buf=*node;
    html=(libhtmlpp::HtmlElement&)buf.parse();
}
/**
 * @brief Serializes an HtmlElement subtree and writes it to a file.
 * @param html Element to serialize.
 * @param path Destination file path.
 * @throws HTMLException on I/O errors.
 */

void libhtmlpp::HtmlPage::saveFile(libhtmlpp::HtmlElement &html,const std::string& path){
    HtmlString data;
@@ -1389,6 +1435,12 @@ void libhtmlpp::HtmlPage::_CheckHeader(const HtmlString &page){
    _Html5=!html4;

}
/**
 * @brief Serializes an element (and its subtree) into an HtmlString (definition).
 * @param element Root element to print.
 * @param output Output buffer to append serialized HTML to.
 * @param formated If true, pretty-prints the output with line breaks/indentation.
 */

void libhtmlpp::print(const Element &element, HtmlString &output,bool formated) {

+63 −0
Original line number Diff line number Diff line
/**
 * @file html.h
 * @brief Public declarations for libhtmlpp HTML element types and utilities.
 * @date 2025-10-30
 *
 * This file is part of libhtmlpp and provides HTML parsing, DOM-like element
 * types, serialization helpers, and encoding utilities.
 */
/*******************************************************************************
Copyright (c) 2014, Jan Koester jan.koester@gmx.net
All rights reserved.
@@ -34,10 +42,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <memory>

#pragma once
/**
 * @namespace libhtmlpp
 * @brief Core namespace for the libhtmlpp HTML parsing and printing library.
 */

namespace libhtmlpp {
/**
 * @brief Internal helper frame used while building the tree during parsing.
 */
    class DocElements;
/**
 * @brief Concrete element representing a generic HTML tag with attributes and child/next links.
 */
    class HtmlElement;
/**
 * @brief Mutable string buffer for HTML input/output with parsing facilities.
 */
    class HtmlString;

    enum ElementType{
@@ -47,6 +68,9 @@ namespace libhtmlpp {
        ScriptEL=3,
        SvgEL=4
    };
/**
 * @brief Abstract base class for all nodes in the HTML tree. Provides linkage and common operations.
 */

    class Element {
    public:
@@ -140,6 +164,9 @@ namespace libhtmlpp {
        friend void  print(const Element& element, HtmlString &output,bool formated);
        friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src);
    };
/**
 * @brief Leaf node representing plain text content of an HTML document.
 */

    class TextElement : public Element {
    public:
@@ -161,6 +188,9 @@ namespace libhtmlpp {
        friend void  print(const Element& element, HtmlString &output,bool formated);
        friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src);
    };
/**
 * @brief Leaf node representing an HTML comment (`<!-- -->`).
 */

    class CommentElement : public Element{
    public:
@@ -181,6 +211,9 @@ namespace libhtmlpp {
        friend void  print(const Element& element, HtmlString &output,bool formated);
        friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src);
    };
/**
 * @brief Element representing a `<script>` tag and its text content.
 */

    class ScriptElement : public HtmlElement{
    public:
@@ -210,6 +243,9 @@ namespace libhtmlpp {
        friend void  print(const Element& element, HtmlString &output,bool formated);
        friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src);
    };
/**
 * @brief Element representing an embedded `<svg>` tag and its attributes/content.
 */

    class SvgElement : public HtmlElement{
    public:
@@ -239,6 +275,13 @@ namespace libhtmlpp {
        friend void  print(const Element& element, HtmlString &output,bool formated);
        friend void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src);
    };
/**
 * @brief Serializes an element (and its subtree) into an HtmlString.
 * @param element Root element to print.
 * @param output Output buffer to append serialized HTML to.
 * @param formated If true, pretty-prints the output with line breaks/indentation.
 */


    void print(const Element& element, HtmlString &output,bool formated=false);

@@ -284,6 +327,11 @@ namespace libhtmlpp {
        const std::vector<char>&  data() const;
        const std::string                  str() const;
        const char                         *c_str() const;
/**
 * @brief Parses the current buffer into a DOM-like tree and returns the root element.
 * @return Reference to the root Element stored internally.
 * @throws HTMLException on malformed input.
 */

        libhtmlpp::Element &parse();

@@ -296,9 +344,24 @@ namespace libhtmlpp {
        friend void HtmlEncode(const std::string &input,HtmlString *output);
        friend class HtmlPage;
    };
/**
 * @brief Encodes special HTML characters in a string and appends to an HtmlString.
 * @param input Plain input string.
 * @param output Destination HtmlString that receives encoded characters.
 */


    void HtmlEncode(const std::string &input,HtmlString *output);
/**
 * @brief Encodes special HTML characters in a string and writes into std::string.
 * @param input Plain input string.
 * @param output Receives encoded HTML string.
 */

    void HtmlEncode(const std::string &input,std::string &output);
/**
 * @brief High level loader/saver for HTML documents (files and strings).
 */

    class HtmlPage {
    public: