Commit 8251b956 authored by jan.koester's avatar jan.koester
Browse files

test

parent 93fdbaa2
Loading
Loading
Loading
Loading
+119 −86
Original line number Diff line number Diff line
@@ -277,9 +277,7 @@ void libhtmlpp::HtmlString::_buildtreenode(

    auto checkContainer = [&](const std::string &tag) {
        for (size_t i = 0; i < ContainerTypes.size(); ++i) {
            if (tag == ContainerTypes[i]) {
                return true;
            }
            if (tag == ContainerTypes[i]) return true;
        }
        return false;
    };
@@ -293,8 +291,7 @@ void libhtmlpp::HtmlString::_buildtreenode(

    auto find_terminator = [&skip_empty, checkContainer](DocElements *open, const DocElements *bound) -> DocElements* {
        if (!open || !open->element || open->terminator ||
            open->element->getType() != HtmlEl)
            return nullptr;
            open->element->getType() != HtmlEl) return nullptr;

        const std::string &tag = static_cast<HtmlElement*>(open->element.get())->getTagname();
        int nest = 0;
@@ -302,15 +299,13 @@ void libhtmlpp::HtmlString::_buildtreenode(

        while (cur && cur != bound) {
            cur = skip_empty(cur, bound);

            if (!cur || cur == bound) break;

            if (cur->element && cur->element->getType() == HtmlEl) {
                const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname();

                if (curtag == tag) {
                    if (cur->terminator) {
                        if (nest == 0) return cur; // Found!
                        if (nest == 0) return cur; // Found matching end tag
                        --nest;
                    } else {
                        ++nest;
@@ -332,6 +327,15 @@ void libhtmlpp::HtmlString::_buildtreenode(
    for (;;) {
        start = skip_empty(start, end);

        // --- NEW: simply skip terminator nodes on this level ---
        // (End tags are only used to close scopes via find_terminator/the stack;
        //  they are never inserted into the tree as nodes of their own.)
        if (start && start != end && start->terminator) {
            start = start->nextel.get();
            continue;
        }
        // --------------------------------------------------------------------

        if (!start || start == end) {
            if (stack.empty()) {
                if (firstel->element) {
@@ -343,7 +347,8 @@ void libhtmlpp::HtmlString::_buildtreenode(
            Frame fr = stack.top(); stack.pop();
            HtmlElement *opener_el = static_cast<HtmlElement*>(fr.open->element.get());

            if (fr.open->nextel->element ) {
            // SAFETY: open->nextel may be null
            if (fr.open->nextel && fr.open->nextel->element) {
                opener_el->_childElement = std::move(fr.open->nextel->element);
            }

@@ -356,7 +361,9 @@ void libhtmlpp::HtmlString::_buildtreenode(
            }

            prev_el_in_tree = opener_el;
            start           = fr.close->nextel.get();

            // SAFETY: close may be null (should not happen, but stay robust)
            start = (fr.close ? fr.close->nextel.get() : nullptr);
            end   = fr.outer_end;

            continue;
@@ -364,19 +371,16 @@ void libhtmlpp::HtmlString::_buildtreenode(

        if (start->element && !start->terminator && start->element->getType() == HtmlEl) {
            if (DocElements *close = find_terminator(start, end)) {

                stack.push(Frame{start, close, end, prev_el_in_tree});

                prev_el_in_tree = nullptr;
                start           = start->nextel.get();
                end             = close;

                continue;
            }
        }

        if (start->element && !start->terminator) {

            Element *current_el = start->element.get();

            if (prev_el_in_tree) {
@@ -387,9 +391,11 @@ void libhtmlpp::HtmlString::_buildtreenode(
                prev_el_in_tree = current_el;
            }
        }

        start = start->nextel.get();
    }
}

/**
 * @brief Tokenizes the buffer and constructs the intermediate DocElements list,
 * then converts it into a tree stored in @_rootEl.
@@ -399,6 +405,10 @@ void libhtmlpp::HtmlString::_buildTree() {
    DocElements *lastEl = nullptr;
    std::unique_ptr<DocElements> firstEl = nullptr;

    auto is_ignore_sign = [](unsigned char ch){
        return ch == '\r' || ch == '\t' || ch == '\n' || ch == ' ';
    };

    auto starts_with_ci = [&] (const std::vector<char> &s, size_t pos,const std::string_view k) {
        if (pos + k.size() > s.size()) return false;
        for (size_t i = 0; i < k.size(); ++i) {
@@ -441,7 +451,7 @@ void libhtmlpp::HtmlString::_buildTree() {
                addelement(&lastEl);
                ii = HtmlElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator);
            }
        } else if (!std::isspace(static_cast<unsigned char>(_Data[ii]))) {
        } else if (!is_ignore_sign(static_cast<unsigned char>(_Data[ii]))) {
            addelement(&lastEl);
            ii = TextElement::parseElement(_Data, lastEl->element, ii, lastEl->terminator);
        } else {
@@ -770,19 +780,12 @@ void libhtmlpp::HtmlElement::_serialelize(std::vector<char> in) {
                if(vend!=std::string::npos && vstart!=std::string::npos){
                    std::copy(in.begin()+vstart,in.begin()+vend,std::back_inserter(val));
                }

                std::cout << _TagName.data() << "->" << key << ": " << val << std::endl;

                setAttribute(key,val);
            }
        }
    }
}

#include <cctype>
#include <vector>
#include <memory>

size_t libhtmlpp::HtmlElement::parseElement(
    const std::vector<char>& in,
    std::unique_ptr<libhtmlpp::Element>& el,
@@ -791,47 +794,39 @@ size_t libhtmlpp::HtmlElement::parseElement(
){
    el = std::make_unique<HtmlElement>();
    termination = false;
    bool selfClosing = false;

    size_t i = start;
    if (i >= in.size() || in[i] != HTMLTAG_OPEN) {
        return i;
    }
    if (i >= in.size() || in[i] != HTMLTAG_OPEN) return i;

    ++i;

    while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i;

    if (i < in.size() && in[i] == HTMLTAG_TERMINATE) { // '/'
        termination = true;
        ++i; // '/' konsumieren
        while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i;
    }

    std::vector<char> tel;
    while (i < in.size() && in[i] != HTMLTAG_CLOSE) {  // '>'
        tel.emplace_back(in[i]);
        ++i;

    size_t close = i;
    while (close < in.size() && in[close] != HTMLTAG_CLOSE) { // '>'
        ++close;
    }

    while (!tel.empty() && std::isspace(static_cast<unsigned char>(tel.back())))
        tel.pop_back();
    size_t k = i;
    while (k < close && std::isspace(static_cast<unsigned char>(in[k]))) ++k;

    if (!termination && !tel.empty() && tel.back() == HTMLTAG_TERMINATE) {
        selfClosing = true;
        tel.pop_back(); // den '/' entfernen
        while (!tel.empty() && std::isspace(static_cast<unsigned char>(tel.back())))
            tel.pop_back();
    std::vector<char> tel;

    if (k < close && in[k] == HTMLTAG_TERMINATE) { // '/'
        termination = true;
        ++k;
        while (k < close && std::isspace(static_cast<unsigned char>(in[k]))) ++k;
    }

    reinterpret_cast<HtmlElement*>(el.get())->_serialelize(tel);
    tel.insert(tel.end(), in.begin() + k, in.begin() + close);

    if (i < in.size() && in[i] == HTMLTAG_CLOSE) ++i;
    reinterpret_cast<HtmlElement*>(el.get())->_serialelize(tel);

    return i;
    return ++close;
}


namespace libhtmlpp {

    void _copy(libhtmlpp::Element *dest,const libhtmlpp::Element *src){
@@ -1245,28 +1240,34 @@ int libhtmlpp::CommentElement::getType() const{
    return ElementType::CommentEl;
}

size_t libhtmlpp::CommentElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){
    size_t startel=start+3;
    while(start<in.size()){
        if(std::equal(in.begin()+start,in.begin()+(start+3),"-->")) {
size_t libhtmlpp::CommentElement::parseElement(
    const std::vector<char>& in,
    std::unique_ptr<Element>& el,
    size_t start,
    bool& termination
){
    termination = false;
    el = std::make_unique<CommentElement>();
            std::copy(in.begin()+startel,in.begin()+start,
                      std::back_inserter(static_cast<CommentElement*>(el.get())->_Comment));

            start+=2;
    size_t i = start;

    if (i + 3 >= in.size()) return i;
    if (!(in[i] == '<' && in[i+1] == '!' && in[i+2] == '-' && in[i+3] == '-'))
        return i;

    i += 4;

    while (i + 2 < in.size()) {
        if (in[i] == '-' && in[i+1] == '-' && in[i+2] == '>') {
            i += 3;             // nach "-->"
            break;
        }
        ++start;
        static_cast<CommentElement*>(el.get())->_Comment.push_back(in[i]);
        ++i;
    }

    if (start == in.size()) {
        HTMLException excp;
        throw excp[HTMLException::Error] << "Parsing error: Missing --> closing tag.";
    return i;
}

    return start;
}

/**
 * @brief Default constructor; initialises the HtmlElement base with the
 * string "script".
 */
libhtmlpp::ScriptElement::ScriptElement() : HtmlElement("script"){
}
@@ -1301,43 +1302,75 @@ int libhtmlpp::ScriptElement::getType() const{
    return ElementType::ScriptEL;
}

size_t libhtmlpp::ScriptElement::parseElement(const std::vector<char> &in, std::unique_ptr<libhtmlpp::Element>& el, size_t start, bool& termination){
    size_t startel=start;
size_t libhtmlpp::ScriptElement::parseElement(
    const std::vector<char>& in,
    std::unique_ptr<Element>& el,
    size_t start,
    bool& termination
){
    termination = false;

    while(start<in.size()){
        if(in[start]==HTMLTAG_CLOSE) {
    el = std::make_unique<ScriptElement>();
            std::vector<char> tel;
    auto* self = static_cast<ScriptElement*>(el.get()); // _Script ist hier gültig

            std::copy(in.begin()+startel,in.begin()+start,std::back_inserter(tel));
    auto iequals = [](char a, char b) {
        return std::tolower(static_cast<unsigned char>(a)) ==
               std::tolower(static_cast<unsigned char>(b));
    };
    auto match_ci = [&](size_t pos, const char* k) -> bool {
        for (size_t i = 0; k[i]; ++i) {
            if (pos + i >= in.size()) return false;
            if (!iequals(in[pos + i], k[i])) return false;
        }
        return true;
    };

            static_cast<ScriptElement*>(el.get())->_serialelize(tel);
    size_t i = start;

            break;
    if (i >= in.size() || in[i] != '<') return i;

    ++i; // '<'
    while (i < in.size() && std::isspace(static_cast<unsigned char>(in[i]))) ++i;
    const char* kw = "script";
    for (size_t k = 0; kw[k] && i < in.size(); ++k, ++i) {
        if (!iequals(in[i], kw[k])) {
            while (i < in.size() && in[i] != '>') ++i;
            if (i < in.size()) ++i; // '>'
            return i;
        }
        ++start;
    }

    size_t close=++start;
    while (i < in.size() && in[i] != '>') ++i;
    if (i < in.size() && in[i] == '>') ++i; // '>' konsumieren

    while(start<in.size()){
        if( std::equal(in.begin()+start,in.begin()+(start+7),"</script")) {
    size_t content_begin = i;

            std::copy(in.begin()+close,in.begin()+start,
                      std::back_inserter(static_cast<ScriptElement*>(el.get())->_Script));
            start+=8;
            break;
        }
        ++start;
    for (; i < in.size(); ++i) {
        if (in[i] == '<' && match_ci(i, "</script")) {
            size_t j = i + 8; // strlen("</script") == 8
            while (j < in.size() && in[j] != '>') ++j;

            if (i > content_begin) {
                self->_Script.reserve(self->_Script.size() + (i - content_begin));
                std::copy(in.begin() + content_begin, in.begin() + i,
                          std::back_inserter(self->_Script));
            }

    if (start == in.size()) {
        HTMLException excp;
        throw excp[HTMLException::Error] << "Parsing error: Missing </script closing tag.";
            if (j < in.size() && in[j] == '>') {
                i = j + 1;
            }
            return i;
        }
    }

    return start;
    if (in.size() > content_begin) {
        self->_Script.reserve(self->_Script.size() + (in.size() - content_begin));
        std::copy(in.begin() + content_begin, in.end(),
                  std::back_inserter(self->_Script));
    }
    return i;
}



libhtmlpp::SvgElement::SvgElement() : HtmlElement("svg"){