Commit a6055168 authored by jan.koester's avatar jan.koester
Browse files

some fixes

parent 8251b956
Loading
Loading
Loading
Loading
+185 −111
Original line number Diff line number Diff line
@@ -263,17 +263,17 @@ void libhtmlpp::HtmlString::_buildtreenode(
    }

    // One entry of the explicit work stack used by the loop below: a Frame is
    // pushed when a start tag with a matching end tag is entered and popped
    // once that tag's range has been walked, so the outer level can resume.
    // NOTE(review): this listing is a rendered diff; the four uncommented
    // members look like the removed lines and the commented ones like their
    // replacements - confirm against the real file.
    struct Frame {
        DocElements *open;
        DocElements *close;
        const DocElements *outer_end;
        Element *outer_prev_el;
        DocElements *open;              // DocElements of the start tag that opened this frame
        DocElements *close;             // DocElements of the matching end tag (terminator)
        const DocElements *outer_end;   // boundary of the level that was active when the frame was pushed
        Element *outer_prev_el;         // last sibling already linked into the tree on the outer level
    };
    std::stack<Frame> stack;

    DocElements *start = firstel;
    const DocElements *end = lastel;
    const DocElements *end = lastel;    // nullptr bedeutet: bis Ketten-Ende

    Element *prev_el_in_tree = nullptr;
    Element *prev_el_in_tree = nullptr; // zuletzt in den Baum eingebautes Element

    auto checkContainer = [&](const std::string &tag) {
        for (size_t i = 0; i < ContainerTypes.size(); ++i) {
@@ -282,6 +282,7 @@ void libhtmlpp::HtmlString::_buildtreenode(
        return false;
    };

    // Leere DocElements überspringen (z. B. Kommentare/Text, die nicht als element abgebildet sind)
    auto skip_empty = [](DocElements *cur, const DocElements *stop) -> DocElements* {
        while (cur && cur != stop && (!cur->element)) {
            cur = cur->nextel.get();
@@ -289,6 +290,7 @@ void libhtmlpp::HtmlString::_buildtreenode(
        return cur;
    };

    // Finde zum gegebenen Start-Tag dessen passenden Terminator in [open->nextel, bound).
    auto find_terminator = [&skip_empty, checkContainer](DocElements *open, const DocElements *bound) -> DocElements* {
        if (!open || !open->element || open->terminator ||
            open->element->getType() != HtmlEl) return nullptr;
@@ -305,7 +307,7 @@ void libhtmlpp::HtmlString::_buildtreenode(
                const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname();
                if (curtag == tag) {
                    if (cur->terminator) {
                        if (nest == 0) return cur; // Found matching end tag
                        if (nest == 0) return cur; // passendes End-Tag gefunden
                        --nest;
                    } else {
                        ++nest;
@@ -315,6 +317,7 @@ void libhtmlpp::HtmlString::_buildtreenode(
            cur = cur->nextel.get();
        }

        // Wenn ein Container nicht geschlossen wurde: Fehler werfen
        if (checkContainer(tag)) {
            HTMLException e;
            e[HTMLException::Error] << tag << " must be terminated ! "
@@ -325,54 +328,84 @@ void libhtmlpp::HtmlString::_buildtreenode(
    };

    for (;;) {
        // bis zum nächsten sinnvollen DocElement laufen
        start = skip_empty(start, end);

        // --- NEU: Terminator-Knoten auf dieser Ebene einfach überspringen ---
        // (End-Tags werden nur benutzt, um in find_terminator/Stack zu schließen,
        //  aber niemals als eigene Tree-Knoten eingebaut.)
        // Terminator-Knoten als eigenständige Nodes überspringen
        if (start && start != end && start->terminator) {
            start = start->nextel.get();
            continue;
        }
        // --------------------------------------------------------------------

        // Ende der aktuellen Ebene erreicht?
        if (!start || start == end) {
            if (stack.empty()) {
                // Ganz oben: Root setzen (falls vorhanden)
                if (firstel->element) {
                    html = std::move(firstel->element);
                }
                return;
            }

            // Frame schließen: Kinderbereich [open->nextel, close) einsammeln
            Frame fr = stack.top(); stack.pop();
            HtmlElement *opener_el = static_cast<HtmlElement*>(fr.open->element.get());

            // SAFETY: open->nextel kann null sein
            if (fr.open->nextel && fr.open->nextel->element) {
                opener_el->_childElement = std::move(fr.open->nextel->element);
            // Alle Kinder zwischen open und close verketten
            Element* last_child_in_chain = nullptr;
            {
                DocElements* cur = fr.open->nextel.get();
                // bis zum ersten brauchbaren Kind
                while (cur && cur != fr.close && (!cur->element || cur->terminator)) {
                    cur = cur->nextel.get();
                }

                // alle nicht-leeren, nicht-Terminierer bis vor close anbinden
                while (cur && cur != fr.close) {
                    if (cur->element && !cur->terminator) {
                        if (!opener_el->_childElement) {
                            opener_el->_childElement = std::move(cur->element);
                            last_child_in_chain = opener_el->_childElement.get();
                        } else {
                            last_child_in_chain->_nextElement = std::move(cur->element);
                            // _prev (falls genutzt) setzen
                            last_child_in_chain->_nextElement->_prevElement = last_child_in_chain;
                            last_child_in_chain = last_child_in_chain->_nextElement.get();
                        }
                    }
                    cur = cur->nextel.get();
                    // leere/terminator Knoten überspringen
                    while (cur && cur != fr.close && (!cur->element || cur->terminator)) {
                        cur = cur->nextel.get();
                    }
                }
            }

            // dieses Container-Element ist nun das "aktuelle" in der äußeren Ebene
            prev_el_in_tree = opener_el;

            // Wenn es bereits ein vorheriges Geschwister in der äußeren Ebene gibt: verketten
            if (fr.outer_prev_el) {
                prev_el_in_tree->_prevElement = fr.outer_prev_el;
                fr.outer_prev_el->_nextElement = std::move(fr.open->element);
                prev_el_in_tree = fr.outer_prev_el->_nextElement.get();
            }

            // für die äußere Ebene fortsetzen
            prev_el_in_tree = opener_el;

            // SAFETY: close kann null sein (sollte nicht vorkommen, aber robust sein)
            start = (fr.close ? fr.close->nextel.get() : nullptr);
            end   = fr.outer_end;

            continue;
        }

        // Start-Tag eines HTML-Elements: passenden Terminator suchen → in den Stack tauchen
        if (start->element && !start->terminator && start->element->getType() == HtmlEl) {
            if (DocElements *close = find_terminator(start, end)) {
                // neuen Rahmen für diesen Container aufmachen
                stack.push(Frame{start, close, end, prev_el_in_tree});

                // wir wechseln in die innere Ebene: prev zurücksetzen
                prev_el_in_tree = nullptr;
                start           = start->nextel.get();
                end             = close;
@@ -380,6 +413,7 @@ void libhtmlpp::HtmlString::_buildtreenode(
            }
        }

        // "normales" Element (kein Container mit eigenem Terminatorbereich):
        if (start->element && !start->terminator) {
            Element *current_el = start->element.get();

@@ -392,6 +426,7 @@ void libhtmlpp::HtmlString::_buildtreenode(
            }
        }

        // weiter zum nächsten DocElement
        start = start->nextel.get();
    }
}
@@ -705,84 +740,93 @@ void libhtmlpp::HtmlElement::remove(libhtmlpp::Element* el){
 */

/*
 * Parses the raw tag text in `in` and fills this element from it: the tag
 * name is stored in _TagName and every attribute that is found is passed to
 * setAttribute(). Throws an HTMLException (Critical) when no tag name is
 * present.
 *
 * NOTE(review): this listing is a rendered diff, so statements of the removed
 * implementation (the st/et/send scanning loops and the kstart/vstart
 * bookkeeping) are interleaved with the added cursor-based parser (i/n).
 * The inline comments below describe only the statement they sit on -
 * confirm against the real file.
 * NOTE(review): the removed scan skipped a leading '<'; the added code skips
 * only whitespace and an optional '/', so it appears to expect `in` without
 * the '<' - confirm with the caller.
 */
void libhtmlpp::HtmlElement::_serialelize(std::vector<char> in) {
    size_t st=0,et=0;
     _TagName.clear();
    // becomes true when the text starts with '/' (checked further down)
    bool end_tag = false;

    bool send=false;
    // i is the read cursor, n the input length
    size_t i = 0, n = in.size();
    // skip leading whitespace
    while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i;

    for (size_t i = 0; i < in.size() && !send; ++i) {
        switch (in[i]) {
            case '<':
                continue;
            case '/':
                continue;
            case ' ':
                continue;
            default:
                st=i;
                send=true;
        }
    // a '/' at the cursor marks an end tag; step over it and any whitespace after it
    if (i < n && in[i] == '/') {
        end_tag = true;
        ++i;
        while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i;
    }

    for(et=st; et<in.size(); et++){
        if(in[et]==' ' || in[et]=='/' || in[et]=='>' || in[et]=='\r' || in[et]=='\n'){
            break;
        }
    // the tag name starts here and runs up to the first whitespace, '/' or '>'
    size_t st = i;
    while (i < n) {
        char c = in[i];
        if (std::isspace(static_cast<unsigned char>(c)) || c == '/' || c == '>') break;
        ++i;
    }


    std::copy(in.begin()+st,in.begin()+(et),std::back_inserter(_TagName));

    if (_TagName.empty()) {
    size_t et = i;
    // an empty name range is reported as a critical error
    if (st == et) {
        HTMLException excp;
        throw excp[HTMLException::Critical] << "no tag in element found!";
    }

    _TagName.emplace_back('\0');
    // store the name range, then convert every character to lower case
    _TagName.assign(in.begin() + st, in.begin() + et);
    for (auto &ch : _TagName) ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));

    for(size_t i=et; i<in.size(); i++){
        if(in[i]=='>')
    // end tags return here, before any attribute parsing
    if (end_tag) {
        return;
        size_t kstart=std::string::npos,kend=std::string::npos;
        if(in[i]!=' ' && in[i]!='\r' && in[i]!='\n' ){
            kstart=i;
            kend=in.size();
            bool value=false;
            size_t vstart=std::string::npos,vend=std::string::npos;
            while(i<in.size()){
                if (in[i]=='='){
                    value=true;
                    kend=i++;
                    break;
    }

    // attribute loop: each pass consumes whitespace and then one key with an optional value
    while (i < n) {
        while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i;
        if (i >= n) break;

        // a '/' at this position is stepped over
        if (in[i] == '/') {
            ++i;
            }
            if(value){
                while(i<in.size()){
                    if(in[i]=='\"'){
                        if (vstart==std::string::npos){
                            vstart=++i;
            while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i;
            continue;
                        }else{
                            vend=i;
                            break;
        }
        // '>' stops the attribute loop
        if (in[i] == '>') {
            break;
        }

        // key: runs up to the first whitespace, '=', '/' or '>'
        size_t kstart = i;
        while (i < n) {
            char c = in[i];
            if (std::isspace(static_cast<unsigned char>(c)) || c == '=' || c == '/' || c == '>') break;
            ++i;
        }
        size_t kend = i;
        // empty key: advance by one character so the loop keeps moving
        if (kstart == kend) {
            ++i;
            continue;
        }

            if(kstart !=std::string::npos){
        std::string key(in.begin() + kstart, in.begin() + kend);

                std::string key,val;
        // whitespace between the key and a possible '=' is skipped
        while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i;

                std::copy(in.begin()+kstart,in.begin()+kend,std::back_inserter(key));
        std::string val;

                if(vend!=std::string::npos && vstart!=std::string::npos){
                    std::copy(in.begin()+vstart,in.begin()+vend,std::back_inserter(val));
        // '=' introduces a value; whitespace after it is skipped
        if (i < n && in[i] == '=') {
            ++i;
            while (i < n && std::isspace(static_cast<unsigned char>(in[i]))) ++i;

            // quoted value: everything up to the next occurrence of the same quote character
            if (i < n && (in[i] == '"' || in[i] == '\'')) {
                char quote = in[i++];
                size_t vstart = i;
                while (i < n && in[i] != quote) ++i;
                size_t vend = i;
                val.assign(in.begin() + vstart, in.begin() + vend);
                // step over the closing quote when there is one
                if (i < n && in[i] == quote) ++i;
            } else {
                // unquoted value: runs up to the first whitespace, '/' or '>'
                size_t vstart = i;
                while (i < n) {
                    char c = in[i];
                    if (std::isspace(static_cast<unsigned char>(c)) || c == '/' || c == '>') break;
                    ++i;
                }
                setAttribute(key,val);
                size_t vend = i;
                val.assign(in.begin() + vstart, in.begin() + vend);
            }
        } else {
            // no '=' after the key: the value is set to the string "true"
            val = "true";
        }
        setAttribute(key, val);
    }
}

@@ -1172,40 +1216,56 @@ int libhtmlpp::TextElement::getType() const{
    return ElementType::TextEl;
}

/*
 * Collects text from `in`, beginning at index `start`, until HTMLTAG_OPEN or
 * the end of the buffer is reached. When at least one character was
 * collected, a new TextElement holding it is created and moved into `el`.
 * `termination` is set to false.
 *
 * NOTE(review): this listing is a rendered diff, so the removed
 * implementation (switch-based, advancing `start`, by-value `in`) is
 * interleaved with the added one (index `i`, `in` by const reference);
 * both signature lines are shown. The inline comments below describe only
 * the statement they sit on - confirm against the real file.
 * NOTE(review): in the added loop a whitespace run directly before
 * HTMLTAG_OPEN still pushes one ' ', so the stored text can end in a space -
 * confirm this is intended.
 */
size_t libhtmlpp::TextElement::parseElement(const std::vector<char> in, std::unique_ptr<libhtmlpp::Element>& el, size_t start,bool &termination){
size_t libhtmlpp::TextElement::parseElement(
    const std::vector<char>& in,
    std::unique_ptr<libhtmlpp::Element>& el,
    size_t start,
    bool &termination
){
    termination = false;

    std::vector<char> buf;
    bool empty=true,end=false;
    while(start<in.size() && !end){
        switch(in[start]){
            case HTMLTAG_OPEN: {
                if(!buf.empty()){
                    el=std::make_unique<TextElement>();
                    std::copy(buf.begin(),buf.end(),std::back_inserter((static_cast<TextElement*>(el.get()))->_Text));
                }
                end=true;
    buf.reserve(64);
    // seen_nonws: a non-whitespace character has already been buffered
    bool seen_nonws = false;
    // last_was_space: the most recently buffered character is the ' ' for a whitespace run
    bool last_was_space = false;

    size_t i = start;
    while (i < in.size()) {
        char c = in[i];
        // stop at the start of the next tag; i is left on that character
        if (c == HTMLTAG_OPEN) {
            break;
            };
            case '\r':{
                ++start;
            }break;
            case '\n':{
                ++start;
            }break;
            case ' ':{
                if(!empty)
                    buf.push_back(in[start++]);
                else
                    ++start;
            }break;
            default:{
                buf.push_back(in[start++]);
                empty=false;
            }break;
        }

        // whitespace: nothing is buffered before the first non-whitespace character;
        // afterwards one ' ' is buffered per run of whitespace
        if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
            if (seen_nonws) {
                if (!last_was_space) {
                    buf.push_back(' ');
                    last_was_space = true;
                }
    return start;
            }
            ++i;
            continue;
        }

        // any other character is buffered unchanged
        buf.push_back(c);
        seen_nonws = true;
        last_was_space = false;
        ++i;
    }

    // `el` is assigned only when something was buffered
    if (!buf.empty()) {
        auto text = std::make_unique<TextElement>();
        (static_cast<TextElement*>(text.get()))->_Text.insert(
            (static_cast<TextElement*>(text.get()))->_Text.end(),
            buf.begin(), buf.end()
        );
        el = std::move(text);
    }

    // index at which scanning stopped (HTMLTAG_OPEN or in.size())
    return i;
}


// Default constructor: invokes the Element base constructor; the body is empty.
libhtmlpp::CommentElement::CommentElement()
    : Element()
{}

@@ -1247,24 +1307,38 @@ size_t libhtmlpp::CommentElement::parseElement(
    bool& termination
){
    termination = false;
    el = std::make_unique<CommentElement>();

    size_t i = start;

    if (i + 3 >= in.size()) return i;
    if (!(in[i] == '<' && in[i+1] == '!' && in[i+2] == '-' && in[i+3] == '-'))

    if (!(in[i] == '<' && in[i+1] == '!' && in[i+2] == '-' && in[i+3] == '-')) {
        return i;
    }

    el = std::make_unique<CommentElement>();

    i += 4;
    const size_t content_begin = i;

    while (i + 2 < in.size()) {
        if (in[i] == '-' && in[i+1] == '-' && in[i+2] == '>') {
            i += 3;             // nach "-->"
            break;
        }
        static_cast<CommentElement*>(el.get())->_Comment.push_back(in[i]);
        ++i;
    }

    auto* self = static_cast<CommentElement*>(el.get());
    if (i > content_begin) {
        self->_Comment.insert(self->_Comment.end(),
                              in.begin() + content_begin,
                              in.begin() + i);
    }

    if (i + 2 < in.size()) {
        i += 3;
    } else {
        i = in.size();
    }
    return i;
}

@@ -1714,7 +1788,7 @@ void libhtmlpp::print(const Element &element, HtmlString &output,bool formated)
    const Element *el=&element;

    auto isContainer = [](const std::string &tagname) {
        for(size_t i=0; !ContainerTypes[i].empty(); ++i){
        for(size_t i=0; i<ContainerTypes.size(); ++i){
            if(tagname==ContainerTypes[i])
                return true;
        }
+1 −1
Original line number Diff line number Diff line
@@ -187,7 +187,7 @@ namespace libhtmlpp {

        int         getType() const;

        static size_t parseElement(const std::vector<char> in,std::unique_ptr<libhtmlpp::Element> &el,size_t start,bool &termination);
        static size_t parseElement(const std::vector<char> &in,std::unique_ptr<libhtmlpp::Element> &el,size_t start,bool &termination);

    protected:
        std::vector<char> _Text;