Loading src/html.cpp +108 −92 Original line number Diff line number Diff line Loading @@ -238,22 +238,29 @@ void libhtmlpp::HtmlString::_buildtreenode( libhtmlpp::DocElements *lastel, std::unique_ptr<Element> &html) { if (!firstel || !lastel) { // FIX 1: Relax initial check. If firstel is non-null, lastel=nullptr is valid (end of list). if (!firstel) { HTMLException excp; excp[HTMLException::Error] << "No start Element!"; throw excp; } // If lastel is nullptr, it represents the end of the entire list. We don't check it here. struct Frame { DocElements *open; // opening tag holder DocElements *close; // its matching terminator const DocElements *outer_end; // end bound of the parent span DocElements *open; // The DocElement node of the opening tag (e.g., <div>) DocElements *close; // The DocElement node of the closing tag (e.g., </div>) const DocElements *outer_end; // The end bound of the parent's span Element *outer_prev_el; }; std::stack<Frame> stack; DocElements *start = firstel; const DocElements *end = lastel; Element *prev_el = nullptr; // last attached Element in the current span const DocElements *end = lastel; // end will be nullptr for the root call Element *prev_el_in_tree = nullptr; // parent_el is unused and can be removed, but we'll leave it for now. // HtmlElement *parent_el = nullptr; auto skip_empty = [](DocElements *cur, const DocElements *stop) -> DocElements* { while (cur && cur != stop && (!cur->element)) { Loading @@ -262,140 +269,150 @@ void libhtmlpp::HtmlString::_buildtreenode( return cur; }; // FIX 2: Cleaned and non-redundant find_terminator logic auto find_terminator = [&skip_empty](DocElements *open, const DocElements *bound) -> DocElements* { if (!open || !open->element || open->terminator || open->element->getType() != HtmlEl) return nullptr; const std::string &tag = dynamic_cast<HtmlElement*>(open->element.get())->getTagname(); const std::string &tag = static_cast<HtmlElement*>(open->element.get())->getTagname(); int nest = 0; for (DocElements *cur = skip_empty(open->nextel.get(), bound); cur; cur = skip_empty(cur->nextel.get(), bound)) { DocElements *cur = open->nextel.get(); // 1. Search for the terminator up to the boundary while (cur && cur != bound) { cur = skip_empty(cur, bound); if (!cur || cur == bound) break; if (cur->element && cur->element->getType() == HtmlEl) { const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname(); const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname(); if (curtag == tag) { if (cur->terminator) { if (nest == 0) return cur; if (nest == 0) return cur; // Found! --nest; } else { ++nest; } } } if (cur == bound) return nullptr; cur = cur->nextel.get(); } // Require explicit terminator for container types // 2. Mandatory container check (Only one check here, only runs if search failed) // If the loop finished without finding the terminator, and the tag is mandatory, throw. for (size_t i = 0; ContainerTypes[i]; ++i) { if (tag == ContainerTypes[i]) { HTMLException e; e[HTMLException::Error] << tag << " must be terminated ! " << dynamic_cast<HtmlElement*>(open->element.get())->getAtributte("id"); << static_cast<HtmlElement*>(open->element.get())->getAtributte("id"); throw e; } } return nullptr; }; // --- Main Iterative Traversal Loop --- for (;;) { start = skip_empty(start, end); // finished current span? if (!start) { html = std::move(firstel->element); return; } // A. END OF CURRENT SCOPE CHECK (Base case/Return from recursion) if (!start || start == end) { // hit this span's terminator -> restore parent scope if (start == end) { // Final return for the root scope if (stack.empty()) { // If the root element (firstel->element) was never moved into a sibling chain // because it was the only element, move it now. // In a well-formed HTML (like <html>...</html>), the <html> opener // is the firstel. Its element is moved here only if it wasn't moved // into a sibling chain previously (which shouldn't happen). // The root element must be the *first* element in the list. // We move it only if its unique_ptr hasn't been moved yet. if (firstel->element) { html = std::move(firstel->element); return; } Frame fr = stack.top(); stack.pop(); // continue after </open> in outer scope prev_el = nullptr; start = fr.close->nextel.get(); end = fr.outer_end; continue; return; } // If this is an opener with a terminator inside [start, end) if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { auto *hel = static_cast<HtmlElement*>(start->element.get()); // Restore parent scope (pop from stack) Frame fr = stack.top(); stack.pop(); HtmlElement *opener_el = static_cast<HtmlElement*>(fr.open->element.get()); // Attach first child if present and not already attached if (!hel->_childElement && start->nextel && start->nextel->element) { std::cout << hel->getTagname() << std::endl; hel->_childElement = std::move(start->nextel->element); // CRITICAL STEP: Set parent's _childElement // The element at fr.open->nextel must be the first child of the completed child span. if (fr.open->nextel) { // If element is still here, it was the first child and its unique_ptr was not moved in Case C. if (fr.open->nextel->element) { opener_el->_childElement = std::move(fr.open->nextel->element); } // If it's empty, it means the ownership was transferred to a sibling in Case C, // which is correct for multi-child containers. } // If we have a first child, we will walk/link the whole child span now if (hel->_childElement) { // Push frame to restore after children stack.push(Frame{start, close, end}); // RESTORE STATE: The parent container (opener_el) itself is the last element // of its sibling chain so far. And restore the parent's previous sibling. prev_el_in_tree = opener_el; // Inside child span, prev_el is the first child's Element prev_el = hel->_childElement.get(); if (fr.outer_prev_el) { // Link the parent's previous element (fr.outer_prev_el) to the completed parent container (opener_el). prev_el_in_tree->_prevElement = fr.outer_prev_el; // Link the parent to its previous sibling fr.outer_prev_el->_nextElement = std::move(fr.open->element); // Transfer ownership of the PARENT container prev_el_in_tree = fr.outer_prev_el->_nextElement.get(); // Update to the new last element in the parent's chain } // Walk from the physical node after the (now moved-from) first-child holder DocElements *cur = skip_empty(start->nextel.get(), close); // Continue traversal after the container's closing tag prev_el_in_tree = opener_el; start = fr.close->nextel.get(); // Start after the closing tag end = fr.outer_end; // Restore parent's boundary // The first physical node after opener is now moved-from; advance one more if (cur && !cur->element) cur = skip_empty(cur->nextel.get(), close); continue; // Go to the next element in the parent's scope (sibling of container) } // Link subsequent siblings up to (but not including) the close tag while (cur && cur != close) { // Only link non-terminator, real elements if (cur->element && !cur->terminator) { // double-link: sibling prev cur->element->_prevElement = prev_el; // B. NEW CONTAINER FOUND (Recursive call/Push to stack) if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { // move current element into prev_el->_nextElement prev_el->_nextElement = std::move(cur->element); // Push the current scope onto the stack stack.push(Frame{start, close, end, prev_el_in_tree}); // advance prev_el to the element we just attached prev_el = prev_el->_nextElement.get(); } cur = skip_empty(cur->nextel.get(), close); } // Set up the new, deeper scope (child span) prev_el_in_tree = nullptr; start = start->nextel.get(); end = close; // New boundary is the closing tag // dive into the children recursively by setting the current span to (open->next, close) prev_el = nullptr; start = skip_empty(start->nextel.get(), close); end = close; continue; } // No first child; fall through to normal forward progress. continue; // Restart the loop to process the first child } } // Normal forward progress (non-container or no terminator found) // C. SIBLING LINKING (Normal Forward Progress) // Only link elements that still exist in the DocElements node. if (start->element && !start->terminator) { // Chain siblings in the flat list so callers that inspect next links still see sequence if (prev_el) { start->element->_prevElement = prev_el; Element *current_el = start->element.get(); if (prev_el_in_tree) { // Link the previous element's next pointer to the current element, // transferring ownership from the DocElements list to the tree. current_el->_prevElement = prev_el_in_tree; prev_el_in_tree->_nextElement = std::move(start->element); // Update the end of the sibling chain to the newly moved element prev_el_in_tree = prev_el_in_tree->_nextElement.get(); } else { // This is the first element in the current sibling chain. // The element stays in start->element (firstel->element for the root, // or fr.open->nextel->element for children) until a subsequent sibling // is found, or the scope closes (in A). prev_el_in_tree = current_el; } if (start->nextel && start->nextel->element) { start->element->_nextElement = std::move(start->nextel->element); } prev_el = start->element.get(); // Skip over the (now moved-from) nextel holder if we consumed it start = start->nextel ? start->nextel->nextel.get() : start->nextel.get(); } else { // terminator or moved-from holder; just advance // Advance to the next DocElement node start = start->nextel.get(); } } } void libhtmlpp::HtmlString::_buildTree() { DocElements *lastEl = nullptr; Loading Loading @@ -520,7 +537,6 @@ void libhtmlpp::HtmlString::_buildTree() { addelement(&lastEl); lastEl->element=std::make_unique<TextElement>(); std::copy(buf.begin(),buf.end(),std::back_inserter((static_cast<TextElement*>(lastEl->element.get()))->_Text)); } goto BUILDTREE_STARTLOOP; }; Loading @@ -545,7 +561,7 @@ void libhtmlpp::HtmlString::_buildTree() { } } _buildtreenode(firstEl.get(),lastEl,_rootEl); _buildtreenode(firstEl.get(),nullptr,_rootEl); HtmlString test; print(*_rootEl,test,true); Loading Loading
src/html.cpp +108 −92 Original line number Diff line number Diff line Loading @@ -238,22 +238,29 @@ void libhtmlpp::HtmlString::_buildtreenode( libhtmlpp::DocElements *lastel, std::unique_ptr<Element> &html) { if (!firstel || !lastel) { // FIX 1: Relax initial check. If firstel is non-null, lastel=nullptr is valid (end of list). if (!firstel) { HTMLException excp; excp[HTMLException::Error] << "No start Element!"; throw excp; } // If lastel is nullptr, it represents the end of the entire list. We don't check it here. struct Frame { DocElements *open; // opening tag holder DocElements *close; // its matching terminator const DocElements *outer_end; // end bound of the parent span DocElements *open; // The DocElement node of the opening tag (e.g., <div>) DocElements *close; // The DocElement node of the closing tag (e.g., </div>) const DocElements *outer_end; // The end bound of the parent's span Element *outer_prev_el; }; std::stack<Frame> stack; DocElements *start = firstel; const DocElements *end = lastel; Element *prev_el = nullptr; // last attached Element in the current span const DocElements *end = lastel; // end will be nullptr for the root call Element *prev_el_in_tree = nullptr; // parent_el is unused and can be removed, but we'll leave it for now. // HtmlElement *parent_el = nullptr; auto skip_empty = [](DocElements *cur, const DocElements *stop) -> DocElements* { while (cur && cur != stop && (!cur->element)) { Loading @@ -262,140 +269,150 @@ void libhtmlpp::HtmlString::_buildtreenode( return cur; }; // FIX 2: Cleaned and non-redundant find_terminator logic auto find_terminator = [&skip_empty](DocElements *open, const DocElements *bound) -> DocElements* { if (!open || !open->element || open->terminator || open->element->getType() != HtmlEl) return nullptr; const std::string &tag = dynamic_cast<HtmlElement*>(open->element.get())->getTagname(); const std::string &tag = static_cast<HtmlElement*>(open->element.get())->getTagname(); int nest = 0; for (DocElements *cur = skip_empty(open->nextel.get(), bound); cur; cur = skip_empty(cur->nextel.get(), bound)) { DocElements *cur = open->nextel.get(); // 1. Search for the terminator up to the boundary while (cur && cur != bound) { cur = skip_empty(cur, bound); if (!cur || cur == bound) break; if (cur->element && cur->element->getType() == HtmlEl) { const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname(); const std::string &curtag = static_cast<HtmlElement*>(cur->element.get())->getTagname(); if (curtag == tag) { if (cur->terminator) { if (nest == 0) return cur; if (nest == 0) return cur; // Found! --nest; } else { ++nest; } } } if (cur == bound) return nullptr; cur = cur->nextel.get(); } // Require explicit terminator for container types // 2. Mandatory container check (Only one check here, only runs if search failed) // If the loop finished without finding the terminator, and the tag is mandatory, throw. for (size_t i = 0; ContainerTypes[i]; ++i) { if (tag == ContainerTypes[i]) { HTMLException e; e[HTMLException::Error] << tag << " must be terminated ! " << dynamic_cast<HtmlElement*>(open->element.get())->getAtributte("id"); << static_cast<HtmlElement*>(open->element.get())->getAtributte("id"); throw e; } } return nullptr; }; // --- Main Iterative Traversal Loop --- for (;;) { start = skip_empty(start, end); // finished current span? if (!start) { html = std::move(firstel->element); return; } // A. END OF CURRENT SCOPE CHECK (Base case/Return from recursion) if (!start || start == end) { // hit this span's terminator -> restore parent scope if (start == end) { // Final return for the root scope if (stack.empty()) { // If the root element (firstel->element) was never moved into a sibling chain // because it was the only element, move it now. // In a well-formed HTML (like <html>...</html>), the <html> opener // is the firstel. Its element is moved here only if it wasn't moved // into a sibling chain previously (which shouldn't happen). // The root element must be the *first* element in the list. // We move it only if its unique_ptr hasn't been moved yet. if (firstel->element) { html = std::move(firstel->element); return; } Frame fr = stack.top(); stack.pop(); // continue after </open> in outer scope prev_el = nullptr; start = fr.close->nextel.get(); end = fr.outer_end; continue; return; } // If this is an opener with a terminator inside [start, end) if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { auto *hel = static_cast<HtmlElement*>(start->element.get()); // Restore parent scope (pop from stack) Frame fr = stack.top(); stack.pop(); HtmlElement *opener_el = static_cast<HtmlElement*>(fr.open->element.get()); // Attach first child if present and not already attached if (!hel->_childElement && start->nextel && start->nextel->element) { std::cout << hel->getTagname() << std::endl; hel->_childElement = std::move(start->nextel->element); // CRITICAL STEP: Set parent's _childElement // The element at fr.open->nextel must be the first child of the completed child span. if (fr.open->nextel) { // If element is still here, it was the first child and its unique_ptr was not moved in Case C. if (fr.open->nextel->element) { opener_el->_childElement = std::move(fr.open->nextel->element); } // If it's empty, it means the ownership was transferred to a sibling in Case C, // which is correct for multi-child containers. } // If we have a first child, we will walk/link the whole child span now if (hel->_childElement) { // Push frame to restore after children stack.push(Frame{start, close, end}); // RESTORE STATE: The parent container (opener_el) itself is the last element // of its sibling chain so far. And restore the parent's previous sibling. prev_el_in_tree = opener_el; // Inside child span, prev_el is the first child's Element prev_el = hel->_childElement.get(); if (fr.outer_prev_el) { // Link the parent's previous element (fr.outer_prev_el) to the completed parent container (opener_el). prev_el_in_tree->_prevElement = fr.outer_prev_el; // Link the parent to its previous sibling fr.outer_prev_el->_nextElement = std::move(fr.open->element); // Transfer ownership of the PARENT container prev_el_in_tree = fr.outer_prev_el->_nextElement.get(); // Update to the new last element in the parent's chain } // Walk from the physical node after the (now moved-from) first-child holder DocElements *cur = skip_empty(start->nextel.get(), close); // Continue traversal after the container's closing tag prev_el_in_tree = opener_el; start = fr.close->nextel.get(); // Start after the closing tag end = fr.outer_end; // Restore parent's boundary // The first physical node after opener is now moved-from; advance one more if (cur && !cur->element) cur = skip_empty(cur->nextel.get(), close); continue; // Go to the next element in the parent's scope (sibling of container) } // Link subsequent siblings up to (but not including) the close tag while (cur && cur != close) { // Only link non-terminator, real elements if (cur->element && !cur->terminator) { // double-link: sibling prev cur->element->_prevElement = prev_el; // B. NEW CONTAINER FOUND (Recursive call/Push to stack) if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { // move current element into prev_el->_nextElement prev_el->_nextElement = std::move(cur->element); // Push the current scope onto the stack stack.push(Frame{start, close, end, prev_el_in_tree}); // advance prev_el to the element we just attached prev_el = prev_el->_nextElement.get(); } cur = skip_empty(cur->nextel.get(), close); } // Set up the new, deeper scope (child span) prev_el_in_tree = nullptr; start = start->nextel.get(); end = close; // New boundary is the closing tag // dive into the children recursively by setting the current span to (open->next, close) prev_el = nullptr; start = skip_empty(start->nextel.get(), close); end = close; continue; } // No first child; fall through to normal forward progress. continue; // Restart the loop to process the first child } } // Normal forward progress (non-container or no terminator found) // C. SIBLING LINKING (Normal Forward Progress) // Only link elements that still exist in the DocElements node. if (start->element && !start->terminator) { // Chain siblings in the flat list so callers that inspect next links still see sequence if (prev_el) { start->element->_prevElement = prev_el; Element *current_el = start->element.get(); if (prev_el_in_tree) { // Link the previous element's next pointer to the current element, // transferring ownership from the DocElements list to the tree. current_el->_prevElement = prev_el_in_tree; prev_el_in_tree->_nextElement = std::move(start->element); // Update the end of the sibling chain to the newly moved element prev_el_in_tree = prev_el_in_tree->_nextElement.get(); } else { // This is the first element in the current sibling chain. // The element stays in start->element (firstel->element for the root, // or fr.open->nextel->element for children) until a subsequent sibling // is found, or the scope closes (in A). prev_el_in_tree = current_el; } if (start->nextel && start->nextel->element) { start->element->_nextElement = std::move(start->nextel->element); } prev_el = start->element.get(); // Skip over the (now moved-from) nextel holder if we consumed it start = start->nextel ? start->nextel->nextel.get() : start->nextel.get(); } else { // terminator or moved-from holder; just advance // Advance to the next DocElement node start = start->nextel.get(); } } } void libhtmlpp::HtmlString::_buildTree() { DocElements *lastEl = nullptr; Loading Loading @@ -520,7 +537,6 @@ void libhtmlpp::HtmlString::_buildTree() { addelement(&lastEl); lastEl->element=std::make_unique<TextElement>(); std::copy(buf.begin(),buf.end(),std::back_inserter((static_cast<TextElement*>(lastEl->element.get()))->_Text)); } goto BUILDTREE_STARTLOOP; }; Loading @@ -545,7 +561,7 @@ void libhtmlpp::HtmlString::_buildTree() { } } _buildtreenode(firstEl.get(),lastEl,_rootEl); _buildtreenode(firstEl.get(),nullptr,_rootEl); HtmlString test; print(*_rootEl,test,true); Loading