Commit e3da4ef3 authored by jan.koester's avatar jan.koester
Browse files

needs debug new parser

parent 8c2ddf1d
Loading
Loading
Loading
Loading
+112 −163
Original line number Diff line number Diff line
@@ -42,11 +42,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Characters the tag scanner switches on while walking the raw HTML buffer:
// '/' is recorded as a terminator marker inside an open tag, '>' ends the tag
// that is currently open.
#define HTMLTAG_TERMINATE '/'
#define HTMLTAG_CLOSE '>'

// Small integer constants named after node kinds.
// NOTE(review): no use of these four constants is visible in this excerpt -
// confirm they are still referenced before relying on their values.
#define HTMLELEMENT 0
#define HTMLTERMELEMENT 1
#define HTMLCOMMENT 2
#define HTMLHEADER 3

namespace libhtmlpp {

    class DocElements {
@@ -55,16 +50,12 @@ namespace libhtmlpp {
        bool                    terminator;
        class DocElements*      nextel;
        class DocElements*      prevel;
        long                    spos;
        long                    epos;

        // Builds a detached node: spos/epos are zero, the terminator flag is
        // off, and the payload pointer and both list links are null.
        DocElements() {
            spos = 0;
            epos = 0;
            terminator = false;
            element = nullptr;
            prevel = nullptr;
            nextel = nullptr;
        }

        virtual ~DocElements() {
@@ -80,8 +71,6 @@ namespace libhtmlpp {
};

// Default constructor: no _HTable is allocated yet and its row count is zero.
libhtmlpp::HtmlString::HtmlString()
    : _HTable(nullptr), _HTableSize(0) {
}

libhtmlpp::HtmlString::HtmlString(const char* str) : HtmlString(){
@@ -93,13 +82,6 @@ libhtmlpp::HtmlString::HtmlString(std::string& str) : HtmlString(){
}

libhtmlpp::HtmlString::~HtmlString(){
    if(_HTable){
        for(size_t i=0; i<_HTableSize; ++i){
            delete[] _HTable[i];
        }
    }
    delete[]   _HTable;

    while(!_Childs.empty()){
        delete _Childs.top();
        _Childs.pop();
@@ -137,15 +119,6 @@ void libhtmlpp::HtmlString::insert(size_t pos, char src){
}

void libhtmlpp::HtmlString::clear(){
    if(_HTable){
        for(size_t i=0; i<_HTableSize; ++i){
            delete[] _HTable[i];
        }
    }
    delete[]   _HTable;
    _HTable=nullptr;
    _HTableSize=0;

    while(!_Childs.empty()){
        delete _Childs.top();
        _Childs.pop();
@@ -250,9 +223,7 @@ const char * libhtmlpp::HtmlString::c_str(){

// Builds the element tree for the buffered HTML and returns its root.
// The root is also pushed onto _Childs; ~HtmlString() and clear() delete every
// entry of that stack, so the HtmlString appears to keep ownership of it.
// NOTE(review): confirm that callers never delete the returned pointer.
// NOTE(review): this excerpt comes from a commit diff (hunk -250,9 +223,7), so
// the old and the new tree-building calls appear back to back below; only one
// of the two `el` declarations can exist in a compilable revision.
libhtmlpp::Element *libhtmlpp::HtmlString::parse() {
    HTMLException excp;
    _parseTree();                    // presumably pre-commit: fills _HTable
    long pos = 0;                    // presumably pre-commit: argument for the old overload
    Element *el=_buildTree(pos);     // presumably the pre-commit call
    Element *el=_buildTree();        // presumably the post-commit replacement
    _Childs.push(el);
    return el;
}
@@ -260,9 +231,7 @@ libhtmlpp::Element *libhtmlpp::HtmlString::parse() {
bool libhtmlpp::HtmlString::validate(std::string *err){
    bool erg=false;
    try{
        _parseTree();
        long pos = 0;
        Element *buf=_buildTree(pos);
        Element *buf=_buildTree();
        if(buf){
            erg=true;
        }
@@ -299,7 +268,7 @@ void libhtmlpp::HtmlString::_buildtreenode(libhtmlpp::DocElements *firstel,libht
    auto checkterminator = [](DocElements *termel, DocElements *end){
        int i=0;

        if(((Element*)(termel->element))->getType()!=HtmlEl)
        if(termel->element->getType()!=HtmlEl)
            return (DocElements*) nullptr;

        for (DocElements* curcel=termel->nextel; curcel; curcel=curcel->nextel) {
@@ -370,7 +339,7 @@ NEXTDOCEL:
    }
}

libhtmlpp::Element *libhtmlpp::HtmlString::_buildTree(long& pos) {
libhtmlpp::Element *libhtmlpp::HtmlString::_buildTree() {
    DocElements *firstEl = nullptr, *lastEl = nullptr;

    auto addelement = [](DocElements **first,DocElements **last){
@@ -384,54 +353,92 @@ libhtmlpp::Element *libhtmlpp::HtmlString::_buildTree(long& pos) {
        }
    };

    for(size_t i = 0; i < _HTableSize; ++i) {
        if(_HTable[i][0] == -1){
            continue;
    HTMLException excp;
    size_t open=std::string::npos;
    size_t terminate=std::string::npos;
    size_t close=std::string::npos;
    size_t starttag=std::string::npos;
    size_t comment=std::string::npos;
    size_t prevclose=std::string::npos;

    for(size_t ii=0; ii<_Data.size(); ++ii){
        switch(_Data[ii]){
            case HTMLTAG_OPEN:
                if(open==std::string::npos){
                    open = ii;
                }
                break;
            case HTMLTAG_TERMINATE:
                if(open!=std::string::npos && close ==std::string::npos){
                    terminate=ii;
                }
                break;
            case HTMLTAG_CLOSE:
                if(open!=std::string::npos){
                    close = ii;
                }
                break;
            case '!':
                if(open!=std::string::npos && close ==std::string::npos)
                    if( ii+2 < _Data.size() && _Data[ii+1]=='-' && _Data[ii+2]=='-'){
                        comment=(ii+=2);
                    }
                break;
            case ' ':
                break;
            default:
                if(open!=std::string::npos && close ==std::string::npos && starttag==std::string::npos)
                    starttag= ii;
            break;
        }

        if(open!=std::string::npos &&  close !=std::string::npos){
            addelement(&firstEl,&lastEl);

        lastEl->spos = _HTable[i][0];
        lastEl->epos = _HTable[i][2];

        if (_HTable[i][1] != -1){
            if(terminate!=std::string::npos)
                lastEl->terminator=true;
        }


        bool comment = false;

        if((size_t)lastEl->spos+3<_Data.size()){
            if(_Data.at(lastEl->spos)=='<' && _Data.at(lastEl->spos+1)=='!' &&
                _Data.at(lastEl->spos+2)=='-' && _Data.at(lastEl->spos+3)=='-')
                comment=true;
        }

        if(!comment){
            if(comment==std::string::npos){
                lastEl->element=new HtmlElement();
                lastEl->element->_firstElement=firstEl->element;
                std::vector<char> el;
            std::copy(_Data.begin()+lastEl->spos,_Data.begin()+(lastEl->epos+1),std::inserter<std::vector<char>>(el,el.begin()));
                std::copy(_Data.begin()+open,_Data.begin()+(close+1),std::inserter<std::vector<char>>(el,el.begin()));
                _serialelize(el,(HtmlElement*)lastEl->element);
            }else{
            lastEl->element=new CommentElement();
            std::copy(_Data.begin()+lastEl->spos+4,_Data.begin()+lastEl->epos-3,
                        std::inserter<std::vector<char>>(((CommentElement*) lastEl->element)->_Comment,((CommentElement*) lastEl->element)->_Comment.begin()));
                int ctlvl=0;
                size_t endcomment=std::string::npos;
                while(ii < _Data.size()){
                    switch(_Data[ii]){
                        case '-':
                            ++ctlvl;
                            endcomment=ii;
                            break;
                        case '>':
                            if(endcomment!=std::string::npos && ctlvl==2)
                                goto ENDCOMMANDTAGDOUND;
                        break;
                        default:
                            ctlvl=0;
                            endcomment=std::string::npos;
                            break;
                    }

        if(!lastEl->terminator){
                    ++ii;
                }
                ENDCOMMANDTAGDOUND:
                if(endcomment!=std::string::npos){
                    lastEl->element=new CommentElement();
                    lastEl->element->_firstElement=firstEl->element;
                    std::vector<char> el;
                    std::copy(_Data.begin()+comment,_Data.begin()+endcomment,std::inserter<std::vector<char>>(el,el.begin()));
                    ((CommentElement*)lastEl->element)->_Comment=el;
                }
            }

        size_t epos = i+1 < _HTableSize ? _HTable[i+1][0] :  _Data.size();

        size_t spos = _HTable[i][2]+1;

        if(int(epos - spos) > 0){
            if(prevclose!=std::string::npos && int(open-prevclose) > 0){
                std::vector<char> buf;

                bool start=false;
            for(size_t i = spos; i<epos; ++i){
                switch(_Data[i]){
                for(size_t it = prevclose; it<open; ++it){
                    switch(_Data[it]){
                        case '\r':
                            continue;
                        case '\n':
@@ -441,27 +448,31 @@ libhtmlpp::Element *libhtmlpp::HtmlString::_buildTree(long& pos) {
                                continue;
                        default:
                            start=true;
                        buf.push_back(_Data[i]);
                            buf.push_back(_Data[it]);
                            continue;
                    }
                }
                if(!buf.empty()){
                    addelement(&firstEl,&lastEl);

                    lastEl->element=new TextElement();
                    lastEl->element->_firstElement=firstEl->element;
                lastEl->spos = spos;
                lastEl->epos = epos;

                    ((TextElement*)lastEl->element)->_Text=buf;
            }
                    lastEl->terminator=false;
                }
            }

    _buildtreenode(firstEl,lastEl);
            open=std::string::npos;
            prevclose=close;
            close=std::string::npos;
            terminate=std::string::npos;
            starttag=std::string::npos;
        }
    }

    Element *first = firstEl->element;

    _buildtreenode(firstEl,lastEl);

    while(firstEl){
        DocElements *next=firstEl->nextel;
        if(firstEl->terminator)
@@ -546,68 +557,6 @@ void libhtmlpp::HtmlString::_serialelize(std::vector<char> in, libhtmlpp::HtmlEl
    }
}

// Scans _Data and records the delimiter offsets of every tag in _HTable.
//
// Each row describes one tag:
//   row[0] = index of the opening '<'
//   row[1] = index of a '/' seen before any other non-space character of the
//            tag (marks a terminating tag), otherwise -1
//   row[2] = index of the closing '>'
// The table gets one row per '>' found in the buffer; rows that no tag claims
// (a '>' that closes nothing) keep -1 in all three fields.
//
// A '<' with no '>' left to close it is ignored instead of being written past
// the end of the table. Any previously built table is released first.
void libhtmlpp::HtmlString::_parseTree(){
    HTMLException excp;

    if(_HTable){
        for(size_t i=0; i< _HTableSize; ++i){
            delete[] _HTable[i];
        }
        delete[] _HTable;
    }
    // Reset before re-allocating so a throwing `new` cannot leave the members
    // pointing at memory that was just freed.
    _HTable=nullptr;
    _HTableSize=0;

    // One row per '>' is an upper bound for the number of tags.
    size_t closetag = 0;
    for (size_t i = 0; i < _Data.size(); ++i) {
        if (_Data[i] == HTMLTAG_CLOSE)
            ++closetag;
    }

    // Row pointers start out as nullptr: if a row allocation throws, the
    // destructor can still walk and free the partially built table.
    _HTable = new long*[closetag]();
    _HTableSize = closetag;
    for (size_t is = 0; is < _HTableSize; ++is) {
        _HTable[is] = new long[3];
        _HTable[is][0] = -1;
        _HTable[is][1] = -1;
        _HTable[is][2] = -1;
    }

    bool open=false;   // currently between '<' and its '>'
    bool pterm=false;  // a '/' would still count as the tag's terminator
    size_t ip=0;       // row that the current / next tag is written to
    for(size_t ii=0; ii<_Data.size(); ++ii){
        switch(_Data[ii]){
            case HTMLTAG_OPEN:
                // ip == _HTableSize means every '>' has been consumed already,
                // so this tag can never be closed and has no row to use.
                if(!open && ip<_HTableSize){
                    open = true;
                    pterm = true;
                    _HTable[ip][0] = ii;
                }
                break;
            case HTMLTAG_TERMINATE:
                // Only meaningful inside an open tag; `open` also guarantees
                // that ip is a valid row.
                if(open && pterm)
                    _HTable[ip][1]=ii;
                break;
            case HTMLTAG_CLOSE:
                if(open){
                    _HTable[ip][2] = ii;
                    ++ip;
                    open = false;
                    // Text following the tag must not be able to set row[1].
                    pterm = false;
                }
                break;
            case ' ':
                break;
            default:
                pterm=false;
                break;
        }
    }
}

void libhtmlpp::HtmlEncode(const char* input, std::string &output){
    HtmlString tmp(output);
    HtmlEncode(input,&tmp);
@@ -819,7 +768,7 @@ namespace libhtmlpp {
        Element *firstdest=dest;

NEWEL:
        if(src->getType()==libhtmlpp::HtmlEl && dest->getType()==libhtmlpp::HtmlEl){
        if(src->getType()==HtmlEl && dest->getType()==HtmlEl){
            ((libhtmlpp::HtmlElement*)dest)->_TagName=(((libhtmlpp::HtmlElement*)src)->_TagName);
            for(libhtmlpp::HtmlElement::Attributes *cattr=((libhtmlpp::HtmlElement*)src)->_firstAttr; cattr; cattr=cattr->_nextAttr){
                if(!cattr->_Value.empty())
@@ -828,7 +777,7 @@ NEWEL:
                    ((libhtmlpp::HtmlElement*)dest)->setAttribute(cattr->_Key.data(),cattr->_Key.size(),nullptr,0);
            }

            if(dest->getType()==HtmlEl && ((libhtmlpp::HtmlElement*)src)->_childElement){
            if(((libhtmlpp::HtmlElement*)src)->_childElement){
                switch(src->getType()){
                    case HtmlEl:
                        ((libhtmlpp::HtmlElement*)dest)->_childElement= new HtmlElement;
@@ -873,7 +822,7 @@ NEWEL:
                    break;
                default:
                    HTMLException ex;
                    ex[HTMLException::Critical] << "Unknown html element found !";
                    ex[HTMLException::Critical] << "_copy: Unknown next html element found !";
                    throw ex;
            }
            prev=dest;
+1 −4
Original line number Diff line number Diff line
@@ -221,15 +221,12 @@ namespace libhtmlpp {
        libhtmlpp::Element *parse();
        bool                validate(std::string *err);
    private:
        void                 _parseTree();
        void                 _serialelize(std::vector<char> in, HtmlElement* out);
        Element             *_buildTree(long& pos);
        Element             *_buildTree();
        void                 _buildtreenode(DocElements *firstel,DocElements *lastel);
        std::vector<char>    _Data;
        std::vector<char>    _CStr;
        std::stack<Element*> _Childs;
        long**               _HTable;
        size_t               _HTableSize;
        friend void HtmlEncode(const char *input,HtmlString *output);
    };