Commit 40f1ab72 authored by jan.koester's avatar jan.koester
Browse files

HTML4 detection is working.

parent e85ba366
Loading
Loading
Loading
Loading
+23 −9
Original line number Diff line number Diff line
@@ -1390,8 +1390,23 @@ bool libhtmlpp::HtmlPage::isHtml5(){

void libhtmlpp::HtmlPage::_CheckHeader(const HtmlString &page){
    const char type[] = { '!','D','O','C','T','Y','P','E' };

    int i = 0;

    bool start=false;

    do{
        ++i;
        if(page[i]== '<'){
            if(start==true){
                HTMLException excp;
                excp[HTMLException::Critical] << "Wrong Header arborting";
                throw excp;
            }
            start=true;
        }
    } while ( page[i]== '<' || page[i]== '!' || page[i] == ' ');

    if (page.size() < 8) {
        HTMLException excp;
        excp[HTMLException::Critical] << "No Doctype found arborting";
@@ -1411,7 +1426,7 @@ void libhtmlpp::HtmlPage::_CheckHeader(const HtmlString &page){
        ++i;
    }while (page[i] == ' ');

    const char doctype[] = { 'h','t','m','l' };
    const char doctype[] = { 'H','T','M','L' };
    size_t tpvl = 4;

    const char typevalue4[] = {'P','U','B','L','I','C'};
@@ -1419,32 +1434,31 @@ void libhtmlpp::HtmlPage::_CheckHeader(const HtmlString &page){

    if ((i + tpvl) > page.size()) {
        HTMLException excp;
        excp[HTMLException::Critical] << "Doctype header broken or wrong type";
        excp[HTMLException::Critical] << "Document to short broken !";
        throw excp;
    }

    int ii=0;
    size_t ii=0;

    while ( ii < tpvl) {
        if (tolower(page[i]) !=  tolower(doctype[ii])) {
        if (tolower(page[i++]) !=  tolower(doctype[ii++])) {
            HTMLException excp;
            excp[HTMLException::Critical] << "Doctype header broken or wrong type";
            throw excp;
        }
        ++i;
    }

    ii=0;

    do{
        ++i;
    } while (page[i] == ' ');

    ii=0;

    bool html4=true;

    if(i +tpvl4  <page.size()){
        while(ii < tpvl4){
            if (tolower(page[i]) !=  tolower(typevalue4[ii])) {
            if (tolower(page[i++]) !=  tolower(typevalue4[ii++])) {
                html4=false;
            }
        }
+10 −0
Original line number Diff line number Diff line
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<!--
  Minimal HTML 4.01 Strict document: the DOCTYPE above uses the PUBLIC
  identifier form with the strict DTD URL, followed by a title, one heading
  and one paragraph.
  NOTE(review): presumably an input sample for HTML4 doctype detection;
  confirm against the test that loads it. The DOCTYPE is kept as the very
  first line of the file on purpose.
-->
<head>
    <title>My HTML4 Document</title>
</head>
<body>
    <h1>This is the main heading</h1>
    <p>This is a paragraph of text.</p>
</body>
</html>