/* This file is part of the KDE project
 * Copyright (C) 2001, 2002 Rolf Magnus <ramagnus@kde.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 *  $Id$
 */

#include "tdefile_html.h"
#include "tdefile_html.moc"
#include <kgenericfactory.h>
#include <kmimetype.h>
#include <kurl.h>
#include <kprocess.h>
#include <kdebug.h>
#include <tqcstring.h>
#include <tqfile.h>
#include <tqregexp.h>
#include <tqtextcodec.h>

// Factory type instantiated for KHtmlPlugin.
typedef KGenericFactory<KHtmlPlugin> HtmlFactory;

// Exports HtmlFactory as the component factory for the "tdefile_html" module.
// NOTE(review): the exact symbol this macro generates is defined in
// kgenericfactory.h, not here -- presumably the loader's entry point.
K_EXPORT_COMPONENT_FACTORY( tdefile_html, HtmlFactory( "tdefile_html" ) )

/**
 * Sets up the plugin and declares the meta-information layout it offers
 * for the "text/html" MIME type.
 *
 * Two groups are registered:
 *  - "General":  the items "Doctype" (string), "Javascript" (bool) and
 *                "Title" (string, hinted as the document's name).
 *  - "Metatags": a group with variable string items; readInfo() fills it
 *                with one item per meta tag found in the document.
 *
 * @param parent  forwarded unchanged to KFilePlugin
 * @param name    forwarded unchanged to KFilePlugin
 * @param args    forwarded unchanged to KFilePlugin
 */
KHtmlPlugin::KHtmlPlugin( TQObject *parent, const char *name,
                          const TQStringList &args )
    : KFilePlugin( parent, name, args )
{
    kdDebug(7034) << "html plugin\n";

    KFileMimeTypeInfo* const mimeInfo = addMimeTypeInfo("text/html");

    // Fixed items describing the document as a whole.
    KFileMimeTypeInfo::GroupInfo* const generalGroup =
        addGroupInfo(mimeInfo, "General", i18n("General"));
    addItemInfo(generalGroup, "Doctype", i18n("Document Type"), TQVariant::String);
    addItemInfo(generalGroup, "Javascript", i18n("JavaScript"), TQVariant::Bool);

    // The title doubles as the document's name.
    KFileMimeTypeInfo::ItemInfo* const titleItem =
        addItemInfo(generalGroup, "Title", i18n("Title"), TQVariant::String);
    setHint(titleItem, KFileMimeTypeInfo::Name);

    // Item keys of this group are not known in advance; they are taken
    // from the meta tags of each document.
    KFileMimeTypeInfo::GroupInfo* const metaTagsGroup =
        addGroupInfo(mimeInfo, "Metatags", i18n("Meta Tags"));
    addVariableInfo(metaTagsGroup, TQVariant::String, 0);
}


/**
 * Extracts meta information from a local HTML file.
 *
 * At most the first 32 KiB of the file are examined. From that prefix the
 * following is collected:
 *  - "General/Doctype":    the contents of the <!doctype ...> declaration
 *  - "General/Title":      the text of the <title> element, re-decoded with
 *                          the charset announced by a meta tag, if any
 *  - "General/Javascript": whether a <script ...> tag is present
 *  - "Metatags/<name>":    the first 50 characters of the content of every
 *                          <meta name|http-equiv="..." content="..."> tag
 *
 * Only double-quoted meta attributes are recognised.
 *
 * @param info  meta-info object for the file; must refer to a local path
 * @return false if the file is remote (empty path), cannot be opened or
 *         cannot be read; true otherwise
 */
bool KHtmlPlugin::readInfo( KFileMetaInfo& info, uint )
{
    if ( info.path().isEmpty() ) // remote file
        return false;

    TQFile f(info.path());
    if (!f.open(IO_ReadOnly))
        return false;

    // We are only interested in the header, which lives at the very start of
    // the document, so cap the amount read at something sensible.
    // Think a 0-filled 3GB file with an .html extension.
    const int maxBufSize = TQMIN(f.size(), 32768);
    TQByteArray data(maxBufSize + 1);

    // The buffer is not zero-initialised, so terminate it at the number of
    // bytes actually read; a short read must not expose garbage to the parser.
    // The result is bounded by maxBufSize, hence the narrowing is safe.
    const int bytesRead = static_cast<int>(f.readBlock(data.data(), maxBufSize));
    if (bytesRead < 0)
        return false;
    data[bytesRead]='\0';

    TQString s(data);

    TQRegExp exp;
    exp.setCaseSensitive(false);
    exp.setMinimal(true);

    KFileMetaInfoGroup group = appendGroup(info, "General");

    // The doctype and title lookups are independent of each other, so each
    // one scans the buffer from the beginning.
    exp.setPattern("\\s*<\\s*!doctype\\s*([^>]*)\\s*>");
    if (exp.search(s, 0) != -1)
    {
        kdDebug(7034) << "DocType: " << TQString(exp.capturedTexts().join("-")) << endl;
        appendItem(group, "Doctype", exp.cap(1));
    }

    TQString title;
    exp.setPattern("<\\s*title\\s*>\\s*(.*)\\s*<\\s*/\\s*title\\s*>");
    if (exp.search(s, 0) != -1)
        title = exp.cap(1);

    KFileMetaInfoGroup metatags = appendGroup(info, "Metatags");

    TQString meta, name, content;
    exp.setPattern("<\\s*meta\\s*([^>]*)\\s*>");
    TQRegExp rxName("(?:name|http-equiv)\\s*=\\s*\"([^\"]+)\"", false);
    TQRegExp rxContent("content\\s*=\\s*\"([^\"]+)\"", false);
    // Capture only the charset token itself: stop at whitespace or at a ';'
    // so that trailing parameters do not end up in the codec name.
    TQRegExp rxCharset("charset\\s*=\\s*([^ \t\r\n;]+)", false);
    TQTextCodec *codec = 0;

    // find the meta tags
    int start = 0;
    int last = 0;
    while ((start = exp.search(s, last)) != -1)
    {
        meta = exp.cap(1);
        // continue behind this tag on the next iteration
        last = start + exp.matchedLength();

        kdDebug(7034) << "Found Meta: " << meta << endl;

        // tags lacking either a name or a content attribute are skipped
        if (rxName.search(meta) == -1)
            continue;
        name = rxName.cap(1);

        if (rxContent.search(meta) == -1)
            continue;
        content = rxContent.cap(1);

        appendItem(metatags, name, content.left(50));

        // check if it has a charset defined
        if ( rxCharset.search(content) != -1 )
        {
            kdDebug(7034) << "CodecForName : " << rxCharset.cap(1) << endl;
            codec = TQTextCodec::codecForName(rxCharset.cap(1).ascii());
        }
    }

    if ( ! title.isEmpty() )
    {
        // The title was extracted before the charset was known, so decode
        // it again with the codec announced by the document, if any.
        if ( codec )
        {
          title = codec->toUnicode(title.ascii());
          kdDebug(7034) << "Codec : " << codec->name() << endl;
        }

        appendItem(group, "Title", title);
    }

    // Find out if it contains javascript. The tag usually carries attributes
    // (type, src, ...), so match the opening "<script" followed by a word
    // boundary instead of the literal "<script>".
    exp.setPattern("<\\s*script\\b");

    appendItem(group, "Javascript", TQVariant( s.find(exp)!=-1));

    return true;
}

