<?php

/**
* b3 (blogter blog backup) - Simon Benjamin - http://benjamin.hu/ - 2007.04.20.
*
* Leiras
* ------
* A http://www.blogter.hu/ blogrol export / backup keszites.
*
* Informaciok
* -----------
* Elozmenyek: http://benjamin.hu/2007/03/08/februari-hianyzas-es-valtas/
* A script lement minden (szovegesen elerheto) adatot a blogodrol:
*        bejegyzesek: cim, datum, hozzaszolasi lehetoseg, cimkek, bejegyzes
*        hozzaszolasok: nev, email, weboldal, ip, datum
*
* Hasznalat
* ---------
* Allitsd be a blogter adataidat (felhasznalonev, jelszo, blognev), es add meg, hogy milyen
* formatumban (szerializalt php tomb vagy XML) es modon (write vagy download - ha nem tudsz
* irhato konyvtarat megadni) szeretned lementeni a blog adatokat.
*
* Wordpress importalas eseten a ser.txt (serializalt.php) formatumot hasznald.
*
* Licensz
* -------
* GPL, a kod szabadon terjesztheto es modosithato a fejlesztesekrol visszajelzest kernek
*
* Kapcsolat
* ---------
* Javaslatokat, eszreveteleket, hibakat, kerdeseket a blogter2wp [kukac] benjamin [pont] hu cimre
* lehet kuldeni.
*
* Valtozasok
* ----------
* 0.2 - 2007.04.20.
*         Megjegyzes, $exportmethod bevezetese, bejegyzesek mentese kozben flush() hogy ne legyen
*         timeout ha bongeszobol hivjak meg (csak akkor ha $exportmethod == 'write'!).
*
* 0.1 - 2007.04.19.
*         Elso publikus megjelenes
**/

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
define('TAB', "\t");            // tab character
define('NL', "\r\n");           // newline sequence
define('CURL_TIMEOUT', 30);     // maximum number of seconds to wait for a URL download
define('CURL_USER_AGENT', 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.8.0.8) Gecko/20061025 Firefox/1.5.0.8');

// No execution time limit; raise the memory limit for the collected data.
set_time_limit(0);
ini_set('memory_limit', '32M');

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
$user = 'LOGIN';                // user name
$pass = 'PASSWORD';             // password
$blog = 'BLOG.blogter.hu';      // blog url
$host = 'http://www.blogter.hu';
$export = '';                   // export payload, built up further below
$exporttype = 'ser.txt';        // 'ser.txt' or 'xml' (ser.txt: serialized PHP array)
$exportmethod = 'write';        // 'write' - save to a file, 'download' - offer for download
$getcomments = TRUE;            // also collect the comments of every post

// Reverse HTML entity table: maps '&name;' back to the character it stands for.
$entities = array_flip(get_html_translation_table(HTML_ENTITIES));
// Windows carriage returns are replaced by nothing.
$entities["\r"] = '';

// Hungarian month names => month number (used to parse comment dates).
$months = array(
    'január' => 1,
    'február' => 2,
    'március' => 3,
    'április' => 4,
    'május' => 5,
    'június' => 6,
    'július' => 7,
    'augusztus' => 8,
    'szeptember' => 9,
    'október' => 10,
    'november' => 11,
    'december' => 12,
);
$postdata = array();

// "Log in": obtain the cookie string that is sent with every later request.
$COOKIE = getCookie($user, $pass);

// Collect the edit links of the blog posts from the archive page, e.g.
// href="index.php?action=edit_post&amp;blog_id=6158&amp;post_id=73327">[szerkesztés]</a>
$content = fetchURL('http://'.$blog.'/?archive', array(), $COOKIE);
$matches = array();
preg_match_all('!href=".*blog_id=([0-9]+)&amp;post_id=([0-9]+)">\[szerkeszt.s\]</a>!isU', (string) $content, $matches);

// When nothing matched (for example the login failed or the blog has no
// posts) there is no first hit, so fall back to an empty id instead of
// reading an undefined offset.
$BLOGID = isset($matches[1][0]) ? $matches[1][0] : '';
$posts = isset($matches[2]) ? $matches[2] : array();

// Main work: download every post (and optionally every comment) and build the
// export payload in $export. Skipped entirely when no post links were found.
if (checkArray($posts)) {
    // Patterns that pull one field each out of a post's edit form; capture
    // group 1 holds the value.
    $fieldPatterns = array(
        'title'   => '~name="post\[title\]"\s*value="([^"]+)"~i',
        'date'    => '~name="post\[date\]"\s*value="([^"]+)"~i',
        'comment' => '~id="post\[comments_allowed\]"\s*name="post\[comments_allowed\]"\s*(checked)?~i',
        'tags'    => '~id="post\[main_tags\]"\s*name="post\[main_tags\]"\s*value="([^"]+)"~i',
        // The U modifier already makes quantifiers lazy. Writing ".*?" under U
        // flips the group back to greedy, which would run up to the LAST
        // </textarea> of the page, so a plain ".*" is used here.
        'post'    => '~textarea\s*id="post\[postEditor\]"\s*name="post\[postEditor\]"[^>]+>(.*)</textarea>~isU',
    );

    // --- posts ---------------------------------------------------------------
    foreach ($posts as $postid) {
        $url = $host.'/index.php?action=edit_post&blog_id='.$BLOGID.'&post_id='.$postid;
        $content = (string) fetchURL($url, array(), $COOKIE);

        // A field whose pattern does not match (empty tags, unchecked box, ...)
        // becomes an empty string.
        $fields = array();
        foreach ($fieldPatterns as $name => $pattern) {
            $found = array();
            preg_match($pattern, $content, $found);
            $fields[$name] = iso8852(strtr(isset($found[1]) ? $found[1] : '', $entities));
        }

        // Progress output; flushed so a browser does not time out.
        if ($exportmethod == 'write') { p($url); flush(); }

        // Timestamp for sorting and importing. The value is split on
        // whitespace, ':' and '-' into year, month, day, hour, minute, second
        // (presumably "YYYY-MM-DD HH:MM:SS" - confirm against the edit form).
        // Missing time parts count as 0; an unparsable date yields stamp 0.
        $d = preg_split('/[\s:-]/', $fields['date'], -1, PREG_SPLIT_NO_EMPTY);
        if (is_array($d) && count($d) >= 3) {
            $d = array_pad(array_map('intval', $d), 6, 0);
            $stamp = mktime($d[3], $d[4], $d[5], $d[1], $d[2], $d[0]);
        } else {
            $stamp = 0;
        }

        $postdata[] = array(
            'postid'           => $postid,
            'title'            => $fields['title'],
            'last_save_date'   => $fields['date'],
            'stamp'            => $stamp,
            'comments_allowed' => (strcasecmp($fields['comment'], 'checked') === 0) ? 1 : 0,
            'tags'             => $fields['tags'],
            // NOTE(review): the post body is entity-decoded a second time here
            // (it already went through strtr/iso8852 above). Kept as in the
            // original - confirm whether the double decoding is intended.
            'html'             => iso8852(strtr(trim($fields['post']), $entities)),
        );
    }

    // Sort the posts chronologically.
    usort($postdata, 'mysort');

    // --- comments ------------------------------------------------------------
    $commentdata = array();
    if ($getcomments) {
        // Comments are read from the paginated comment list and attached to
        // their posts later on.
        $page = 1;

        // Needed per comment: post id, comment id, name, e-mail, website, ip, date.
        $matchstr = '~<li class="comment[^>]+>.*<a href="[^;]+;blog_id='.$BLOGID.'&amp;post_id=([0-9]+)">.*<a href="[^;]+;comment_id=([0-9]+)&[^>]+>.*<span class="first">Név:(.*)</span><span> E-mail:(.*)</span><span> weboldal: (.*)</span><span> IP-cím:(.*)</span>.*<span>Mikor:(.*)</span>~isU';

        do {
            $currentPage = $page;
            $url = $host.'/index.php?action=comments&blog_id='.$BLOGID.'&page='.$currentPage;
            $content = (string) fetchURL($url, array(), $COOKIE);

            // Page number behind the "next" link; 0 when there is none.
            $next = array();
            preg_match('~href="[^&]*&blog_id='.$BLOGID.'&page=([0-9]+)"[^>]*>k.vetkez.</a>~i', $content, $next);
            $page = isset($next[1]) ? (int) $next[1] : 0;
            // Only ever move forward, otherwise a link back to the same page
            // would loop forever.
            if ($page <= $currentPage) $page = 0;

            if ($exportmethod == 'write') { p($url); flush(); }

            $matches = array();
            preg_match_all($matchstr, $content, $matches);
            unset($matches[0]); // the full matches are not needed
            $sum = isset($matches[1]) ? count($matches[1]) : 0;

            for ($i = 0; $i < $sum; $i++) {
                $postid = $matches[1][$i];
                $commentid = $matches[2][$i];
                $email = $matches[4][$i];

                if ($email == stripHTML($email)) {
                    // no markup in the cell => no e-mail address was given
                    $email = '';
                } else {
                    // the address sits in a mailto: link
                    $emailmatch = array();
                    preg_match('~mailto:([^"]+)"~i', $email, $emailmatch);
                    $email = isset($emailmatch[1]) ? $emailmatch[1] : '';
                }

                $arr = array(
                    'name'  => stripHTML($matches[3][$i]),
                    'email' => $email,
                    'www'   => stripHTML($matches[5][$i]),
                    'ip'    => stripHTML($matches[6][$i]),
                    'date'  => $matches[7][$i],
                );

                // Timestamp for sorting and importing, e.g.
                // 2007. január 24. 02:32 -> 1169602320
                // An unknown month name or a malformed date yields stamp 0.
                $d = preg_split('/[.\s:]/', $arr['date'], -1, PREG_SPLIT_NO_EMPTY);
                if (is_array($d) && count($d) >= 5 && isset($months[$d[1]])) {
                    $arr['stamp'] = mktime((int) $d[3], (int) $d[4], 0, $months[$d[1]], (int) $d[2], (int) $d[0]);
                } else {
                    $arr['stamp'] = 0;
                }

                // The comment text comes from its own edit page, because the
                // list shows it without any HTML formatting.
                $commentPage = (string) fetchURL($host.'/index.php?action=edit_comment&comment_id='.$commentid.'&blog_id='.$BLOGID, array(), $COOKIE);
                $commentmatch = array();
                preg_match('~<textarea.*name="comment\[comment\]"[^>]*>(.*)</textarea>~isU', $commentPage, $commentmatch);
                $arr['comment'] = iso8852(strtr(trim(isset($commentmatch[1]) ? $commentmatch[1] : ''), $entities));

                // Collect the comments per post (everything is kept in memory).
                $commentdata[$postid][] = $arr;
            }

        // Continue while there is a next page (capped at page 1000).
        } while ($page && $page <= 1000);

        // Sort the comments of every post chronologically.
        foreach ($commentdata as $postid => $tmparr) {
            usort($tmparr, 'mysort');
            $commentdata[$postid] = $tmparr;
        }
    }

    // --- build the export ----------------------------------------------------
    if (checkArray($postdata)) {
        switch ($exporttype) {
            case 'xml':
                // Plain byte-wise escaping so the result does not depend on
                // the character set of the scraped text.
                $xmlFrom = array('&', '<', '>', '"');
                $xmlTo = array('&amp;', '&lt;', '&gt;', '&quot;');

                $export .= '<blog url="'.str_replace($xmlFrom, $xmlTo, $blog).'">'.NL;
                foreach ($postdata as $post) {
                    $export .= TAB.'<post>'.NL;
                    foreach ($post as $k => $v) {
                        if ($k === 'html') {
                            // a literal "]]>" would end the CDATA section early
                            $v = '<![CDATA[ '.str_replace(']]>', ']]]]><![CDATA[>', (string) $v).' ]]>';
                        } else {
                            $v = str_replace($xmlFrom, $xmlTo, (string) $v);
                        }
                        $export .= TAB.TAB.'<'.$k.'>'.trim($v).'</'.$k.'>'.NL;
                    }

                    $pid = $post['postid'];
                    if (isset($commentdata[$pid]) && checkArray($commentdata[$pid])) {
                        $export .= TAB.TAB.TAB.'<comments>'.NL;
                        foreach ($commentdata[$pid] as $comment) {
                            $export .= TAB.TAB.TAB.TAB.'<comment>'.NL;
                            foreach ($comment as $k => $v) {
                                if ($k === 'comment') {
                                    $v = '<![CDATA[ '.str_replace(']]>', ']]]]><![CDATA[>', (string) $v).' ]]>';
                                } else {
                                    $v = str_replace($xmlFrom, $xmlTo, (string) $v);
                                }
                                $export .= TAB.TAB.TAB.TAB.TAB.'<'.$k.'>'.trim($v).'</'.$k.'>'.NL;
                            }
                            $export .= TAB.TAB.TAB.TAB.'</comment>'.NL;
                        }
                        $export .= TAB.TAB.TAB.'</comments>'.NL;
                    }
                    $export .= TAB.'</post>'.NL;
                }
                $export .= '</blog>'.NL;
                break;

            case 'ser.txt':
                // Attach the comments to their posts, then serialize everything.
                foreach ($postdata as $k => $post) {
                    $pid = $post['postid'];
                    if (isset($commentdata[$pid]) && checkArray($commentdata[$pid])) {
                        $postdata[$k]['comments'] = $commentdata[$pid];
                    }
                }
                if ($exportmethod == 'write') p($postdata);
                $export .= serialize(array('posts' => $postdata, 'blog' => $blog));
                break;
        }
    }
}

// Deliver the export: either write it next to the script or send it to the
// browser. The file name is needed by both branches, so it is built first
// (it used to be set only in the 'write' branch, leaving the download
// without a file name).
$savename = 'blogter.export.'.$BLOGID.'.'.$exporttype;

if ($exportmethod == 'write') {
    $fp = fopen('./'.$savename, 'w');
    if ($fp) {
        fwrite($fp, $export);
        fclose($fp);
    } else {
        // Report the failure instead of dropping the export silently.
        p('Cannot open ./'.$savename.' for writing');
    }
} elseif ($exportmethod == 'download') {
    // Offer the export for saving. No line break may be appended to the
    // header strings: header() refuses values that contain a newline.
    header('Content-type: application/blogterexport');
    header('Content-disposition: inline; filename="'.$savename.'"');
    echo $export;
}

/**
 * Debug helper: echoes a print_r() dump of the value wrapped in <pre> tags.
 */
function p($v)
{
    $dump = print_r($v, TRUE);
    echo '<pre>'.$dump.'</pre>';
}
/**
 * Tells whether the argument is an array with at least one element.
 *
 * The argument is taken by reference, so an undefined variable or array
 * offset can be passed in.
 */
function checkArray(&$array)
{
    if (!is_array($array)) {
        return FALSE;
    }

    return count($array) > 0;
}

/**
 * usort() callback: orders two records ascending by their 'stamp' entry.
 *
 * Returns 0 for equal stamps, 1 when $a is later than $b and -1 otherwise.
 */
function mysort($a, $b)
{
    $left = $a['stamp'];
    $right = $b['stamp'];

    if ($left != $right) {
        if ($left > $right) {
            return 1;
        }
        return -1;
    }

    return 0;
}

/**
 * Logs in by posting the credentials to the main page and returns the cookies
 * of the response as a string usable for CURLOPT_COOKIE.
 *
 * As before, only the first two Set-Cookie values are used. When the response
 * carries fewer (for example because the request failed) the available ones
 * are returned - possibly an empty string - instead of reading an undefined
 * offset.
 *
 * @param string $user login name
 * @param string $pass password
 * @return string cookie pairs joined with '; '
 */
function getCookie($user, $pass)
{
    global $host;

    $arr = array('login[username]' => $user, 'login[password]' => $pass);
    $content = fetchURL($host.'/index.php?action=mainpage', $arr);

    // fetchURL() returns the response headers too, so the cookies can be
    // read straight from the text; each value ends at the first ';'.
    $matches = array();
    preg_match_all("!Set-Cookie:([^;]+)!i", (string) $content, $matches);

    $cookies = isset($matches[1]) ? array_slice($matches[1], 0, 2) : array();

    return implode('; ', array_map('trim', $cookies));
}

/**
 * Removes everything that looks like a tag: a '<', one or more characters
 * other than '>', and the closing '>'.
 */
function stripHTML($str)
{
    $tagPattern = '~<[^>]+>~i';
    $withoutTags = preg_replace($tagPattern, '', $str);

    return $withoutTags;
}

/**
 * Downloads a URL with cURL and returns the response, headers included.
 *
 * A non-empty $post_data array turns the request into a POST. The result of
 * curl_getinfo() for the transfer is stored in the global $curlinfo. A cURL
 * error number and message are dumped with p().
 *
 * @param string $url       address to fetch (also sent as the referer)
 * @param array  $post_data field name => value pairs to POST
 * @param string $cookie    value for the Cookie header, if any
 * @param mixed  $nobody    passed to CURLOPT_NOBODY
 * @return mixed whatever curl_exec() returns
 */
function fetchURL($url, $post_data = array(), $cookie = NULL, $nobody = 0)
{
    global $curlinfo;

    $handle = curl_init($url);

    // The options are collected first and applied below in exactly this order.
    $options = array(
        CURLOPT_VERBOSE        => TRUE,
        CURLOPT_HEADER         => TRUE,
        CURLOPT_NOBODY         => $nobody,
        // the answer has to arrive within this many seconds
        CURLOPT_TIMEOUT        => CURL_TIMEOUT,
        // tell the server something about ourselves
        CURLOPT_USERAGENT      => CURL_USER_AGENT,
        CURLOPT_REFERER        => $url,
        // hand the output back instead of printing it
        CURLOPT_RETURNTRANSFER => 1,
        // Location: redirects are not followed
        CURLOPT_FOLLOWLOCATION => 0,
    );

    if (checkArray($post_data)) {
        $pairs = array();
        foreach ($post_data as $field => $value) {
            $pairs[] = $field.'='.rawurlencode($value);
        }
        $options[CURLOPT_POST] = TRUE;
        $options[CURLOPT_POSTFIELDS] = implode('&', $pairs);
    }

    // send the cookie data
    if (!empty($cookie)) {
        $options[CURLOPT_COOKIE] = $cookie;
    }

    // SSL verification is switched off
    $options[CURLOPT_SSL_VERIFYHOST] = FALSE;
    $options[CURLOPT_SSL_VERIFYPEER] = FALSE;

    foreach ($options as $option => $setting) {
        curl_setopt($handle, $option, $setting);
    }

    $response = curl_exec($handle);
    $curlinfo = curl_getinfo($handle);

    $failure = curl_errno($handle);
    $message = curl_error($handle);
    if ($failure) {
        p($failure);
        p($message);
    }

    curl_close($handle);

    return $response;
}

/**
 * Replaces HTML entities and a few typographic characters with simpler
 * equivalents: Hungarian accented letters, plain quotes, hyphens, "..." and
 * so on. Going by the name this targets ISO-8859-2 text - TODO confirm.
 *
 * The keys of the first table are entity names WITHOUT the surrounding '&'
 * and ';' - both are added when the search strings are built. 'agrave' and
 * 'Agrave' used to be listed with the '&' and ';' already in the key and
 * therefore never matched.
 *
 * @param string $text text to clean up
 * @return string the text with the replacements applied
 */
function iso8852 ($text)
{
    $entityMap = array(
        'agrave' => 'á', 'aacute' => 'á', 'Agrave' => 'Á', 'Aacute' => 'Á', 'eacute' => 'é', 'Eacute' => 'É', 'iacute' => 'í', 'Iacute' => 'Í',
        'uacute' => 'ú', 'Uacute' => 'Ú', 'oacute' => 'ó', 'Oacute' => 'Ó', 'ouml' => 'ö', 'Ouml' => 'Ö',
        'uuml' => 'ü', 'Uuml' => 'Ü', 'otilde' => 'ő', 'Otilde' => 'Ő', 'ocirc' => 'ő', 'Ocirc' => 'Ő',
        'utilde' => 'ű', 'Utilde' => 'Ű', 'ucirc' => 'ű', 'Ucirc' => 'Ű',
        '#132' => '"', '#148' => '"', '#232' => 'é',
        // NOTE(review): '#236' => 'á' looks like a typo for 'í' (compare '#204' => 'Í'); left as is - confirm.
        '#189' => '1/2', '#190' => '3/4', '#200' => 'É', '#204' => 'Í', '#224' => 'á', '#236' => 'á', '#305' => 'i',
        '#213' => 'Ő', '#219' => 'Ű', '#245' => 'ő', '#251' => 'ű', '#337' => 'ő', '#336' => 'Ő', '#369' => 'ű',
        '#8211' => '-', '#8212' => '-', '#8216' => "'", '#8217' => "'", '#8220' => '"', '#8221' => '"', '#8222' => '"', '#8226' => '-', '#039' => "'",
        '#8230' => '...', 'nbsp' => ' ',
        'hellip' => '...',
        'frac12' => '1/2', 'frac14' => '1/4', 'frac34' => '3/4',
        '#174' => '&reg;', '#8482' => '&trade;',
        '#8364' => '&euro;', '#9830' => '-', '#9679' => '-', '#9472' => '-', '#1048698' => '-', '#8209' => '-', '#187' => '-', '#183' => '-',
        '#61623' => '-', '#61618' => '-', '#61607' => '-', '#61558' => '-',
        '#65306' => ':',
        // 'rsquo' is the same character as '#8217' above, so it maps to the
        // same apostrophe (it used to map to a double quote).
        'bdquo' => '"', 'rdquo' => '"', 'rsquo' => "'",
        'ndash' => '-', 'mdash' => '-',
        'middot' => '-',
    );

    // Build the full '&name;' search strings; str_replace() then applies the
    // pairs one after the other, in table order.
    $search = array();
    foreach (array_keys($entityMap) as $name) {
        $search[] = '&'.$name.';';
    }
    $text = str_replace($search, array_values($entityMap), $text);

    // Literal typographic characters.
    $charMap = array(
        // the single-character ellipsis produced by Word
        '…' => '...',
        '”' => '"', '„' => '"',
        // this is not a plain hyphen
        '–' => '-',
    );
    $text = str_replace(array_keys($charMap), array_values($charMap), $text);

    return $text;
}

?>