Универсальный парсер

Универсальный парсер 0.7.2

Crow

Пользователь
Это, как вариант, защита рутрекера от всевозможных парсеров-копипастеров.
Можно так:
PHP:
// Rewrites <span class="post-font-serif1">…</span> into a [font="Georgia"]…[/font] BBCode tag; only spans whose content contains no "<" are matched.
$text = preg_replace('/<span class="post-font-serif1">([^<]*?)<(?=\/)\/span>/', "[font=\"Georgia\"]\\1[/font]", $text);
ну и так далее...
И этот вариант со шрифтом тоже уже не работает.
trey.png
 
Последнее редактирование:

dexer7

Пользователь
Crow,
PHP:
<?php
/**
 * Dispatches a release URL to the parser that belongs to the given source tracker.
 *
 * @param string $url        Address of the release page to parse.
 * @param string $p_source   Tracker key: "rutracker", "new-rutor" or "nnmclub".
 * @param int    $gettorrent Passed through unchanged; the parsers compare it with === 1.
 * @return array|null Whatever the selected parser returns, or null when $p_source is not recognised.
 */
function parse_release($url,$p_source, $gettorrent)
{
    // Explicit default: an unknown source used to read an undefined variable here.
    $p_release = null;

    switch ($p_source){
        case "rutracker": $p_release = parse_rutracker($url, $gettorrent);break;
        case "new-rutor": $p_release = parse_rutor($url, $gettorrent);break;
        case "nnmclub": $p_release = parse_nnm($url, $gettorrent);break;
    }
    return $p_release;
}

/**
 * Parses a release topic on nnmclub.to into a title, BBCode text and optional attachment fields.
 *
 * The function logs in (re-using a cookie jar that is refreshed once it is older than 24 hours),
 * loads the topic, finds the id of the first post and requests that post through the forum's
 * "quote" form, from which the BBCode and the title are extracted.
 *
 * @param string $url        Address of the topic page.
 * @param int    $gettorrent When exactly 1, the .torrent file is fetched via get_torrent().
 * @return array Array with the keys "title", "bbcode" and "hidden". Parts that could not be
 *               extracted are returned as empty strings.
 */
function parse_nnm($url, $gettorrent)
{
    global $bb_cfg;

    // Everything in this file's path that precedes "library" is used as the base directory.
    preg_match("#(.*)library#i", __FILE__, $pocket);
    $base_dir = isset($pocket[1]) ? $pocket[1] : '';

    $cookie_file = $base_dir."data".DIRECTORY_SEPARATOR."cookies".DIRECTORY_SEPARATOR."nnm.txt";
    if ( (!file_exists($cookie_file)) || (time() - filemtime($cookie_file) > 86400)) {
        // Credentials are URL-encoded so that characters such as "&", "=" or "+" cannot
        // corrupt the form body.
        $options_cookie = array(
            CURLOPT_URL => 'http://nnmclub.to/forum/login.php',
            CURLOPT_REFERER => $url,
            CURLOPT_COOKIEFILE => $cookie_file,
            CURLOPT_COOKIEJAR => $cookie_file,
            CURLOPT_POSTFIELDS => 'username=' . urlencode($bb_cfg['parsmod_nnm_login']) . '&password=' .
                urlencode($bb_cfg['parsmod_nnm_password']) . '&autologin=on&redirect=&login=Вход');
        make_cookie($options_cookie);
    }

    $topic = get_url(array(CURLOPT_URL => $url, CURLOPT_COOKIEFILE => $cookie_file, CURLOPT_COOKIEJAR => $cookie_file));
    if (!is_string($topic)) {
        // The request failed; continue with an empty page so every extraction below yields ''.
        $topic = '';
    }

    // The anchor name of the first post cell carries the post id.
    $quoted_post = '';
    if (preg_match("#<td width=\"150\" align=\"left\" valign=\"top\" class=\"row1\"><a name=\"(\d+)\">#i", $topic, $pocket)) {
        $quote_post_url = "http://nnmclub.to/forum/posting.php?mode=quote&p=".$pocket[1];
        $options_topic = array(CURLOPT_URL => $quote_post_url, CURLOPT_COOKIEFILE => $cookie_file, CURLOPT_COOKIEJAR => $cookie_file);
        $quoted_post = mb_convert_encoding((string) get_url($options_topic), "utf-8", "windows-1251");
    }

    preg_match("#tabindex=\"2\" class=\"post\" value=\"Re: (.*?)\" />#si", $quoted_post, $r_title);
    preg_match("#<textarea.*?\";p=\"\d+\"\](.*)\[/quote\]</textarea>#si", $quoted_post, $post);

    $title = isset($r_title[1]) ? $r_title[1] : '';
    $text  = isset($post[1]) ? $post[1] : '';

    // Map the site-specific tags onto their counterparts: [hide] -> [spoiler], [poster] -> [img].
    $text = preg_replace("#\[hide=(.*?)\](.*?)\[/hide\]#si", "[spoiler=\"\\1\"]\\2[/spoiler]", $text);
    $text = preg_replace("#\[poster=(.*?)\](.*?)\[/poster\]#si", "[img=\\1]\\2[/img]", $text);

    $torrent_hidden = '';
    if ($gettorrent === 1
        && preg_match("#<a href=\"download.php\?id=(\d+)\" rel=\"nofollow\">#si", $topic, $r_torrent)) {
        $torrent_url = "http://nnmclub.to/forum/download.php?id=".$r_torrent[1];

        $options_torrent = array(
            CURLOPT_URL => $torrent_url,
            CURLOPT_REFERER => $torrent_url,
            CURLOPT_COOKIEFILE => $cookie_file,
            CURLOPT_COOKIEJAR => $cookie_file);
        $torrent_hidden = get_torrent($options_torrent);
    }

    //return
    $pars_data = array("title" => $title, "bbcode" => strip_tags($text), "hidden" => $torrent_hidden);
    return $pars_data;
}

/**
 * Parses a release page on new-rutor.org into a title, BBCode text and optional attachment fields.
 *
 * The description block is cut out of the page and its HTML is rewritten into BBCode step by
 * step: links, images, rulers, spoilers, simple paired tags, font sizes and finally
 * colour / font-family spans. Whatever HTML is still left is removed by strip_tags().
 *
 * @param string $url        Address of the release page.
 * @param int    $gettorrent When exactly 1, the .torrent file is fetched via get_torrent().
 * @return array Array with the keys "title", "bbcode" and "hidden". Parts that could not be
 *               extracted are returned as empty strings.
 */
function parse_rutor($url, $gettorrent)
{
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'IE20');
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $copy_page = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; continue with an empty page so the result is empty.
    if (!is_string($copy_page)) {
        $copy_page = '';
    }

    preg_match("#\<table id=\"details\"\>.*?<br />(.*?)\<tr\>\<td class=\"header\"\>#si", $copy_page, $copy_post);
    preg_match ("#<title>new-rutor.org :: (.*?)</title>#si", $copy_page, $r_title);

    $copy_release = isset($copy_post[1]) ? $copy_post[1] : '';
    $title = isset($r_title[1]) ? $r_title[1] : '';

    $text = preg_replace("#<a href=\"(.*?)\".*?>(.*?)</a>#si", "[url=\\1]\\2[/url]", $copy_release);
    $text = preg_replace("#<img src=\"(\S*?)\" style=\"float:(.*?);\" />#si", "[img=\\2]\\1[/img]", $text);
    $text = preg_replace("#<img src=\"(\S*?)\" />#si", "[img]\\1[/img]", $text);
    $text = preg_replace("#<hr />#si", "[hr]", $text);

    // In every loop below the matched fragment is swapped with str_replace(): it is a literal
    // string, so neither pattern delimiters inside it nor "$1"/"\1" sequences inside the
    // replacement can be misinterpreted. The patterns carry no "e" modifier because it has no
    // meaning for preg_match() and makes the pattern invalid on PHP 7 and later.

    //hide
    while (preg_match("#<div class=\"hidewrap\">.*?this\)\)\">(.*?)</div>.*?<textarea class=\"hidearea\">(.*?)</textarea></div>#si", $text, $match))
    {
        $replace = "[spoiler=\"".strip_tags($match[1])."\"]".$match[2]."[/spoiler]";
        $text = str_replace($match[0], $replace, $text);
    }
    //u i b center
    while (preg_match("#<(.*?)>(.*?)<\/\\1>#si", $text, $match))
    {
        switch ($match[1])
        {
          case "center": $replace = "[align=".$match[1]."]".$match[2]."[/align]"; break;
          default: $replace = "[".$match[1]."]".$match[2]."[/".$match[1]."]";break;
        }
        $text = str_replace($match[0], $replace, $text);
    }

    //size
    while (preg_match("#<font size=\"(\d+)\">(.*?)<\/font>#si", $text, $match))
    {
        $replace = "[size=".(intval($match[1])+10)."]".$match[2]."[/size]";
        $text = str_replace($match[0], $replace, $text);
    }
    //color font
    while (preg_match("#<(?:span) (?:(?:style=\"((?:[^>]*?)):((?:[^>]*?));\">))((?:(?!<(?:span) ).)*?)</(?:span)>#si", $text, $match))
    {
        switch ($match[1]){
          case "font-family": $replace = "[font=\"".$match[2]."\"]".$match[3]."[/font]";break;
          case "color":    $replace = "[".$match[1]."=".$match[2]."]".$match[3]."[/".$match[1]."]";break;
          // Any other style property: drop the span but keep its content, instead of reusing
          // the replacement left over from a previous iteration (or none at all).
          default: $replace = $match[3];break;
        }
        $text = str_replace($match[0], $replace, $text);
    }

    $torrent_hidden = '';
    if ($gettorrent === 1
        && preg_match ("#<a href=\"(http:\/\/d\.rutor\.org\/download\/\d+)\"#si", $copy_page, $r_torrent)) {
        $options_torrent = array(CURLOPT_URL => $r_torrent[1]);
        $torrent_hidden = get_torrent($options_torrent);
    }

    $pars_data = array("title" => $title, "bbcode" => strip_tags($text), "hidden" => $torrent_hidden);
    return $pars_data;

}

/**
 * Parses a rutracker.org topic into a title, BBCode text and optional attachment fields.
 *
 * The first post body is cut out of the page (converted from windows-1251 to UTF-8) and its
 * HTML is rewritten into BBCode: line breaks, rulers, images and links first, then styled
 * span/div wrappers from the innermost outwards, then spoiler / code / quote boxes.
 * Whatever HTML is still left is removed by strip_tags().
 *
 * @param string $url        Address of the topic; its "t=<digits>" part is used as the torrent id.
 * @param int    $gettorrent When exactly 1, the .torrent file is fetched via get_torrent().
 * @return array Array with the keys "title", "bbcode" and "hidden". Parts that could not be
 *               extracted are returned as empty strings.
 */
function parse_rutracker($url, $gettorrent)
{
    global $bb_cfg;

    $raw_page = file_get_contents($url);
    if ($raw_page === false) {
        // Download failed; continue with an empty page so the result is simply empty.
        $raw_page = '';
    }
    $copy_page = mb_convert_encoding($raw_page, "utf-8", "windows-1251");
    preg_match("#<div class=\"post_body\" id=\"p-\d+\">(.*?)<div class=\"clear\" style=\"height: 8px;\"></div>#si", $copy_page, $copy_post);
    preg_match ("#<title>(.*?)</title>#si", $copy_page, $r_title);
    $copy_release = isset($copy_post[1]) ? $copy_post[1] : '';
    $title = isset($r_title[1]) ? $r_title[1] : '';

    $text = preg_replace("#<span class=\"post-br\"><br /></span>#si", "\r\n\r\n", $copy_release);
    $text = str_replace('<span class="post-br"><br></span>',"\r\n\r\n", $text);
    $text = preg_replace("#<span class=\"post-hr\">-</span>#si", "[hr]", $text);

    $text = preg_replace("#<var class=\"postImg\" title=\"(.*?)\">.*?</var>#si", "[img]\\1[/img]", $text);
    $text = preg_replace("#<var class=\"postImg postImgAligned img-(.*?)\" title=\"(.*?)\">.*?</var>#si", "[img=\\1]\\2[/img]", $text);
    $text = preg_replace("#<a href=\"(.*?)\" class=\"(.*?)\">(.*?)</a>#si", "[url=\\1]\\3[/url]", $text);

    // Innermost styled <span>/<div> wrappers, one per iteration. Capture groups:
    //   1,2   class="…" style="…: VALUE;"
    //   3     class="post-NAME"
    //   4,5   style="PROPERTY: VALUE;"
    //   6,7,8 style="…: …" class="post-…"
    //   9     the wrapped content
    // No "e" modifier: it has no meaning for preg_match() and makes the pattern invalid on
    // PHP 7 and later, which would stop this loop from ever running.
    while (preg_match("#<(?:span|div) (?:(?:class=\"([^>]*?)\") (?:style=\"(?:[^>]*?): ((?:[^>]*?));\">)|(?:class=\"post-((?:[^>]*?))\">)|(?:style=\"((?:[^>]*?)): ((?:[^>]*?));\">)|(?:(?:style=\"((?:[^>]*?)): ((?:[^>]*?))\") (?:class=\"post-((?:[^>]*?))\">)))((?:(?!<(?:span|div) ).)*?)</(?:span|div)>#si", $text, $match))
    {
        // Default for wrappers that none of the rules below recognise: keep the content and
        // drop the wrapper, instead of reusing a replacement from a previous iteration.
        $replace = $match[9];

        if (!empty($match[1])) {
            switch ($match[1]){
                case "p-color": $replace = "[color=".$match[2]."]".$match[9]."[/color]";break;
                case "post-align": $replace = "[align=".$match[2]."]".$match[9]."[/align]";break;
            }
        }
        if (!empty($match[3])) {
            if ($match[3] === "font-serif1") {
                // Handled here rather than by a separate preg_replace() after the swap: the
                // generic rule below would otherwise already have turned this span into
                // [font-serif1] before that replacement ran.
                $replace = "[font=\"Georgia\"]".$match[9]."[/font]";
            } else {
                $replace = "[".$match[3]."]".$match[9]."[/".$match[3]."]";
            }
        }
        if (!empty($match[4])) {
            switch ($match[4]){
                case "font-family": $replace = "[font=\"".$match[5]."\"]".$match[9]."[/font]";break;
                case "font-size":
                    // Only the leading digits of the value are used as the size.
                    if (preg_match("#^(\d+)#si", $match[5], $pocket)) {
                        $replace = "[size=".$pocket[1]."]".$match[9]."[/size]";
                    }
                break;
            }

        }
        if (!empty($match[6])) $replace = $match[9];

        // Literal swap: the fragment may contain any character and the replacement must not
        // be scanned for "$1"/"\1" references.
        $text = str_replace($match[0], $replace, $text);
    }

    // Boxes built from an outer <div class="X"> with two inner <div>s: group 1 is the outer
    // class, group 2 the content of the first inner div, group 3 the content of the second.
    while (preg_match("#<div class=\"((?:[^>]*?))\">(?:(?:(?!<div ).)*?)<div class=\"(?:(?:[^>]*?))\">((?:(?!<div ).)*?)</div>(?:(?:(?!<div ).)*?)<div class=\"(?:(?:[^>]*?))\">((?:(?!<div ).)*?)</div>(?:(?:(?!<div ).)*?)</div>#si", $text, $match))
    {
        switch ($match[1]) {
            case "sp-wrap":$replace="[spoiler=\"".$match[2]."\"]".$match[3]."[/spoiler]";break;
            case "c-wrap": $replace="[code]".$match[3]."[/code]"; break;
            case "q-wrap":
                $tag_q='';
                // strripos() returns an offset or false, so compare explicitly.
                if (strripos($match[2], "</b> ") !== false
                    && preg_match("#<b>(.*?)</b>#i",$match[2], $q_author)) {
                    $tag_q= "=\"".$q_author[1]."\"";
                }
                $replace="[quote".$tag_q."]".$match[3]."[/quote]";
            break;
            default:
                // Unknown box type: keep the second inner block as plain content.
                // NOTE(review): assumes the second inner div is the body — confirm.
                $replace = $match[3];
            break;
        }
        $text = str_replace($match[0], strip_tags($replace), $text);
    }

    $torrent_hidden = '';
    if ($gettorrent === 1 && preg_match ("#t=(\d+)#si", $url, $r_torrent)) {
        $torrent_url = "http://dl.rutracker.org/forum/dl.php?t=".$r_torrent[1];
        preg_match("#(.*)library.includes#i", __FILE__, $pocket);
        $base_dir = isset($pocket[1]) ? $pocket[1] : '';
        $cookie_file = $base_dir."data".DIRECTORY_SEPARATOR."cookies".DIRECTORY_SEPARATOR."rutracker.txt";
        // Log in again when there is no cookie jar yet or it is older than 24 hours.
        if ( (!file_exists($cookie_file)) || (time() - filemtime($cookie_file) > 86400)) {
            // Credentials are URL-encoded so that characters such as "&", "=" or "+" cannot
            // corrupt the form body.
            // NOTE(review): the value after "login=" looks like mis-decoded Cyrillic text; it
            // is kept byte-for-byte because the value the site expects cannot be confirmed here.
            $options_cookie = array(
                CURLOPT_URL => 'http://login.rutracker.org/forum/login.php',
                CURLOPT_REFERER => $url,
                CURLOPT_COOKIEFILE => $cookie_file,
                CURLOPT_COOKIEJAR => $cookie_file,
                CURLOPT_POSTFIELDS => 'redirect=index.php&login_username=' . urlencode($bb_cfg['parsmod_rutracker_login']) .'&login_password=' .
                    urlencode($bb_cfg['parsmod_rutracker_password']) . '&ses_short=1&login=Р’С…РѕРґ');
            make_cookie($options_cookie);
        }

        $options_torrent = array(
            CURLOPT_URL => $torrent_url,
            CURLOPT_REFERER => $url,
            CURLOPT_COOKIEFILE => $cookie_file,
            CURLOPT_COOKIEJAR => $cookie_file,
            CURLOPT_COOKIE => "bb_dl=".$r_torrent[1]);
        $torrent_hidden = get_torrent($options_torrent);
    }

    //return

    $pars_data = array("title" => $title, "bbcode" => strip_tags($text), "hidden" => $torrent_hidden);
    return $pars_data;
}

/**
 * Downloads a .torrent file and returns the hidden form inputs that describe it as an attachment.
 *
 * The file is stored under data/old_files (relative to the part of this file's path that
 * precedes "library/includes") with a random numeric name.
 *
 * @param array $curl_options cURL options (CURLOPT_* => value) applied on top of the defaults
 *                            set here: return transfer, follow redirects, user agent.
 * @return string HTML with hidden <input> elements for the stored file, or an empty string
 *                when nothing could be downloaded. The script is terminated with die() when
 *                the file cannot be written.
 */
function get_torrent($curl_options)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "Opera/12.00 (Windows NT 5.1; U; ru) Presto/2.2.0");
    curl_setopt_array($ch, $curl_options);
    $torrent_data = curl_exec ($ch);
    curl_close ($ch);

    // A failed or empty download must not be stored as a 0-byte attachment; an empty string
    // is the same value the parsers use when no torrent was requested.
    if (!is_string($torrent_data) || $torrent_data === '') {
        return '';
    }

    preg_match("#(.*)library.includes#i", __FILE__, $pocket);
    $storage_dir = (isset($pocket[1]) ? $pocket[1] : '')."data".DIRECTORY_SEPARATOR."old_files".DIRECTORY_SEPARATOR;

    // Draw a new name until it is unused, so an existing stored file is never overwritten.
    do {
        $phys_name = "".rand(100000000, 999999999)."";
        $torrent_place = $storage_dir.$phys_name;
    } while (file_exists($torrent_place));
    $real_name = "[".rand(100000000, 999999999)."].torrent";

    // Warning suppressed on purpose: the failure is reported explicitly right below.
    $bytes = @file_put_contents($torrent_place, $torrent_data);
    if ($bytes === false) {
        die("Parser: unable to write the downloaded torrent file to data/old_files");
    }

    $hidden =  '
            <input type="hidden" name="add_attachment_body" value="0" />
            <input type="hidden" name="posted_attachments_body" value="0" />
        <input type="hidden" name="attachment_list[]" value="'.$phys_name.'" />
        <input type="hidden" name="filename_list[]" value="'.$real_name.'" />
        <input type="hidden" name="extension_list[]" value="torrent" />
        <input type="hidden" name="mimetype_list[]" value="application/x-bittorrent" />
        <input type="hidden" name="filesize_list[]" value="'.$bytes.'" />
        <input type="hidden" name="filetime_list[]" value="'.time().'" />
        <input type="hidden" name="attach_id_list[]" value="0" />
        <input type="hidden" name="attach_thumbnail_list[]" value="0" />';

    return $hidden;
}


/**
 * Performs a POST request whose only purpose is its side effect on the cookie jar.
 *
 * The response body is discarded.
 *
 * @param array $curl_options cURL options (CURLOPT_* => value) applied after the defaults
 *                            below, so they can override them.
 * @return bool Always true.
 */
function make_cookie($curl_options)
{
    $defaults = array(
        CURLOPT_USERAGENT => "Opera/12.00 (Windows NT 5.1; U; ru) Presto/2.2.0",
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_POST => 1,
        CURLOPT_FOLLOWLOCATION => 1,
    );

    $handle = curl_init();
    curl_setopt_array($handle, $defaults);
    curl_setopt_array($handle, $curl_options);
    curl_exec($handle);
    curl_close($handle);

    return true;
}

/**
 * Deletes the rows that belong to one attachment from the attachment and torrent tables.
 *
 * @param int|string $tor_id Attachment id; it is cast to an integer before being placed into
 *                           the SQL text, so a non-numeric value can no longer alter the query.
 * @return bool Always true.
 */
function delete_old_torrent($tor_id)
{
    $attach_id = (int) $tor_id;

    // Same order as before: description table, attachment table, torrent table.
    foreach (array(BB_ATTACHMENTS_DESC, BB_ATTACHMENTS, BB_BT_TORRENTS) as $table) {
        DB()->query("DELETE FROM ". $table ." WHERE attach_id = $attach_id");
    }

    return true;
}

/**
 * Requests a page with cURL and hands back the response.
 *
 * @param array $curl_options cURL options (CURLOPT_* => value) applied after the defaults
 *                            below, so they can override them.
 * @return string|bool The result of curl_exec(): the response body, or false on failure.
 */
function get_url($curl_options)
{
    $defaults = array(
        CURLOPT_USERAGENT => "Opera/12.00 (Windows NT 5.1; U; ru) Presto/2.2.0",
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_POST => 1,
        CURLOPT_FOLLOWLOCATION => 1,
    );

    $handle = curl_init();
    curl_setopt_array($handle, $defaults);
    curl_setopt_array($handle, $curl_options);
    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
Здесь поправлено согласно твоим жалобам.
 

xqsI

Пользователь
Всё сделал по инструкции. Ошибок вроде нет, но торрент-файл не добавляется. Паршу с rutracker.org и rutor.info: всё парсится, кроме торрента.
1.jpg 1_2.jpg 2.jpg
 
Сверху