302 lines
7.3 KiB
PHP
302 lines
7.3 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* MultiCurl class provides a convenient way to execute parallel HTTP(S)
|
||
|
* requests via PHP MULTI CURL extension with additional restrictions.
|
||
|
* For example: start 100 downloads with 2 parallel sessions, and get only
|
||
|
* first 100 Kb per session.
|
||
|
*
|
||
|
* This library is free software; you can redistribute it and/or
|
||
|
* modify it under the terms of the GNU Lesser General Public
|
||
|
* License as published by the Free Software Foundation; either
|
||
|
* version 3.0 of the License, or (at your option) any later version.
|
||
|
*
|
||
|
* This library is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||
|
* See the GNU Lesser General Public License for more details.
|
||
|
*
|
||
|
* @author Vadym Timofeyev <tvad@mail333.com> http://weblancer.net/users/tvv/
|
||
|
* @copyright 2007-2010 Vadym Timofeyev
|
||
|
* @license http://www.gnu.org/licenses/lgpl-3.0.txt
|
||
|
* @version 1.07
|
||
|
* @since PHP 5.0
|
||
|
* @example examples/example.php How to use MultiCurl class library.
|
||
|
*/
|
||
|
abstract class MultiCurl {
|
||
|
/**
|
||
|
* Maximal number of CURL multi sessions. Default: 10 sessions.
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private $maxSessions = 10;
|
||
|
|
||
|
/**
|
||
|
* Maximal size of downloaded content. Default: 10 Mb (10 * 1024 * 1024).
|
||
|
*
|
||
|
* @var integer
|
||
|
*/
|
||
|
private $maxSize = 10485760;
|
||
|
|
||
|
/**
|
||
|
* Common CURL options (used for all requests).
|
||
|
*
|
||
|
* @var array
|
||
|
*/
|
||
|
private $curlOptions;
|
||
|
|
||
|
/**
|
||
|
* Current CURL multi sessions.
|
||
|
*
|
||
|
* @var array
|
||
|
*/
|
||
|
private $sessions = array();
|
||
|
|
||
|
/**
|
||
|
* Class constructor. Setup primary parameters.
|
||
|
*
|
||
|
* @param array $curlOptions Common CURL options.
|
||
|
*/
|
||
|
public function __construct($curlOptions = array()) {
|
||
|
if(empty($curlOptions))
|
||
|
{
|
||
|
$header[] = "Accept: */*";
|
||
|
$header[] = "Cache-Control: max-age=0";
|
||
|
$header[] = "Accept-Charset: utf-8;q=0.7,*;q=0.7";
|
||
|
$header[] = "Accept-Language: en-us,en;q=0.5";
|
||
|
$header[] = "Pragma: ";
|
||
|
|
||
|
$curlOptions=array(
|
||
|
CURLOPT_HEADER => true,
|
||
|
CURLOPT_HTTPHEADER => $header,
|
||
|
CURLOPT_USERAGENT => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
|
||
|
CURLOPT_CONNECTTIMEOUT => 20,
|
||
|
CURLOPT_TIMEOUT => 10
|
||
|
);
|
||
|
}
|
||
|
$this->setCurlOptions($curlOptions);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Class destructor. Close opened sessions.
|
||
|
*/
|
||
|
public function __destruct() {
|
||
|
foreach ($this->sessions as $i => $sess) {
|
||
|
$this->destroySession($i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Adds new URL to query.
|
||
|
*
|
||
|
* @param mixed $url URL for downloading.
|
||
|
* @param array $curlOptions CURL options for current request.
|
||
|
*/
|
||
|
public function addUrl($url, $extra_data = NULL, $curlOptions = array()) {
|
||
|
// Check URL
|
||
|
if (!$url) {
|
||
|
throw new Exception('URL is empty!');
|
||
|
}
|
||
|
|
||
|
// Check array of URLs
|
||
|
if (is_array($url)) {
|
||
|
foreach ($url as $s) {
|
||
|
$this->addUrl($s, $curlOptions);
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Check query
|
||
|
while (count($this->sessions) == $this->maxSessions) {
|
||
|
$this->checkSessions();
|
||
|
}
|
||
|
|
||
|
// Init new CURL session
|
||
|
$ch = curl_init($url);
|
||
|
foreach ($this->curlOptions as $option => $value) {
|
||
|
curl_setopt($ch, $option, $value);
|
||
|
}
|
||
|
foreach ($curlOptions as $option => $value) {
|
||
|
curl_setopt($ch, $option, $value);
|
||
|
}
|
||
|
|
||
|
// Init new CURL multi session
|
||
|
$mh = curl_multi_init();
|
||
|
curl_multi_add_handle($mh, $ch);
|
||
|
$this->sessions[] = array($mh, $ch, $url, $extra_data);
|
||
|
$sessions_key = array_keys($this->sessions);
|
||
|
$this->execSession(array_pop($sessions_key));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Waits CURL milti sessions.
|
||
|
*/
|
||
|
public function wait() {
|
||
|
while (count($this->sessions)) {
|
||
|
$this->checkSessions();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Executes all active CURL multi sessions.
|
||
|
*/
|
||
|
protected function checkSessions() {
|
||
|
foreach ($this->sessions as $i => $sess) {
|
||
|
if ($this->multiSelect($sess[0]) != -1) {
|
||
|
$this->execSession($i);
|
||
|
}
|
||
|
else {
|
||
|
throw new Exception('Multicurl loop detected!');
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Executes CURL multi session, check session status and downloaded size.
|
||
|
*
|
||
|
* @param integer $i A session id.
|
||
|
*/
|
||
|
protected function execSession($i) {
|
||
|
list($mh, $ch) = $this->sessions[$i];
|
||
|
if ($mh) {
|
||
|
do {
|
||
|
$mrc = curl_multi_exec($mh, $act);
|
||
|
} while ($act > 0);
|
||
|
if (!$act || $mrc !== CURLM_OK || curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD) >= $this->maxSize) {
|
||
|
$this->closeSession($i);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Replace curl_multi_select.
|
||
|
*
|
||
|
* @see http://php.net/manual/en/function.curl-multi-select.php#110869
|
||
|
* @param resource $mh A cURL multi handle returned by curl_multi_init().
|
||
|
* @param float $timeout Time, in seconds, to wait for a response.
|
||
|
*/
|
||
|
protected function multiSelect($mh, $timeout = 1.0) {
|
||
|
$ts = microtime(true);
|
||
|
|
||
|
do {
|
||
|
$mrc = curl_multi_exec($mh, $act);
|
||
|
$ct = microtime(true);
|
||
|
$t = $ct - $ts;
|
||
|
if ($t >= $timeout) {
|
||
|
return CURLM_CALL_MULTI_PERFORM;
|
||
|
}
|
||
|
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Closes session.
|
||
|
*
|
||
|
* @param integer $i A session id.
|
||
|
*/
|
||
|
protected function closeSession($i) {
|
||
|
list(, $ch, $url, $extra_data) = $this->sessions[$i];
|
||
|
|
||
|
$content = !curl_error($ch) ? curl_multi_getcontent($ch) : null;
|
||
|
$info = curl_getinfo($ch);
|
||
|
$this->destroySession($i);
|
||
|
$this->onLoad($url, $content, $info, $extra_data);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Destroys session.
|
||
|
*
|
||
|
* @param integer $i A session id.
|
||
|
*/
|
||
|
protected function destroySession($i) {
|
||
|
list($mh, $ch,) = $this->sessions[$i];
|
||
|
|
||
|
curl_multi_remove_handle($mh, $ch);
|
||
|
curl_close($ch);
|
||
|
curl_multi_close($mh);
|
||
|
|
||
|
unset($this->sessions[$i]);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Gets maximal number of CURL multi sessions.
|
||
|
*
|
||
|
* @return integer Maximal number of CURL multi sessions.
|
||
|
*/
|
||
|
public function getMaxSessions() {
|
||
|
return $this->maxSessions;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Sets maximal number of CURL multi sessions.
|
||
|
*
|
||
|
* @param integer $maxSessions Maximal number of CURL multi sessions.
|
||
|
*/
|
||
|
public function setMaxSessions($maxSessions) {
|
||
|
if ((int)$maxSessions <= 0) {
|
||
|
throw new Exception('Max sessions number must be bigger then zero!');
|
||
|
}
|
||
|
|
||
|
$this->maxSessions = (int)$maxSessions;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Gets maximal size limit for downloaded content.
|
||
|
*
|
||
|
* @return integer Maximal size limit for downloaded content.
|
||
|
*/
|
||
|
public function getMaxSize() {
|
||
|
return $this->maxSize;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Sets maximal size limit for downloaded content.
|
||
|
*
|
||
|
* @param integer $maxSize Maximal size limit for downloaded content.
|
||
|
*/
|
||
|
public function setMaxSize($maxSize) {
|
||
|
if ((int)$maxSize <= 0) {
|
||
|
throw new Exception('Max size limit must be bigger then zero!');
|
||
|
}
|
||
|
|
||
|
$this->maxSize = (int)$maxSize;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Gets CURL options for all requests.
|
||
|
*
|
||
|
* @return array CURL options.
|
||
|
*/
|
||
|
public function getCurlOptions() {
|
||
|
return $this->curlOptions;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Sets CURL options for all requests.
|
||
|
*
|
||
|
* @param array $curlOptions CURL options.
|
||
|
*/
|
||
|
public function setCurlOptions($curlOptions) {
|
||
|
if (!array_key_exists(CURLOPT_FOLLOWLOCATION, $curlOptions)) {
|
||
|
$curlOptions[CURLOPT_FOLLOWLOCATION] = 1;
|
||
|
}
|
||
|
$curlOptions[CURLOPT_RETURNTRANSFER] = 1;
|
||
|
$this->curlOptions = $curlOptions;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* OnLoad callback event.
|
||
|
*
|
||
|
* @param string $url URL for downloading.
|
||
|
* @param string $content Downloaded content.
|
||
|
* @param array $info CURL session information.
|
||
|
*/
|
||
|
protected abstract function onLoad($url, $content, $info, $extra_data);
|
||
|
|
||
|
/**
|
||
|
* Checks CURL extension, etc.
|
||
|
*/
|
||
|
public static function checkEnvironment() {
|
||
|
if (!extension_loaded('curl')) {
|
||
|
throw new Exception('CURL extension not loaded');
|
||
|
}
|
||
|
}
|
||
|
}
|