Skip to content
Snippets Groups Projects

Resolve "use multi curl to improve performance"

Merged Ghost User requested to merge 19-use-multi-curl-to-improve-performance into master
1 file
+ 36
147
Compare changes
  • Side-by-side
  • Inline
@@ -9,9 +9,14 @@ use finfo;
use Illuminate\Http\Request;
use Symfony\Component\HttpFoundation\StreamedResponse;
use URL;
use Redis;
use App\Console\Commands\RequestFetcher;
use App\Models\HttpParser;
class ProxyController extends Controller
{
const PROXY_CACHE = 20; # Cache duration in minutes
public function proxyPage(Request $request, $password, $url)
{
$targetUrl = str_replace("<<SLASH>>", "/", $url);
@@ -142,22 +147,43 @@ class ProxyController extends Controller
$result = [];
$httpcode = 200;
# We are gonna cache all files for 60 Minutes to reduce
# redundant file transfers:
$val = base64_encode(serialize($result));
Cache::put($hash, $val, 60);
if (!Cache::has($hash) || env("CACHE_ENABLED") === false) {
// Inits the Curl connection for being able to preload multiple URLs while using a keep-alive connection
$this->initCurl();
$key = $request->route('id', '');
if (!preg_match("/^[a-f0-9]{32}$/", $key)) {
abort(404);
$useragent = $_SERVER['HTTP_USER_AGENT'];
if (preg_match('/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i', $useragent) || preg_match('/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i', substr($useragent, 0, 4))) {
// Mobile Browser Dummy Mobile Useragent
$useragent = 'Mozilla/5.0 (Android 10; Mobile; rv:83.0) Gecko/83.0 Firefox/83.0';
} else {
// Not Mobile Dummy Desktop useragent
$useragent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/83.0';
}
$result = $this->getUrlContent($targetUrl, false, $key);
# $result can be null if the File Size exeeds the maximum cache size defined in .env
# In this case
$mission = [
"resulthash" => $hash,
"url" => $targetUrl,
"useragent" => $useragent,
"cacheDuration" => $this::PROXY_CACHE,
];
$mission = json_encode($mission);
Redis::rpush(RequestFetcher::FETCHQUEUE_KEY, $mission);
$answer = Redis::brpoplpush($hash, $hash, 1);
$result = HttpParser::parse($answer);
} else {
$result = Cache::get($hash);
}
if ($result === null) {
return $this->streamFile($targetUrl);
} else {
$httpcode = $result["http_code"];
$httpcode = $result["header"]["http_code"];
extract(parse_url($targetUrl));
@@ -183,7 +209,7 @@ class ProxyController extends Controller
switch ($contentType) {
case 'text/html':
# It's a html Document
# It's an html document
$htmlDocument = new HtmlDocument($password, $targetUrl, $result["data"], $contentEncoding);
$htmlDocument->proxifyContent();
$result["data"] = $htmlDocument->getResult();
@@ -229,24 +255,7 @@ class ProxyController extends Controller
abort(500, $contentType . " " . $targetUrl);
break;
}
# We are gonna cache all files for 60 Minutes to reduce
# redundant file transfers:
$val = base64_encode(serialize($result));
Cache::put($hash, $val, 60);
}
curl_close($this->ch);
} else {
$result = Cache::get($hash);
// Base64 decode:
$result = base64_decode($result);
// Unserialize
$result = unserialize($result);
if (isset($result["http_code"]) && $result["http_code"] !== 0) {
$httpcode = $result["http_code"];
}
}
if ($result["data"] === false) {
$result["data"] = "";
@@ -256,28 +265,6 @@ class ProxyController extends Controller
->withHeaders($result["header"]);
}
private function initCurl()
{
$this->ch = curl_init();
$useragent = $_SERVER['HTTP_USER_AGENT'];
if (preg_match('/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i', $useragent) || preg_match('/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i', substr($useragent, 0, 4))) {
// Mobile Browser Dummy Mobile Useragent
curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Mobile Safari/537.36');
} else {
// Not Mobile Dummy Desktop useragent
curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0');
}
curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($this->ch, CURLOPT_TIMEOUT, 6);
curl_setopt($this->ch, CURLOPT_LOW_SPEED_LIMIT, 50000);
curl_setopt($this->ch, CURLOPT_LOW_SPEED_TIME, 5);
curl_setopt($this->ch, CURLOPT_HEADER, 1);
}
private function streamFile($url)
{
$headers = get_headers($url, 1);
@@ -318,104 +305,6 @@ class ProxyController extends Controller
return $response;
}
private function getUrlContent($url, $withCookies, $key)
{
$url = htmlspecialchars_decode($url);
curl_setopt($this->ch, CURLOPT_URL, "$url");
curl_setopt($this->ch, CURLOPT_NOPROGRESS, false);
curl_setopt($this->ch, CURLOPT_PROGRESSFUNCTION, 'self::downloadProgress');
$data = curl_exec($this->ch);
# If the requested File is too big for this Process to cache then we are gonna handle this File download later
# in another way.
if (curl_errno($this->ch) === CURLE_ABORTED_BY_CALLBACK) {
# In this case the download was aborted because of the FileSize
# We have no headers or anything like that
# so we will return null and handle this case in the calling function
return null;
} else {
$httpcode = intval(curl_getinfo($this->ch, CURLINFO_HTTP_CODE));
$header_size = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $header_size);
$data = substr($data, $header_size);
$headerArray = [];
foreach (explode(PHP_EOL, $header) as $index => $value) {
if ($index > 0) {
$ar = explode(': ', $value);
if (sizeof($ar) === 2) {
if ($withCookies && (strtolower($ar[0]) === "content-type" || strtolower($ar[0]) === "set-cookie")) {
$headerArray[trim($ar[0])] = trim($ar[1]);
} elseif (!$withCookies && strtolower($ar[0]) === "content-type") {
$headerArray[strtolower(trim($ar[0]))] = strtolower(trim($ar[1]));
} elseif (strtolower($ar[0]) === "location") {
$redLink = $ar[1];
if (strpos($redLink, "/") === 0) {
$parse = parse_url($url);
$redLink = $parse["scheme"] . "://" . $parse["host"] . $redLink;
} elseif (preg_match("/^\w+\.\w+$/si", $redLink)) {
$parse = parse_url($url);
$redLink = $parse["scheme"] . "://" . $parse["host"] . "/" . $redLink;
}
$headerArray[trim($ar[0])] = $this->proxifyUrl($redLink, null, $key, false);
} elseif (strtolower($ar[0]) === "content-disposition") {
$headerArray[strtolower(trim($ar[0]))] = strtolower(trim($ar[1]));
} else {
#$headerArray[trim($ar[0])] = trim($ar[1]);
}
}
}
}
# It might happen that a server doesn't give Information about file Type.
# Let's try to generate one in this case
if (!isset($headerArray["content-type"])) {
$finfo = new finfo(FILEINFO_MIME);
$headerArray["content-type"] = $finfo->buffer($data);
}
$headerArray["Content-Security-Policy"] = "default-src 'self' data: 'unsafe-inline' http://localhost";
# Charset-Fix for people who forget to declare charset:
# If this won't work the default charset UTF-8 is set by laravel:
foreach ($headerArray as $key => $value) {
if (strtolower($key) === "content-type" && strpos(strtolower($value), "charset") === false) {
# We will see if there is a content-type with charset declared in the document:
if (preg_match("/<\s*meta[^>]+http-equiv=[\'\"]\s*content-type\s*[\'\"][^>]+?>/si", $data, $match)) {
if (strpos($match[0], "charset") !== false && preg_match("/content=[\'\"]([^\'\"]+)/si", $match[0], $contentType)) {
$headerArray[$key] = $contentType[1];
break;
} else {
break;
}
} else {
break;
}
}
}
if (!isset($httpcode) || !$httpcode || $httpcode === 0) {
$httpcode = 200;
}
return ['header' => $headerArray, 'data' => $data, 'http_code' => $httpcode];
}
}
private function downloadProgress($resource, $download_size, $downloaded, $upload_size, $uploaded)
{
# The Memory Cache:
# Every file that our Proxy parses has to lie in the memory Cache of PHP
# If you would download a 5GB File then our PHP Process would need 5GB min RAM
# We are gonna handle Files bigger then our defined maximum Cache Size in another way and break the conection at this point.
if ($download_size > intval(env('PROXY_MEMORY_CACHE')) || $downloaded > intval(env('PROXY_MEMORY_CACHE'))) {
return 1;
}
}
public function proxifyUrl($url, $password = null, $key, $topLevel)
{
// Only convert valid URLs
Loading