diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8d1acc5a2b6eebbcba0afcb6bf82ecb74ff49c80..c9faf419086453604244e7e46359fa09500130eb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -86,7 +86,7 @@ prepare_composer: key: "metager-${CI_JOB_NAME}" paths: - .composer - + build: services:
diff --git a/Dockerfile b/Dockerfile index ee6f8ea503b8ba5329192c100558b2540bac8ad5..114e1d24a4e1fa420cb59ef85526948737a4c748 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,7 @@ RUN apk add --update \ php7-tokenizer \ php7-xml \ php7-curl \ + php7-pcntl \ php7-dom \ php7-fileinfo \ && rm -rf /var/cache/apk/*
diff --git a/DockerfileDev b/DockerfileDev index 0345c87d6f2dea0bfec2b63eeffbcbc447f97ec6..b7f112534c7898c8015a788f1f298feff0a27f67 100644 --- a/DockerfileDev +++ b/DockerfileDev @@ -17,6 +17,7 @@ RUN apk add --update \ php7-tokenizer \ php7-xml \ php7-curl \ + php7-pcntl \ php7-dom \ php7-fileinfo \ && rm -rf /var/cache/apk/*
diff --git a/app/Console/Commands/RequestFetcher.php b/app/Console/Commands/RequestFetcher.php
new file mode 100644
index 0000000000000000000000000000000000000000..e064adb2748278097c9c551c5a1d27efbbb84f73
--- /dev/null
+++ b/app/Console/Commands/RequestFetcher.php
@@ -0,0 +1,280 @@
+<?php
+
+namespace App\Console\Commands;
+
+use Cache;
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\Redis;
+use Log;
+
+/**
+ * Long-running worker: takes fetch jobs from the Redis list FETCHQUEUE_KEY,
+ * runs them concurrently on one curl multi handle and pushes each response
+ * onto the Redis list named by the job's "resulthash".
+ */
+class RequestFetcher extends Command
+{
+    public const FETCHQUEUE_KEY = "fetcher.queue";
+
+    /**
+     * The name and signature of the console command.
+     *
+     * @var string
+     */
+    protected $signature = 'requests:fetcher';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'This commands fetches requests to the installed search engines';
+
+    // Cleared by sig_handler() or a curl failure to leave the main loop
+    protected $shouldRun = true;
+    // curl multi handle shared by all transfers
+    protected $multicurl = null;
+    protected $proxyhost;
+    protected $proxyport;
+    protected $proxyuser;
+    protected $proxypassword;
+
+    /**
+     * Create a new command instance.
+     *
+     * @return void
+     */
+    public function __construct()
+    {
+        parent::__construct();
+        $this->multicurl = curl_multi_init();
+        $this->proxyhost = env("PROXY_HOST", "");
+        $this->proxyport = env("PROXY_PORT", "");
+        $this->proxyuser = env("PROXY_USER", "");
+        $this->proxypassword = env("PROXY_PASSWORD", "");
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * Creates the marker file /tmp/fetcher, then alternates between driving
+     * curl and polling Redis until shouldRun is cleared. The marker file is
+     * removed and the multi handle closed when the loop ends.
+     *
+     * @return mixed
+     */
+    public function handle()
+    {
+        $pidFile = "/tmp/fetcher";
+        // pcntl_signal() only registers the handlers; they are not invoked
+        // unless signals are dispatched, so enable asynchronous dispatch.
+        \pcntl_async_signals(true);
+        \pcntl_signal(SIGINT, [$this, "sig_handler"]);
+        \pcntl_signal(SIGTERM, [$this, "sig_handler"]);
+        \pcntl_signal(SIGHUP, [$this, "sig_handler"]);
+
+        // Redis might not be available now
+        for ($count = 0; $count < 10; $count++) {
+            try {
+                Redis::connection();
+                break;
+            } catch (\Predis\Connection\ConnectionException $e) {
+                if ($count >= 9) {
+                    // If its not available after 10 seconds we will exit
+                    return;
+                }
+                sleep(1);
+            }
+        }
+
+        touch($pidFile);
+
+        if (!file_exists($pidFile)) {
+            return;
+        }
+
+        try {
+            while ($this->shouldRun) {
+                $operationsRunning = true;
+                curl_multi_exec($this->multicurl, $operationsRunning);
+                $status = $this->readMultiCurl($this->multicurl);
+                $answersRead = $status[0];
+                $messagesLeft = $status[1];
+                $newJobs = $this->checkNewJobs($operationsRunning, $messagesLeft);
+
+                if ($newJobs === 0 && $answersRead === 0) {
+                    usleep(10 * 1000);
+                }
+            }
+        } finally {
+            unlink($pidFile);
+            curl_multi_close($this->multicurl);
+        }
+    }
+
+    /**
+     * Checks the Redis queue for newly submitted fetch jobs and adds them to
+     * multicurl. Blocks on Redis for up to one second when nothing is running.
+     *
+     * @param int $operationsRunning Running-transfer count set by curl_multi_exec()
+     * @param int $messagesLeft      Second element returned by readMultiCurl()
+     * @return int Number of jobs handed to multicurl
+     */
+    private function checkNewJobs($operationsRunning, $messagesLeft)
+    {
+        $newJobs = [];
+        if ($operationsRunning === 0 && $messagesLeft === -1) {
+            $newJob = Redis::blpop(self::FETCHQUEUE_KEY, 1);
+            if (!empty($newJob)) {
+                $newJobs[] = $newJob[1];
+            }
+        } else {
+            // MULTI/EXEC instead of a plain pipeline: a job pushed between
+            // LRANGE and DEL would otherwise be deleted without being read.
+            $elements = Redis::transaction(function ($redis) {
+                $redis->lrange(self::FETCHQUEUE_KEY, 0, -1);
+                $redis->del(self::FETCHQUEUE_KEY);
+            });
+            $newJobs = empty($elements[0]) ? [] : $elements[0];
+        }
+
+        $addedJobs = 0;
+        foreach ($newJobs as $newJob) {
+            $job = json_decode($newJob, true);
+            if (!is_array($job) || empty($job["url"]) || empty($job["resulthash"])) {
+                // One malformed payload must not break the whole batch
+                Log::error("Skipping malformed fetch job");
+                continue;
+            }
+            $ch = $this->getCurlHandle($job);
+            if (curl_multi_add_handle($this->multicurl, $ch) !== 0) {
+                curl_close($ch);
+                $this->shouldRun = false;
+                Log::error("Couldn't add Handle to multicurl");
+                break;
+            }
+            $addedJobs++;
+        }
+
+        return $addedJobs;
+    }
+
+    /**
+     * Reads every finished transfer from the multi handle, pushes its body
+     * onto the Redis list named by the result hash (expires after 60 seconds)
+     * and caches successful answers when the job asked for it.
+     *
+     * @param resource $mc curl multi handle
+     * @return array [answers read, queue length last reported by curl or -1]
+     */
+    private function readMultiCurl($mc)
+    {
+        $messagesLeft = -1;
+        $answersRead = 0;
+        while (($info = curl_multi_info_read($mc, $messagesLeft)) !== false) {
+            $handle = $info["handle"];
+            try {
+                $answersRead++;
+                // CURLOPT_PRIVATE carries "resulthash;cacheDuration"
+                $infos = explode(";", curl_getinfo($handle, CURLINFO_PRIVATE));
+                $resulthash = $infos[0];
+                $cacheDurationMinutes = intval($infos[1] ?? 0);
+
+                $error = curl_error($handle);
+                if (!empty($error)) {
+                    Log::error($error);
+                }
+
+                $body = \curl_multi_getcontent($handle);
+                $failed = $info["result"] !== CURLE_OK || !is_string($body) || $body === "";
+                if (!is_string($body) || $body === "") {
+                    // Deliver the marker instead of an empty list entry
+                    $body = "no-result";
+                }
+
+                Redis::pipeline(function ($pipe) use ($resulthash, $body) {
+                    $pipe->lpush($resulthash, $body);
+                    $pipe->expire($resulthash, 60);
+                });
+
+                // Failed or empty transfers are delivered but never cached
+                if ($cacheDurationMinutes > 0 && !$failed) {
+                    try {
+                        Cache::put($resulthash, $body, $cacheDurationMinutes * 60);
+                    } catch (\Exception $e) {
+                        Log::error($e->getMessage());
+                    }
+                }
+            } catch (\Exception $e) {
+                Log::error($e->getMessage());
+            } finally {
+                \curl_multi_remove_handle($mc, $handle);
+                \curl_close($handle);
+            }
+        }
+        return [$answersRead, $messagesLeft];
+    }
+
+    /**
+     * Builds the curl easy handle for one decoded fetch job.
+     *
+     * @param array $job Reads url, resulthash, cacheDuration and useragent plus
+     *                   the optional curlopts, username, password and headers
+     * @return resource curl easy handle, not yet attached to multicurl
+     */
+    private function getCurlHandle($job)
+    {
+        $ch = curl_init();
+
+        curl_setopt_array($ch, [
+            CURLOPT_URL => $job["url"],
+            CURLOPT_PRIVATE => $job["resulthash"] . ";" . ($job["cacheDuration"] ?? 0),
+            CURLOPT_RETURNTRANSFER => 1,
+            CURLOPT_USERAGENT => $job["useragent"] ?? "",
+            CURLOPT_FOLLOWLOCATION => true,
+            CURLOPT_CONNECTTIMEOUT => 2,
+            CURLOPT_MAXCONNECTS => 500,
+            CURLOPT_LOW_SPEED_LIMIT => 50000,
+            CURLOPT_LOW_SPEED_TIME => 5,
+            CURLOPT_TIMEOUT => 7,
+            CURLOPT_HEADER => true,
+        ]);
+
+        if (!empty($job["curlopts"])) {
+            curl_setopt_array($ch, $job["curlopts"]);
+        }
+
+        if (!empty($this->proxyhost) && !empty($this->proxyport) && !empty($this->proxyuser) && !empty($this->proxypassword)) {
+            curl_setopt($ch, CURLOPT_PROXY, $this->proxyhost);
+            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
+            curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxyport);
+            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
+        }
+
+        if (!empty($job["username"]) && !empty($job["password"])) {
+            curl_setopt($ch, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
+        }
+
+        if (!empty($job["headers"])) {
+            // CURLOPT_HTTPHEADER takes a list of "key:value" strings
+            $headers = [];
+            foreach ($job["headers"] as $key => $value) {
+                $headers[] = $key . ":" . $value;
+            }
+            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
+        }
+
+        return $ch;
+    }
+
+    /**
+     * Signal handler for SIGINT, SIGTERM and SIGHUP: asks the main loop to stop.
+     *
+     * @param int $sig Number of the received signal
+     */
+    public function sig_handler($sig)
+    {
+        $this->shouldRun = false;
+        echo("Terminating Process\n");
+    }
+}
diff --git a/app/HtmlDocument.php b/app/HtmlDocument.php index efa33b76985ce31ecb32efeed5e83f449df41852..d3db7e7e2b7dc44cfc8fc4c7e9b93f333443c479 100644 --- a/app/HtmlDocument.php +++ b/app/HtmlDocument.php @@ -8,6 +8,7 @@ class HtmlDocument extends Document { private $htmlString; + private $encoding; public function __construct($password, $baseUrl, $htmlString, $encoding) { @@ -30,7 +31,7 @@ class HtmlDocument extends Document if ($meta->hasAttribute("http-equiv") && strtolower($meta->getAttribute("http-equiv")) === "content-type" && $meta->hasAttribute("content")) { $contentType = $meta->getAttribute("content"); $encoding = stripos($contentType, "charset=") !== false ? trim(substr($contentType, stripos($contentType, "charset=") + 8)) : null; - $contentEncoding = rtrim($contentEncoding, ";"); + $encoding = rtrim($encoding, ";"); if ($encoding !== null) { break; } @@ -48,6 +49,7 @@ class HtmlDocument extends Document # Default Fallback } if (!empty($encoding)) { + $this->encoding = $encoding; return mb_convert_encoding($htmlString, 'HTML-ENTITIES', $encoding); } else { return mb_convert_encoding($htmlString, 'HTML-ENTITIES'); @@ -59,6 +61,11 @@ class HtmlDocument extends Document return $this->htmlString; } + public function getEncoding() + { + return $this->encoding; + } + /** * Function proxifyContent * This method parses the given String and Proxifies all Links/Urls in it so it's targetting this Proxy Server
diff --git a/app/Http/Controllers/ProxyController.php b/app/Http/Controllers/ProxyController.php index d3516591cfac1360859d96208fa7f4fe615f6176..808979d7b0b3680aa87d1d9bdb4d329e54955b30 100644 --- a/app/Http/Controllers/ProxyController.php +++ b/app/Http/Controllers/ProxyController.php @@ -9,9 +9,14 @@ use
finfo; use Illuminate\Http\Request; use Symfony\Component\HttpFoundation\StreamedResponse; use URL; +use Redis; +use App\Console\Commands\RequestFetcher; +use App\Models\HttpParser; class ProxyController extends Controller { + const PROXY_CACHE = 20; # Cache duration in minutes + public function proxyPage(Request $request, $password, $url) { $targetUrl = str_replace("<<SLASH>>", "/", $url); @@ -135,6 +140,12 @@ class ProxyController extends Controller $targetUrl = str_replace("<<SLASH>>", "/", $url); $targetUrl = str_rot13(base64_decode($targetUrl)); + + try{ + $path = parse_url($targetUrl)["path"]; + }catch(\Exception $e){ + $path = ""; + } $this->password = $password; // Hash Value under which a possible cached file would've been stored @@ -143,141 +154,110 @@ class ProxyController extends Controller $httpcode = 200; if (!Cache::has($hash) || env("CACHE_ENABLED") === false) { - // Inits the Curl connection for being able to preload multiple URLs while using a keep-alive connection - $this->initCurl(); - $key = $request->route('id', ''); - if (!preg_match("/^[a-f0-9]{32}$/", $key)) { - abort(404); + $useragent = $_SERVER['HTTP_USER_AGENT']; + if (preg_match('/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i', $useragent) || preg_match('/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 
u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i', substr($useragent, 0, 4))) { + // Mobile Browser Dummy Mobile Useragent + $useragent = 'Mozilla/5.0 (Android 10; Mobile; rv:83.0) Gecko/83.0 Firefox/83.0'; + } else { + // Not Mobile Dummy Desktop useragent + $useragent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/83.0'; } - $result = $this->getUrlContent($targetUrl, false, $key); - # $result can be null if the File Size exeeds the maximum cache size defined in .env - # In this case - if ($result === null) { - return $this->streamFile($targetUrl); - } else { - $httpcode = $result["http_code"]; - - extract(parse_url($targetUrl)); - - $base = $scheme . "://" . $host; - - # We will parse whether we have a parser for this document type. 
- # If not, we will not Proxy it: - $contentTypeHeader = $result["header"]["content-type"]; - $contentType = strpos($result["header"]["content-type"], ";") !== false ? trim(substr($result["header"]["content-type"], 0, strpos($result["header"]["content-type"], ";"))) : trim($result["header"]["content-type"]); - $contentEncoding = stripos($contentTypeHeader, "charset=") !== false ? trim(substr($contentTypeHeader, stripos($contentTypeHeader, "charset=") + 8)) : null; - $contentEncoding = rtrim($contentEncoding, ";"); - - if (isset($result["header"]["content-disposition"])) { - if (stripos($result["header"]["content-disposition"], "filename=") === false) { - $basename = basename(parse_url($targetUrl, PHP_URL_PATH)); - $newHeader = $result["header"]["content-disposition"]; - $newHeader = trim($newHeader); - $newHeader = rtrim($newHeader, ";"); - $newHeader .= "; filename=" . $basename; - $result["header"]["content-disposition"] = $newHeader; - } - } + $mission = [ + "resulthash" => $hash, + "url" => $targetUrl, + "useragent" => $useragent, + "cacheDuration" => $this::PROXY_CACHE, + ]; - switch ($contentType) { - case 'text/html': - # It's a html Document - $htmlDocument = new HtmlDocument($password, $targetUrl, $result["data"], $contentEncoding); - $htmlDocument->proxifyContent(); - $result["data"] = $htmlDocument->getResult(); - break; - case 'application/pdf': - if (!isset($result["header"]["content-disposition"])) { - $name = "document.pdf"; - $basename = basename($targetUrl); - if (stripos($basename, ".pdf") !== false) { - $name = $basename; - } - $result["header"]["content-disposition"] = "attachment; filename=$name"; - } - // no break - case 'image/png': - case 'image/jpeg': - case 'image/gif': - case 'application/font-woff': - case 'application/x-font-woff': - case 'application/x-empty': - case 'font/woff2': - case 'image/svg+xml': - case 'application/octet-stream': - case 'text/plain': - case 'image/x-icon': - case 'font/eot': - case 'image/vnd.microsoft.icon': 
- case 'application/vnd.ms-fontobject': - case 'application/x-font-ttf': - case 'application/x-www-form-urlencoded': - case 'application/zip': - case 'binary/octet-stream': - # Nothing to do with Images: Just return them - break; - case 'text/css': - # Css Documents might contain references to External Documents that need to get Proxified - $cssDocument = new CssDocument($password, $targetUrl, $result["data"]); - $cssDocument->proxifyContent(); - $result["data"] = $cssDocument->getResult(); - break; - default: - # We have no Parser for this one. Let's respond: - abort(500, $contentType . " " . $targetUrl); - break; + $mission = json_encode($mission); + Redis::rpush(RequestFetcher::FETCHQUEUE_KEY, $mission); + $answer = Redis::brpoplpush($hash, $hash, 10); + } else { + $answer = Cache::get($hash); + } + $result = HttpParser::parse($answer, $path); + if ($result === null) { + return $this->streamFile($targetUrl); + } else { + $httpcode = $result["header"]["http_code"]; + extract(parse_url($targetUrl)); + $base = $scheme . "://" . $host; + # We will parse whether we have a parser for this document type. + # If not, we will not Proxy it: + $contentTypeHeader = $result["header"]["content-type"]; + $contentType = strpos($result["header"]["content-type"], ";") !== false ? trim(substr($result["header"]["content-type"], 0, strpos($result["header"]["content-type"], ";"))) : trim($result["header"]["content-type"]); + $contentEncoding = stripos($contentTypeHeader, "charset=") !== false ? trim(substr($contentTypeHeader, stripos($contentTypeHeader, "charset=") + 8)) : null; + $contentEncoding = rtrim($contentEncoding, ";"); + if (isset($result["header"]["content-disposition"])) { + if (stripos($result["header"]["content-disposition"], "filename=") === false) { + $basename = basename(parse_url($targetUrl, PHP_URL_PATH)); + $newHeader = $result["header"]["content-disposition"]; + $newHeader = trim($newHeader); + $newHeader = rtrim($newHeader, ";"); + $newHeader .= "; filename=" . 
$basename; + $result["header"]["content-disposition"] = $newHeader; } - # We are gonna cache all files for 60 Minutes to reduce - # redundant file transfers: - $val = base64_encode(serialize($result)); - - Cache::put($hash, $val, 60); } - - curl_close($this->ch); - } else { - $result = Cache::get($hash); - // Base64 decode: - $result = base64_decode($result); - // Unserialize - $result = unserialize($result); - if (isset($result["http_code"]) && $result["http_code"] !== 0) { - $httpcode = $result["http_code"]; + switch ($contentType) { + case 'text/html': + # It's an html document + $htmlDocument = new HtmlDocument($password, $targetUrl, $result["data"], $contentEncoding); + $htmlDocument->proxifyContent(); + $result['header']['content-type'] = $contentType . "; charset=" . $htmlDocument->getEncoding(); + $result["data"] = $htmlDocument->getResult(); + break; + case 'application/pdf': + if (!isset($result["header"]["content-disposition"])) { + $name = "document.pdf"; + $basename = basename($targetUrl); + if (stripos($basename, ".pdf") !== false) { + $name = $basename; + } + $result["header"]["content-disposition"] = "attachment; filename=$name"; + } + // no break + case 'image/png': + case 'image/jpeg': + case 'image/gif': + case 'application/font-woff': + case 'application/x-font-woff': + case 'application/x-empty': + case 'font/woff2': + case 'image/svg+xml': + case 'application/octet-stream': + case 'text/plain': + case 'image/x-icon': + case 'font/eot': + case 'image/vnd.microsoft.icon': + case 'application/vnd.ms-fontobject': + case 'application/x-font-ttf': + case 'application/x-www-form-urlencoded': + case 'application/zip': + case 'binary/octet-stream': + # Nothing to do with Images: Just return them + break; + case 'text/css': + # Css Documents might contain references to External Documents that need to get Proxified + $cssDocument = new CssDocument($password, $targetUrl, $result["data"]); + $cssDocument->proxifyContent(); + $result["data"] = 
$cssDocument->getResult(); + break; + default: + # We have no Parser for this one. Let's respond: + abort(500, $contentType . " " . $targetUrl); + break; } } if ($result["data"] === false) { $result["data"] = ""; } - return response($result["data"], $httpcode) ->withHeaders($result["header"]); } - private function initCurl() - { - $this->ch = curl_init(); - $useragent = $_SERVER['HTTP_USER_AGENT']; - if (preg_match('/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i', $useragent) || preg_match('/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v 
)|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i', substr($useragent, 0, 4))) { - // Mobile Browser Dummy Mobile Useragent - curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Mobile Safari/537.36'); - } else { - // Not Mobile Dummy Desktop useragent - curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0'); - } - - curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 0); - curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, 0); - curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($this->ch, CURLOPT_TIMEOUT, 6); - curl_setopt($this->ch, CURLOPT_LOW_SPEED_LIMIT, 50000); - curl_setopt($this->ch, CURLOPT_LOW_SPEED_TIME, 5); - curl_setopt($this->ch, CURLOPT_HEADER, 1); - } - private function streamFile($url) { $headers = get_headers($url, 1); @@ -318,104 +298,6 @@ class ProxyController extends Controller return $response; } - private function getUrlContent($url, $withCookies, $key) - { - $url = htmlspecialchars_decode($url); - curl_setopt($this->ch, CURLOPT_URL, "$url"); - curl_setopt($this->ch, CURLOPT_NOPROGRESS, false); - curl_setopt($this->ch, 
CURLOPT_PROGRESSFUNCTION, 'self::downloadProgress'); - - $data = curl_exec($this->ch); - - # If the requested File is too big for this Process to cache then we are gonna handle this File download later - # in another way. - if (curl_errno($this->ch) === CURLE_ABORTED_BY_CALLBACK) { - # In this case the download was aborted because of the FileSize - # We have no headers or anything like that - # so we will return null and handle this case in the calling function - return null; - } else { - $httpcode = intval(curl_getinfo($this->ch, CURLINFO_HTTP_CODE)); - - $header_size = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $header_size); - - $data = substr($data, $header_size); - $headerArray = []; - - foreach (explode(PHP_EOL, $header) as $index => $value) { - if ($index > 0) { - $ar = explode(': ', $value); - if (sizeof($ar) === 2) { - if ($withCookies && (strtolower($ar[0]) === "content-type" || strtolower($ar[0]) === "set-cookie")) { - $headerArray[trim($ar[0])] = trim($ar[1]); - } elseif (!$withCookies && strtolower($ar[0]) === "content-type") { - $headerArray[strtolower(trim($ar[0]))] = strtolower(trim($ar[1])); - } elseif (strtolower($ar[0]) === "location") { - $redLink = $ar[1]; - if (strpos($redLink, "/") === 0) { - $parse = parse_url($url); - $redLink = $parse["scheme"] . "://" . $parse["host"] . $redLink; - } elseif (preg_match("/^\w+\.\w+$/si", $redLink)) { - $parse = parse_url($url); - $redLink = $parse["scheme"] . "://" . $parse["host"] . "/" . $redLink; - } - - $headerArray[trim($ar[0])] = $this->proxifyUrl($redLink, null, $key, false); - } elseif (strtolower($ar[0]) === "content-disposition") { - $headerArray[strtolower(trim($ar[0]))] = strtolower(trim($ar[1])); - } else { - #$headerArray[trim($ar[0])] = trim($ar[1]); - } - } - } - } - - # It might happen that a server doesn't give Information about file Type. 
- # Let's try to generate one in this case - if (!isset($headerArray["content-type"])) { - $finfo = new finfo(FILEINFO_MIME); - $headerArray["content-type"] = $finfo->buffer($data); - } - - $headerArray["Content-Security-Policy"] = "default-src 'self' data: 'unsafe-inline' http://localhost"; - # Charset-Fix for people who forget to declare charset: - # If this won't work the default charset UTF-8 is set by laravel: - foreach ($headerArray as $key => $value) { - if (strtolower($key) === "content-type" && strpos(strtolower($value), "charset") === false) { - # We will see if there is a content-type with charset declared in the document: - if (preg_match("/<\s*meta[^>]+http-equiv=[\'\"]\s*content-type\s*[\'\"][^>]+?>/si", $data, $match)) { - if (strpos($match[0], "charset") !== false && preg_match("/content=[\'\"]([^\'\"]+)/si", $match[0], $contentType)) { - $headerArray[$key] = $contentType[1]; - break; - } else { - break; - } - } else { - break; - } - } - } - - if (!isset($httpcode) || !$httpcode || $httpcode === 0) { - $httpcode = 200; - } - - return ['header' => $headerArray, 'data' => $data, 'http_code' => $httpcode]; - } - } - - private function downloadProgress($resource, $download_size, $downloaded, $upload_size, $uploaded) - { - # The Memory Cache: - # Every file that our Proxy parses has to lie in the memory Cache of PHP - # If you would download a 5GB File then our PHP Process would need 5GB min RAM - # We are gonna handle Files bigger then our defined maximum Cache Size in another way and break the conection at this point. 
- if ($download_size > intval(env('PROXY_MEMORY_CACHE')) || $downloaded > intval(env('PROXY_MEMORY_CACHE'))) { - return 1; - } - } - public function proxifyUrl($url, $password = null, $key, $topLevel) { // Only convert valid URLs
diff --git a/app/Models/HttpParser.php b/app/Models/HttpParser.php
new file mode 100644
index 0000000000000000000000000000000000000000..d73ddeaabfedd979afad41c796e027f5642cd5f5
--- /dev/null
+++ b/app/Models/HttpParser.php
@@ -0,0 +1,97 @@
+<?php
+
+namespace App\Models;
+
+/**
+ * Splits a raw HTTP response (one or more header blocks followed by the body)
+ * into a lower-cased header array and the body string.
+ */
+class HttpParser
+{
+    /**
+     * Parses a single header block.
+     *
+     * @param string $headerPart Status line and header lines, separated by CRLF
+     * @param string $path       Request path; its extension picks a content type
+     *                           when the block does not declare one
+     * @return array Lower-cased header names => values, plus "http_code" (int)
+     */
+    public static function parseHeader($headerPart, $path = "")
+    {
+        $header = [];
+        $headerLine = explode("\r\n", (string) $headerPart);
+
+        // Status line, e.g. "HTTP/1.1 200 OK"; fall back to 200 if it is unusable
+        $status = explode(" ", rtrim($headerLine[0]));
+        $code = isset($status[1]) ? intval($status[1]) : 0;
+        $header['http_code'] = ($code >= 100 && $code < 600) ? $code : 200;
+
+        foreach ($headerLine as $line) {
+            // Split on the first colon only so values containing ":" stay intact
+            $tmp = explode(":", $line, 2);
+            if (count($tmp) === 2 && trim($tmp[0]) !== "") {
+                $header[strtolower(trim($tmp[0]))] = trim($tmp[1]);
+            }
+        }
+
+        if (!isset($header['content-type'])) {
+            if (substr($path, -4) === '.css') {
+                $header['content-type'] = 'text/css';
+            } elseif (substr($path, -3) === '.js') {
+                $header['content-type'] = 'text/javascript';
+            } elseif (substr($path, -4) === '.svg') {
+                $header['content-type'] = 'image/svg+xml';
+            } else {
+                $header['content-type'] = 'application/octet-stream';
+            }
+        }
+
+        return $header;
+    }
+
+    /**
+     * Parses a complete raw response.
+     *
+     * Leading header blocks (e.g. one per followed redirect) are skipped; the
+     * last one is parsed and everything after it is returned as the body.
+     *
+     * @param string $htmldoc Raw response starting with "HTTP/"
+     * @param string $path    Passed through to parseHeader()
+     * @return array ['header' => array, 'data' => string]; a 200 text/html
+     *               "no-result" answer when $htmldoc is not a parsable response
+     */
+    public static function parse($htmldoc, $path = "")
+    {
+        $fallback = [
+            'header' => ['http_code' => 200, 'content-type' => "text/html"],
+            'data' => 'no-result',
+        ];
+        if (!is_string($htmldoc)) {
+            return $fallback;
+        }
+
+        try {
+            $parts = explode("\r\n\r\n", $htmldoc);
+            // No "m" modifier: only a block that itself starts with a status
+            // line counts as a header block, not body text mentioning HTTP.
+            $i = 0;
+            while ($i < count($parts) && preg_match('/^HTTP\//', $parts[$i])) {
+                $i++;
+            }
+            if ($i === 0) {
+                return $fallback;
+            }
+
+            $out = [];
+            $out['header'] = self::parseHeader($parts[$i - 1], $path);
+            // Re-insert the separator so blank lines inside the body survive
+            $out['data'] = implode("\r\n\r\n", array_slice($parts, $i));
+            unset($out['header']['content-encoding']);
+            unset($out['header']['content-length']);
+            unset($out['header']['x-frame-options']);
+            return $out;
+        } catch (\Throwable $e) {
+            return $fallback;
+        }
+    }
+}
\ No newline at end of file
diff --git
a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 8e4b95c0cf2dfc43e8e0806e8556c4f81bf81324..654987f931c362f1b38a807eca55255818969e8e 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -113,6 +113,41 @@ spec: requests: cpu: 100m memory: 100m + - name: {{ .Chart.Name }}-worker + image: {{ template "imagename" . }} + command: ["su"] + args: ["-s", "/bin/sh", "-c", "php artisan requests:fetcher", "nginx"] + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.application.secretName }} + envFrom: + - secretRef: + name: {{ .Values.application.secretName }} + {{- end }} + env: + - name: DATABASE_URL + value: {{ .Values.application.database_url | quote }} + - name: GITLAB_ENVIRONMENT_NAME + value: {{ .Values.gitlab.envName | quote }} + - name: GITLAB_ENVIRONMENT_URL + value: {{ .Values.gitlab.envURL | quote }} + livenessProbe: + exec: + command: + - cat + - /tmp/fetcher + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + readinessProbe: + exec: + command: + - cat + - /tmp/fetcher + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + resources: + requests: + cpu: 1 + memory: 200m # REDIS - name: {{ .Chart.Name }}-redis image: "redis:5.0.3-alpine" diff --git a/docker-compose.yml b/docker-compose.yml index 18ed16ba0b7a8f7ea4d7c61e9187eae980d3463f..f028e990a6257b82274170aa0a36f61ca85a9b74 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,14 +20,14 @@ services: build: context: . 
dockerfile: DockerfileDev - image: metager:latest + image: proxy:latest working_dir: /html volumes: - .:/html web: depends_on: - "phpfpm" - image: metager:latest + image: proxy:latest working_dir: /html command: nginx volumes: @@ -38,4 +38,13 @@ services: - "8080:80" redis: image: redis:5.0.3-alpine - container_name: redis \ No newline at end of file + container_name: redis + worker: + depends_on: + - "phpfpm" + restart: on-failure + image: proxy:latest + working_dir: /html + volumes: + - .:/html + command: "su -s /bin/sh -c 'php artisan requests:fetcher' nginx" \ No newline at end of file diff --git a/resources/views/errors/405.blade.php b/resources/views/errors/405.blade.php index dc0905cd542b15e32140dd440b505999d7a08de6..ec8e76c95d641ecf5c136d96426dccde02ab384e 100644 --- a/resources/views/errors/405.blade.php +++ b/resources/views/errors/405.blade.php @@ -1,4 +1,4 @@ -@extends('layouts.app') +@extends('layouts.app',['key' => 'error']) @section('content') <div class="container content-container"> diff --git a/routes/web.php b/routes/web.php index e8213790a364085bbbb2ad7401cced0536ddf138..9cfda255d5c3d89f7a6c85373098eee42ce4b319 100644 --- a/routes/web.php +++ b/routes/web.php @@ -12,11 +12,14 @@ use Illuminate\Http\Request; | contains the "web" middleware group. Now create something great! | */ - Route::post('/{url}', function ($url) { abort(405); }); +Route::post('{password}/{url}', function ($url) { + abort(405); +}); + Route::get('healthz', function () { return response('', 200) ->header('Content-Type', 'text/plain');