From 8d1f78eeb5e1585ee54cd6433898ce12f08ddbed Mon Sep 17 00:00:00 2001 From: Dominik Pfennig <dominik@suma-ev.de> Date: Mon, 20 Mar 2017 08:20:30 +0100 Subject: [PATCH] Erste gut funktionierende Version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Die neue Proxy Software ist soweit fertig, als dass diese Version nun mindestens so gut funktionieren dürfte, wie die bestehende --- app/CssDocument.php | 56 +++ app/Document.php | 125 ++++++ app/HtmlDocument.php | 244 +++++++++++ app/Http/Controllers/ProxyController.php | 530 +++++++---------------- resources/views/ProxyPage.blade.php | 11 +- 5 files changed, 592 insertions(+), 374 deletions(-) create mode 100644 app/CssDocument.php create mode 100644 app/Document.php create mode 100644 app/HtmlDocument.php diff --git a/app/CssDocument.php b/app/CssDocument.php new file mode 100644 index 0000000..769f297 --- /dev/null +++ b/app/CssDocument.php @@ -0,0 +1,56 @@ +<?php + +namespace App; + +class CssDocument extends Document +{ + + private $styleString; + + public function __construct($password, $base, $styleString) + { + parent::__construct($password, $base); + $this->styleString = $styleString; + } + + public function proxifyContent() + { + # All Resources that I know, that are included within an CSS Stylesheet must have the url() functional quoting + # We're gonna replace all URL's that we find within this document + # First with Quotation Marks: + $this->styleString = preg_replace_callback("/(url\()([\"\']{1})([^\\2]+?)\\2/si", "self::pregReplaceUrl", $this->styleString); + # And then the ones without Quotation Marks + $this->styleString = preg_replace_callback("/(url\()([^\"\'][^\)]+?)(\))/si", "self::pregReplaceUrlNoQuotes", $this->styleString); + } + + private function pregReplaceUrl($matches) + { + $url = $matches[3]; + # Relative to Absolute + $url = $this->convertRelativeToAbsoluteLink($url); + # Proxify Url + $url = $this->proxifyUrl($url, false); + + $replacement = 
$matches[1] . $matches[2] . $url . $matches[2]; + + return $replacement; + } + + private function pregReplaceUrlNoQuotes($matches) + { + $url = $matches[2]; + # Relative to Absolute + $url = $this->convertRelativeToAbsoluteLink($url); + # Proxify Url + $url = $this->proxifyUrl($url, false); + + $replacement = $matches[1] . $url . $matches[3]; + + return $replacement; + } + + public function getResult() + { + return $this->styleString; + } +} diff --git a/app/Document.php b/app/Document.php new file mode 100644 index 0000000..d6e6078 --- /dev/null +++ b/app/Document.php @@ -0,0 +1,125 @@ +<?php + +namespace App; + +use Illuminate\Http\Request; +use URL; + +abstract class Document +{ + + protected $password; + protected $baseUrl; + + public function __construct($password, $base) + { + $this->password = $password; + $this->baseUrl = $base; + } + + public function proxifyUrl($url, $topLevel) + { + // Only convert valid URLs + $url = trim($url); + if (strpos($url, "http") !== 0 || strpos($url, URL::to('/')) === 0) { + return $url; + } + + $urlToProxy = base64_encode(str_rot13($url)); + $urlToProxy = str_replace("/", "<<SLASH>>", $urlToProxy); + $urlToProxy = urlencode($urlToProxy); + + if ($topLevel) { + $params = \Request::all(); + + # Password + $pw = md5(env('PROXY_PASSWORD') . $url); + $urlToProxy = base64_encode(str_rot13($url)); + $urlToProxy = urlencode(str_replace("/", "<<SLASH>>", $urlToProxy)); + + # Params + $params['password'] = $pw; + $params['url'] = $urlToProxy; + + $iframeUrl = action('ProxyController@proxyPage', $params); + } else { + $params = \Request::all(); + $params['password'] = $this->password; + $params['url'] = $urlToProxy; + + $iframeUrl = action('ProxyController@proxy', $params); + + } + + return $iframeUrl; + } + + protected function convertRelativeToAbsoluteLink($rel) + { + if (strpos($rel, "//") === 0) { + $rel = parse_url($this->baseUrl, PHP_URL_SCHEME) . ":" . 
$rel; + } + + /* return if already absolute URL or empty URL */ + if (parse_url($rel, PHP_URL_SCHEME) != '' + || strlen(trim($rel)) <= 0 + || preg_match("/^\s*mailto:/si", $rel)) { + return ($rel); + } + + /* queries and anchors */ + if ($rel[0] == '#' || $rel[0] == '?') { + return ($this->baseUrl . $rel); + } + + /* parse base URL and convert to local variables: + $scheme, $host, $path */ + extract(parse_url($this->baseUrl)); + + /* remove non-directory element from path */ + if (isset($path)) { + $path = preg_replace('#/[^/]*$#', '', $path); + } + + /* destroy path if relative url points to root */ + if ($rel[0] == '/') { + $path = ''; + } + + /* dirty absolute URL */ + $abs = ''; + + /* do we have a user in our URL? */ + if (isset($user)) { + $abs .= $user; + + /* password too? */ + if (isset($pass)) { + $abs .= ':' . $pass; + } + + $abs .= '@'; + } + + $abs .= $host; + /* did somebody sneak in a port? */ + if (isset($port)) { + $abs .= ':' . $port; + } + + if (isset($path)) { + $abs .= $path; + } + if (isset($rel)) { + $abs .= "/" . ltrim($rel, "/"); + } + /* replace '//' or '/./' or '/foo/../' with '/' */ + $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); + for ($n = 1; $n > 0; $abs = preg_replace($re, '/', $abs, -1, $n)) {} + + /* absolute URL is ready! */ + return ($scheme . '://' . 
$abs); + } + + abstract public function proxifyContent(); +} diff --git a/app/HtmlDocument.php b/app/HtmlDocument.php new file mode 100644 index 0000000..0318431 --- /dev/null +++ b/app/HtmlDocument.php @@ -0,0 +1,244 @@ +<?php + +namespace App; + +use DomDocument; + +class HtmlDocument extends Document +{ + + private $htmlString; + + public function __construct($password, $baseUrl, $htmlString, $encoding = "UTF-8") + { + parent::__construct($password, $baseUrl); + $this->htmlString = mb_convert_encoding($htmlString, 'HTML-ENTITIES', $encoding); + } + + public function getResult() + { + return $this->htmlString; + } + + /** + * Function proxifyContent + * This method parses the given String and Proxifies all Links/Urls in it so it's targetting this Proxy Server + **/ + public function proxifyContent() + { + if (trim($this->htmlString) === "") { + return; + } + + # Let's create a new DOM + libxml_use_internal_errors(true); + $dom = new DomDocument(); + $dom->loadHtml($this->htmlString); + + foreach ($dom->getElementsByTagName('base') as $base) { + $href = $base->getAttribute('href'); + # Convert all relative Links to absolute Ones + $href = $this->convertRelativeToAbsoluteLink($href); + $this->baseUrl = $href; + # Delete Base Tag + $base->parentNode->removeChild($base); + } + + # First things first. 
Let's change all a Tags that can define a target Attribute + foreach ($dom->getElementsByTagName('a') as $link) { + # All Links within a "a" Tag need to target the top level because they change the site on click + $this->convertTargetAttribute($link, "_top"); + # Convert all relative Links to absolute Ones + $link->setAttribute("href", $this->convertRelativeToAbsoluteLink($link->getAttribute("href"))); + # Convert all Links to the proxified Version + # All of this Links should target to the top Level + $link->setAttribute("href", $this->proxifyUrl($link->getAttribute("href"), true)); + } + + # All Buttons + foreach ($dom->getElementsByTagName('button') as $button) { + if ($button->hasAttribute("formtarget")) { + $button->setAttribute("formtarget", "_top"); + } + if ($button->hasAttribute("formaction")) { + $formaction = $button->getAttribute("formaction"); + # Rel to abs + $formaction = $this->convertRelativeToAbsoluteLink($formaction); + # Abs to proxified + $formaction = $this->proxifyUrl($formaction, true); + # And replace + $button->setAttribute("formaction", $formaction); + } + # Since when are buttons allowed to have a href? 
+ # Youtube has such on it's site so we are converting it anyways + if ($button->hasAttribute("href")) { + $href = $button->getAttribute("href"); + # Rel to abs + $href = $this->convertRelativeToAbsoluteLink($href); + # Abs to proxified + $href = $this->proxifyUrl($href, true); + # And replace + $button->setAttribute("href", $href); + } + + } + + foreach ($dom->getElementsByTagName('area') as $area) { + # All Links within a "a" Tag need to target the top level because they change the site on click + $this->convertTargetAttribute($area, "_top"); + } + + foreach ($dom->getElementsByTagName('form') as $form) { + # All Links within a "a" Tag need to target the top level because they change the site on click + $this->convertTargetAttribute($form, "_top"); + # If a Form doesn't define a action It references itself but we need to set the link then + $action = $form->getAttribute("action"); + if ($action === "") { + $action = $this->baseUrl; + } else { + # Otherwise the Link could be relative and we need to change it: + # Convert all relative Links to absolute Ones + $action = $this->convertRelativeToAbsoluteLink($action); + } + # And finally Proxify the Url + $action = $this->proxifyUrl($action, true); + $form->setAttribute("action", $action); + } + + # Alle Link Tags + foreach ($dom->getElementsByTagName('link') as $link) { + # Convert all relative Links to absolute Ones + $link->setAttribute("href", $this->convertRelativeToAbsoluteLink($link->getAttribute("href"))); + # Convert all Links to the proxified Version + # All of this Links should NOT target to the top Level + $link->setAttribute("href", $this->proxifyUrl($link->getAttribute("href"), false)); + } + + # All Iframes + foreach ($dom->getElementsByTagName('iframe') as $iframe) { + # There can be 2 Possible sources + # A - The src Attribute defines a Url that the Iframe loads + $src = $iframe->getAttribute("src"); + if ($src !== "") { + # Make the Link absolute + $src = $this->convertRelativeToAbsoluteLink($src); + 
# Proxify the Link + $src = $this->proxifyUrl($src, false); + # Replace the old Link + $iframe->setAttribute("src", $src); + } + # B - The srcdoc Attribute defines Html-Code that should be displayed in the frame + $srcdoc = $iframe->getAttribute("srcdoc"); + if ($srcdoc !== "") { + # The srcdoc should be a HTML String so we are gonna make a new HTML-Document Element + $htmlDoc = new HtmlDocument($this->password, $this->baseUrl, $srcdoc); + $htmlDoc->proxifyContent(); + $srcdoc = $htmlDoc->getResult(); + # Replace the Old HTML Code + $iframe->setAttribute("srcdoc", $srcdoc); + } + } + + # All Image Tags + foreach ($dom->getElementsByTagName('img') as $img) { + # Convert all Image src's to Absolute Links + $img->setAttribute("src", $this->convertRelativeToAbsoluteLink($img->getAttribute("src"))); + # Convert all Image Sources to proxified Versions + $img->setAttribute("src", $this->proxifyUrl($img->getAttribute("src"), false)); + # Some Images might contain a srcset (Different Images for different resolutions) + # Syntax would be i.e. 
srcset="medium.jpg 1000w, large.jpg 2000w" + $srcset = $img->getAttribute("srcset"); + if ($srcset !== "") { + $images = explode(",", $srcset); + foreach ($images as $index => $set) { + $set = trim($set); + $parts = preg_split("/\s+/si", $set); + # $parts[0] is the Image Path + # It could be relative so convert that one: + $parts[0] = $this->convertRelativeToAbsoluteLink($parts[0]); + + # And now Proxify it: + $parts[0] = $this->proxifyUrl($parts[0], false); + $images[$index] = implode(" ", $parts); + } + $srcset = implode(",", $images); + $img->setAttribute("srcset", $srcset); + } + } + + # Alle Meta Tags + foreach ($dom->getElementsByTagName('meta') as $meta) { + if ($meta->hasAttribute("href")) { + # Convert all relative Links to absolute Ones + $meta->setAttribute("href", $this->convertRelativeToAbsoluteLink($meta->getAttribute("href"))); + # Convert all Links to the proxified Version + # All of this Links should NOT target to the top Level + $meta->setAttribute("href", $this->proxifyUrl($meta->getAttribute("href"), false)); + } + if ($meta->hasAttribute("http-equiv") && $meta->getAttribute("http-equiv") === "refresh") { + # We should refresh the site with a meta tag + # But not before profifying the new URL + $content = $meta->getAttribute("content"); + $url = substr($content, stripos($content, "url=") + 4); + # Convert all relative Links to absolute Ones + $url = $this->convertRelativeToAbsoluteLink($url); + + # Convert all Links to the proxified Version + # All of this Links should NOT target to the top Level + $url = $this->proxifyUrl($url, false); + + $content = substr($content, 0, stripos($content, "url=") + 4) . 
$url; + + $meta->setAttribute("content", $content); + } + } + + # Alle Script Tags + foreach ($dom->getElementsByTagName('script') as $script) { + $script->nodeValue = ""; + $script->setAttribute("src", ""); + $script->setAttribute("type", ""); + } + + # Alle Style Blöcke + # Werden extra geparsed + foreach ($dom->getElementsByTagName('style') as $style) { + $styleString = $style->nodeValue; + $cssElement = new CssDocument($this->password, $this->baseUrl, $styleString); + $cssElement->proxifyContent(); + $style->nodeValue = $cssElement->getResult(); + } + + # Abschließend gehen wir noch einmal alle Tags durch + foreach ($dom->getElementsByTagName('*') as $el) { + if ($el->getAttribute("style") !== "") { + $styleString = $el->getAttribute("style"); + $cssElement = new CssDocument($this->password, $this->baseUrl, $styleString); + $cssElement->proxifyContent(); + $el->setAttribute("style", $cssElement->getResult()); + } + # We Will Remove all Javascript Event attributes + # To keep things simple we're gonna remove all Attributes which names start with "on" + foreach ($el->attributes as $attr) { + if (stripos($attr->name, "on") === 0) { + $el->removeAttribute($attr->name); + } + } + } + + $this->htmlString = $dom->saveHtml(); + + # Remove all now empty script Tags + $this->htmlString = preg_replace("/<\s*[\/]{0,1}\s*script[^>]*?>/si", "", $this->htmlString); + + libxml_use_internal_errors(false); + } + +/** + * This function changes the current Target Attribute on the link to given new target Attribute + */ + private function convertTargetAttribute($link, $newTarget) + { + $link->setAttribute("target", $newTarget); + } +} diff --git a/app/Http/Controllers/ProxyController.php b/app/Http/Controllers/ProxyController.php index d371e41..07616a4 100644 --- a/app/Http/Controllers/ProxyController.php +++ b/app/Http/Controllers/ProxyController.php @@ -2,8 +2,13 @@ namespace App\Http\Controllers; +use App\CssDocument; +use App\HtmlDocument; use Cache; +use finfo; use 
Illuminate\Http\Request; +use Symfony\Component\HttpFoundation\StreamedResponse; +use URL; class ProxyController extends Controller { @@ -11,6 +16,11 @@ class ProxyController extends Controller { $targetUrl = str_replace("<<SLASH>>", "/", $url); $targetUrl = str_rot13(base64_decode($targetUrl)); + + if (strpos($targetUrl, URL::to('/')) === 0) { + return redirect($targetUrl); + } + // Password already got checked by the middleware: $newPW = md5(env('PROXY_PASSWORD') . date('dmy')); @@ -100,6 +110,11 @@ class ProxyController extends Controller public function proxy(Request $request, $password, $url) { + + $supportedContentTypes = [ + 'text/html', + ]; + $targetUrl = str_replace("<<SLASH>>", "/", $url); $targetUrl = str_rot13(base64_decode($targetUrl)); $this->password = $password; @@ -111,60 +126,60 @@ class ProxyController extends Controller if (!Cache::has($hash) || 1 == 1) { // Inits the Curl connection for being able to preload multiple URLs while using a keep-alive connection $this->initCurl(); - if ($request->has("enableCookies")) { - $result = $this->getUrlContent($targetUrl, true); + $result = $this->getUrlContent($targetUrl, false); + + # $result can be null if the File Size exeeds the maximum cache size defined in .env + # In this case + if ($result === null) { + return $this->streamFile($targetUrl); } else { - $result = $this->getUrlContent($targetUrl, false); - } - # Für alle weiteren Aktonen auf der URL benötigen wir die URL-Parameter nicht mehr. Wir entfernen diese: - $targetUrl = preg_replace("/\?.*/si", "", $targetUrl); - if (isset($result["http_code"]) && $result["http_code"] !== 0) { $httpcode = $result["http_code"]; - } - if (!$request->has('enableJS')) { - $result["data"] = $this->removeJavaScript($result["data"]); - } - - extract(parse_url($targetUrl)); - $this->base = $scheme . "://" . $host; - if (isset($path)) { - $this->base .= $path; + extract(parse_url($targetUrl)); + $base = $scheme . "://" . 
$host; + + # We will parse whether we have a parser for this document type. + # If not, we will not Proxy it: + $contentType = strpos($result["header"]["content-type"], ";") !== false ? trim(substr($result["header"]["content-type"], 0, strpos($result["header"]["content-type"], ";"))) : trim($result["header"]["content-type"]); + switch ($contentType) { + case 'text/html': + # It's a html Document + $htmlDocument = new HtmlDocument($password, $targetUrl, $result["data"]); + $htmlDocument->proxifyContent(); + $result["data"] = $htmlDocument->getResult(); + break; + case 'image/png': + case 'image/jpeg': + case 'image/gif': + case 'application/font-woff': + case 'application/x-font-woff': + case 'application/x-empty': + case 'font/woff2': + case 'image/svg+xml': + case 'application/octet-stream': + case 'text/plain': + case 'image/x-icon': + case 'font/eot': + case 'image/vnd.microsoft.icon': + # Nothing to do with Images: Just return them + break; + case 'text/css': + # Css Documents might contain references to External Documents that need to get Proxified + $cssDocument = new CssDocument($password, $targetUrl, $result["data"]); + $cssDocument->proxifyContent(); + $result["data"] = $cssDocument->getResult(); + break; + default: + # We have no Parser for this one. Let's respond: + abort(500, $contentType . " " . $targetUrl); + break; + } } - # Forms ohne action bekommen eine Action mit der aktuellen URL verpasst. 
- $result["data"] = preg_replace("/(<form\b(?:(?!action=).)*?)(>)/si", "$1 action=\"$targetUrl\"$2", $result["data"]); - if (isset($result["header"]["Content-Type"]) && stripos($result["header"]["Content-Type"], "text/html") !== false) { - $result["data"] = $this->convertTargetAttributes($result["data"]); - } - // First of all we will handle the URLs in the META refresh tag - #die(var_dump($result["data"])); - $result["data"] = preg_replace_callback("/(<meta\b[^>]*?url=)([^\s\"\']*)([^>]*?>)/si", "self::regRel2AbsMeta", $result["data"]); - // We define the "tag" that encloses possible URLS that are needed to be parsed - // Every tag is seperated by a "|" and needs to be regexp escaped - $tagsToMatch = "href=|src=|action=|background="; - // We have to match all Links enclosed within Quotes - $result["data"] = preg_replace_callback("/(<[^>]+)($tagsToMatch)\s*([\"\'])((?!\\\\3).*?)(\\3.*?>)/si", "self::regRel2AbsQuotes", $result["data"]); - // Ommitting Quotes is valid too so we match all Links matching this here - $result["data"] = preg_replace_callback("/(<[^>]+?)($tagsToMatch)([^\"\'\s][^\s\">]*?)(\s[^>]+?>|>)/si", "self::regRel2AbsNoQuotes", $result["data"]); - // srcsets can contain multiple URLs so we handle them here srcset= - $result["data"] = preg_replace_callback("/(<[^>]+)(srcset=)\s*([\"\'])((?!\\\\3).*?)(\\3.*?>)/s", "self::regRel2AbsSrcSet", $result["data"]); - - // You can define resources in your css files that will make the browser load that resources - // We need to Proxify them, too. - // Option one url(...) 
- $result["data"] = preg_replace_callback("/(url\(\s*[\"\']{0,1})([^\'\"\)]*?)([\"\']{0,1}\))/si", "self::regCssRel2Abs", $result["data"]); - - // Now we need replace all of the absolute Links - // We have to distinct whether the target of the Link is _blank|_top or not - - #$result["data"] = $this->parseProxyLink($result["data"], $password, $request); curl_close($this->ch); - # We are gonna cache all files for 60 Minutes to reduce # redundant file transfers: - $val = base64_encode(serialize($result)); Cache::put($hash, $val, 60); @@ -183,302 +198,151 @@ class ProxyController extends Controller ->withHeaders($result["header"]); } - private function convertTargetAttributes($data) - { - $result = $data; - - # The following html Elements can define a target attribute which will set the way in which Links are gonna be opened - # a, area, base, form (https://wiki.selfhtml.org/wiki/Referenz:HTML/Attribute/target) - # If the target is _blank we don't need to worry It'll be correct then - # We should make all other options Reference to _top because the link outside of the iframe needs to be changed then: - - # First change the ones that already have an target tag - $result = preg_replace("/(<\s*(?:a|area|base|form)[^>]+\starget\s*=\s*[\"\']{0,1}\s*)(?:(?!_blank|\s|\"|\').)+/si", "$1_top", $result); - # Now the ones that haven't got one - $result = preg_replace("/(<\s*(?:a|area|base|form)(?:(?!target=).)*?)([<>])/", "$1 target=_top $2", $result); - return $result; - } - private function removeJavaScript($html) - { - // We could use DOMDocument from PHP but - // that would suggest that the given HTML is well formed which - // we cannot guarantee so we will use regex - $result = $html; - // We will simply remove every single script tag and it's contents - $result = preg_replace("/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/i", "", $result); - # Remove all Javascript that is placed in a href - $result = preg_replace('/(href=)(["\'])\s*javascript:[^\\2]*?(\\2)/si', 
"$1$2$3", $result); - # Remove all HTMl Event Handler - $result = preg_replace_callback("/<[^>]*?\bon[^=]+?=([\"\']).*?\\1[^>]*?>/si", "self::removeJsAttributes", $result); - # Remove all autofocus attributes: - $result = preg_replace("/(<[^>]*?)autofocus[=\"\']+([^>]*>)/si", "$1$2", $result); - - // The rest of a potentional JavaScript code will be blocked by our IFrame. It would be a waste of resources to remove them - - return $result; - } - - private function removeJsAttributes($match) - { - # This funktion gets a HTML Tag in $match[0] with javascript HTML Attributes in it (onclick=, etc...) - # This funktion schouls remove these from the string and return the replacement - $string = $match[0]; - - $string = preg_replace("/\bon[^=]+?=\s*([\"\']).*?\\1/si", "", $string); - return $string; - } - - private function regRel2AbsMeta($match) - { - $top = false; - $pre = $match[1]; - $post = $match[3]; - $link = $match[2]; - - $link = $this->parseRelativeToAbsolute($link); - - // We will Proxify this URL - $link = $this->proxifyUrl($link, $this->password, $top); - return $pre . $link . $post; - } - - private function regRel2AbsNoQuotes($match) - { - $top = false; - if (preg_match("/target=[\"\']{0,1}\s*(_blank|_top)/si", $match[0]) === 1) { - $top = true; - } - - $pre = $match[1] . $match[2]; - $post = $match[4]; - $link = $match[3]; - - $link = $this->parseRelativeToAbsolute($link); - - // We will Proxify this URL - $link = $this->proxifyUrl($link, $this->password, $top); - return $pre . $link . $post; - } - - private function regRel2AbsQuotes($match) - { - $top = false; - if (preg_match("/target=[\"\']{0,1}\s*(_blank|_top)/si", $match[0]) === 1) { - $top = true; - } - $pre = $match[1] . $match[2] . $match[3]; - $post = $match[5]; - $link = htmlspecialchars_decode($match[4]); - - $link = $this->parseRelativeToAbsolute($link); - - // We will Proxify this URL - $link = $this->proxifyUrl($link, $this->password, $top); - return $pre . $link . 
$post; - } - - private function regCssRel2Abs($match) - { - $top = false; - $pre = $match[1]; - $post = $match[3]; - $link = htmlspecialchars_decode($match[2]); - - $link = $this->parseRelativeToAbsolute($link); - if (strpos($link, "data:") !== 0) { - # die($link); - } - // We will Proxify this URL - $link = $this->proxifyUrl($link, $this->password, $top); - - return $pre . $link . $post; - } - - private function regRel2AbsSrcSet($match) + private function initCurl() { - $top = false; - if (preg_match("/target=[\"\']{0,1}\s*(_blank|_top)/si", $match[0]) === 1) { - $top = true; - } - - $pre = $match[1] . $match[2] . $match[3]; - $post = $match[5]; - - $links = explode(",", $match[4]); - $result = $match[4]; - foreach ($links as $link) { - preg_match_all("/[\S]+/", $link, $matches); - if (isset($matches[0]) && isset($matches[0][0])) { - // In the srcset the Link would be the first match - $rel = $matches[0][0]; - $absLink = $this->parseRelativeToAbsolute($rel, "", $rel, ""); - - // We will Proxify this URL - $absLink = $this->proxifyUrl($absLink, $this->password, $top); - - $result = str_replace($rel, $absLink, $result); - } - } - return $pre . $result . 
$post; + $this->ch = curl_init(); + curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0'); + curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 0); + curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, 0); + curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($this->ch, CURLOPT_TIMEOUT, 30); + curl_setopt($this->ch, CURLOPT_HEADER, 1); } - private function parseRelativeToAbsolute($link) + private function streamFile($url) { - // When the Link is already absolute then do not convert anything - if (preg_match("/(?:(?!:\/\/).)+?:\/\//si", $link) === 1) { - return $link; - // If the Link starts with "//" that means it already is an absolute Link - // But it needs to get the current protocol added - } elseif (preg_match("/^\s*?\/\//si", $link) === 1) { - $scheme = parse_url($this->base)["scheme"] . "://"; - $abs = preg_replace("/^\s*?\/\//si", "$scheme", $link); - return $abs; - // The Link that is following here is not absolute. But it can be invalid: - } else { - $absLink = $this->rel2abs($link, $this->base); - return $absLink; - } - - # Convert every Link that starts with [ . | / ] but not with [ // ] + $headers = get_headers($url, 1); - while (preg_match("/(href=|src=|url\(|action=|srcset=|@import |background=)(\s*[\"\']{0,1}\s*)((:?\.|\/[^\/])[^\"\'\s]+)([\"\'\s])/si", $result, $matches) === 1) { - $absoluteLink = $this->rel2abs($matches[3], $base); - $result = str_replace($matches[0], $matches[1] . $matches[2] . $absoluteLink . $matches[5], $result, $count); - } - # Convert every Link that starts with a path and not with a slash - preg_match_all("/(href=|src=|url\(|action=|srcset=|@import |background=)\s*([\"\']{0,1})\s*([\w]+?[^\"\'\s>]+)/si", $result, $matches); - foreach ($matches[0] as $index => $value) { - $absoluteLink = $this->rel2abs($matches[3][$index], $base); - $result = str_replace($value, $matches[1][$index] . $matches[2][$index] . 
$absoluteLink, $result); - } + $filename = basename($url); - $scheme = parse_url($base)["scheme"] . "://"; + # From the headers we need to remove the first Element since it's the status code: + $status = $headers[0]; + $status = intval(preg_split("/\s+/si", $status)[1]); + array_forget($headers, 0); - while (preg_match("/(href=|src=|url\(|action=|srcset=|@import |background=)([\"\']{0,1})\/{2}/si", $result, $matches) === 1) { - $result = str_replace($matches[0], $matches[1] . $matches[2] . $scheme, $result, $count); + # Add the Filename if it's not set: + if (!isset($headers["Content-Disposition"])) { + $headers["Content-Disposition"] = "inline; filename=\"" . $filename . "\""; } - # Form tags that do not define a action will automatically target the same site we are on. - # To make it link to the correct page in the end we need to add an specific action: - $result = preg_replace("/(<\s*form(?:(?!action=).)+?)(>)/si", "$1 action=\"$base\"$2", $result); + $response = new StreamedResponse(function () use ($url) { + # We are gonna stream a large file + $wh = fopen('php://output', 'r+'); - return $result; - } + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_BUFFERSIZE, 256); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FILE, $wh); // Data will be sent to our stream ;-) - private function parseProxyLink($data, $password, $request) - { - $result = $data; - $count = 1; - # Zunächst ersetzen wir alle externen Links - preg_match_all("/<\s*(?:a|area|form)[^>]+?target=[\"\']{0,1}\s*(?:_top|_blank)\s*[^>]*?>/si", $result, $matches); - foreach ($matches[0] as $tag) { - $tmp = preg_replace_callback("/((?:href|action)=[\"\']{0,1})([^\"\'\s>]+)/si", "self::pregProxifyUrlTop", $tag); - $result = str_replace($tag, $tmp, $result, $count); - } - # Jetzt alle internen Links mit einem anderen target: - 
preg_match_all("/<\s*(?:a|area|form)[^>]+?target=[\"\']{0,1}\s*(?:(?!_top|_blank|>).)+?>/si", $result, $matches); - foreach ($matches[0] as $tag) { - $tmp = preg_replace_callback("/((?:href|action)=[\"\']{0,1})([^\"\'\s>]+)/si", "self::pregProxifyUrl", $tag); - $result = str_replace($tag, $tmp, $result, $count); - } - $result = preg_replace_callback("/((?:href=|src=|action=|url\(|srcset=|@import |background=)\s*[\"\']{0,1}\s*)([^\"\'\s\)>]+)/si", "self::pregProxifyUrl", $result); + curl_exec($ch); - return $result; - } - - private function pregProxifyUrl($matches) - { - - $current = \Request::root(); - $iframeUrl = $matches[2]; + curl_close($ch); - if (strpos(strtolower($iframeUrl), "http") === 0 && strpos(strtolower($iframeUrl), $current) !== 0) { - $iframeUrl = $this->proxifyUrl($matches[2], null, false); - } - - return $matches[1] . $iframeUrl; - } - private function pregProxifyUrlTop($matches) - { - $current = \Request::root(); - $iframeUrl = $matches[2]; - if (strpos($iframeUrl, "http") === 0 && strpos($iframeUrl, $current) !== 0) { - $iframeUrl = $this->proxifyUrl($matches[2], null, true); - } - return $matches[1] . 
$iframeUrl; - } - - private function initCurl() - { - $this->ch = curl_init(); - curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)'); - curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 0); - curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, 0); - curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($this->ch, CURLOPT_TIMEOUT, 30); - curl_setopt($this->ch, CURLOPT_HEADER, 1); + // Don't forget to close the "file" / stream + fclose($wh); + }, 200, $headers); + $response->send(); + return $response; } private function getUrlContent($url, $withCookies) { $url = htmlspecialchars_decode($url); curl_setopt($this->ch, CURLOPT_URL, "$url"); + curl_setopt($this->ch, CURLOPT_NOPROGRESS, false); + curl_setopt($this->ch, CURLOPT_PROGRESSFUNCTION, 'self::downloadProgress'); + + $data = curl_exec($this->ch); + + # If the requested File is too big for this Process to cache then we are gonna handle this File download later + # in another way. 
+ if (curl_errno($this->ch) === CURLE_ABORTED_BY_CALLBACK) { + # In this case the download was aborted because of the FileSize + # We have no headers or anything like that + # so we will return null and handle this case in the calling function + return null; + } else { - $data = curl_exec($this->ch); - $httpcode = intval(curl_getinfo($this->ch, CURLINFO_HTTP_CODE)); - - $header_size = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $header_size); - - $data = substr($data, $header_size); - $headerArray = []; - foreach (explode(PHP_EOL, $header) as $index => $value) { - if ($index > 0) { - $ar = explode(': ', $value); - if (sizeof($ar) === 2) { - if ($withCookies && (strtolower($ar[0]) === "content-type" || strtolower($ar[0]) === "set-cookie")) { - $headerArray[trim($ar[0])] = trim($ar[1]); - } elseif (!$withCookies && strtolower($ar[0]) === "content-type") { - $headerArray[trim($ar[0])] = trim($ar[1]); - } elseif (strtolower($ar[0]) === "location") { - $headerArray[trim($ar[0])] = $this->proxifyUrl(trim($ar[1]), null, false); - } else { - #$headerArray[trim($ar[0])] = trim($ar[1]); + $httpcode = intval(curl_getinfo($this->ch, CURLINFO_HTTP_CODE)); + + $header_size = curl_getinfo($this->ch, CURLINFO_HEADER_SIZE); + $header = substr($data, 0, $header_size); + + $data = substr($data, $header_size); + $headerArray = []; + foreach (explode(PHP_EOL, $header) as $index => $value) { + if ($index > 0) { + $ar = explode(': ', $value); + if (sizeof($ar) === 2) { + if ($withCookies && (strtolower($ar[0]) === "content-type" || strtolower($ar[0]) === "set-cookie")) { + $headerArray[trim($ar[0])] = trim($ar[1]); + } elseif (!$withCookies && strtolower($ar[0]) === "content-type") { + $headerArray[strtolower(trim($ar[0]))] = strtolower(trim($ar[1])); + } elseif (strtolower($ar[0]) === "location") { + $headerArray[trim($ar[0])] = $this->proxifyUrl(trim($ar[1]), null, false); + } else { + #$headerArray[trim($ar[0])] = trim($ar[1]); + } } } } - } - 
$headerArray["Content-Security-Policy"] = "default-src 'self' data: 'unsafe-inline' http://localhost"; - # Charset-Fix for people who forget to declare charset: - # If this won't work the default charset UTF-8 is set by laravel: - foreach ($headerArray as $key => $value) { - if (strtolower($key) === "content-type" && strpos(strtolower($value), "charset") === false) { - # We will see if there is a content-type with charset declared in the document: - if (preg_match("/<\s*meta[^>]+http-equiv=[\'\"]\s*content-type\s*[\'\"][^>]+?>/si", $data, $match)) { - if (strpos($match[0], "charset") !== false && preg_match("/content=[\'\"]([^\'\"]+)/si", $match[0], $contentType)) { - $headerArray[$key] = $contentType[1]; - break; + + # It might happen that a server doesn't give Information about file Type. + # Let's try to generate one in this case + if (!isset($headerArray["content-type"])) { + $finfo = new finfo(FILEINFO_MIME); + $headerArray["content-type"] = $finfo->buffer($data); + } + + $headerArray["Content-Security-Policy"] = "default-src 'self' data: 'unsafe-inline' http://localhost"; + # Charset-Fix for people who forget to declare charset: + # If this won't work the default charset UTF-8 is set by laravel: + foreach ($headerArray as $key => $value) { + if (strtolower($key) === "content-type" && strpos(strtolower($value), "charset") === false) { + # We will see if there is a content-type with charset declared in the document: + if (preg_match("/<\s*meta[^>]+http-equiv=[\'\"]\s*content-type\s*[\'\"][^>]+?>/si", $data, $match)) { + if (strpos($match[0], "charset") !== false && preg_match("/content=[\'\"]([^\'\"]+)/si", $match[0], $contentType)) { + $headerArray[$key] = $contentType[1]; + break; + } else { + break; + } } else { break; } - } else { - break; + } } + if (!isset($httpcode) || !$httpcode || $httpcode === 0) { + $httpcode = 200; + } + + return ['header' => $headerArray, 'data' => $data, 'http_code' => $httpcode]; } + } - return ['header' => $headerArray, 'data' 
=> $data, 'http_code' => $httpcode]; + private function downloadProgress($resource, $download_size, $downloaded, $upload_size, $uploaded) + { + # The Memory Cache: + # Every file that our Proxy parses has to lie in the memory Cache of PHP + # If you would download a 5GB File then our PHP Process would need 5GB min RAM + # We are gonna handle Files bigger than our defined maximum Cache Size in another way and break the connection at this point. + if ($download_size > intval(env('PROXY_MEMORY_CACHE')) || $downloaded > intval(env('PROXY_MEMORY_CACHE'))) { + return 1; + } } public function proxifyUrl($url, $password = null, $topLevel) { // Only convert valid URLs $url = trim($url); - if (strpos($url, "http") !== 0) { + if (strpos($url, "http") !== 0 || strpos($url, URL::to('/')) === 0) { return $url; } @@ -514,66 +378,4 @@ class ProxyController extends Controller return $iframeUrl; } - - private function rel2abs($rel, $base) - { - - /* return if already absolute URL */ - if (parse_url($rel, PHP_URL_SCHEME) != '' || strlen(trim($rel)) <= 0) { - return ($rel); - } - - /* queries and anchors */ - if ($rel[0] == '#' || $rel[0] == '?') { - return ($base . $rel); - } - - /* parse base URL and convert to local variables: - $scheme, $host, $path */ - extract(parse_url($base)); - - /* remove non-directory element from path */ - if (isset($path)) { - $path = preg_replace('#/[^/]*$#', '', $path); - } - - /* destroy path if relative url points to root */ - if ($rel[0] == '/') { - $path = ''; - } - - /* dirty absolute URL */ - $abs = ''; - - /* do we have a user in our URL? */ - if (isset($user)) { - $abs .= $user; - - /* password too? */ - if (isset($pass)) { - $abs .= ':' . $pass; - } - - $abs .= '@'; - } - - $abs .= $host; - /* did somebody sneak in a port? */ - if (isset($port)) { - $abs .= ':' . $port; - } - - if (isset($path)) { - $abs .= $path; - } - if (isset($rel)) { - $abs .= "/" . 
ltrim($rel, "/"); - } - /* replace '//' or '/./' or '/foo/../' with '/' */ - $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); - for ($n = 1; $n > 0; $abs = preg_replace($re, '/', $abs, -1, $n)) {} - - /* absolute URL is ready! */ - return ($scheme . '://' . $abs); - } } diff --git a/resources/views/ProxyPage.blade.php b/resources/views/ProxyPage.blade.php index 849cdfa..0381843 100644 --- a/resources/views/ProxyPage.blade.php +++ b/resources/views/ProxyPage.blade.php @@ -1,5 +1,6 @@ <html> <head> + <meta charset="utf-8" /> <link href="/css/bootstrap.min.css" rel="stylesheet" type="text/css" /> <link href="/css/style.css" rel="stylesheet" type="text/css" /> </head> @@ -18,16 +19,6 @@ </div> <div id="proxy-options" class="col-xs-2"> <ul class="list-unstyled"> - @if(!$scriptsEnabled) - <li><a href="{{$scriptUrl}}" class="btn btn-warning btn-xs">Skripte blockiert</a></li> - @else - <li><a href="{{$scriptUrl}}" class="btn btn-info btn-xs">Skripte zugelassen</a></li> - @endif - @if(!$cookiesEnabled) - <li><a href="{{$cookieUrl}}" class="btn btn-warning btn-xs">Cookies gesperrt</a></li> - @else - <li><a href="{{$cookieUrl}}" class="btn btn-info btn-xs">Cookies zugelassen</a></li> - @endif <li><a href="{!!$targetUrl!!}" class="btn btn-danger btn-xs">Proxy ausschalten</a></li> </ul> </div> -- GitLab