Skip to content
Snippets Groups Projects
Commit 6bc9e813 authored by Davide Aprea's avatar Davide Aprea
Browse files

fix various bugs

parent 64dac664
No related branches found
No related tags found
1 merge request!19Resolve "use multi curl to improve performance"
...@@ -155,13 +155,8 @@ class RequestFetcher extends Command ...@@ -155,13 +155,8 @@ class RequestFetcher extends Command
Log::error($error); Log::error($error);
} }
if ($responseCode !== 200) { $body = \curl_multi_getcontent($info["handle"]);
Log::debug($resulthash);
Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($info["handle"], CURLINFO_EFFECTIVE_URL) . "\n");
} else {
$body = \curl_multi_getcontent($info["handle"]);
}
Redis::pipeline(function ($pipe) use ($resulthash, $body, $cacheDurationMinutes) { Redis::pipeline(function ($pipe) use ($resulthash, $body, $cacheDurationMinutes) {
$pipe->lpush($resulthash, $body); $pipe->lpush($resulthash, $body);
$pipe->expire($resulthash, 60); $pipe->expire($resulthash, 60);
...@@ -174,6 +169,8 @@ class RequestFetcher extends Command ...@@ -174,6 +169,8 @@ class RequestFetcher extends Command
Log::error($e->getMessage()); Log::error($e->getMessage());
} }
} }
} catch (\Exception $e) {
Log::error($e->getMessage());
} finally { } finally {
\curl_multi_remove_handle($mc, $info["handle"]); \curl_multi_remove_handle($mc, $info["handle"]);
} }
......
...@@ -147,12 +147,6 @@ class ProxyController extends Controller ...@@ -147,12 +147,6 @@ class ProxyController extends Controller
$result = []; $result = [];
$httpcode = 200; $httpcode = 200;
# We are gonna cache all files for 60 Minutes to reduce
# redundant file transfers:
$val = base64_encode(serialize($result));
Cache::put($hash, $val, 60);
if (!Cache::has($hash) || env("CACHE_ENABLED") === false) { if (!Cache::has($hash) || env("CACHE_ENABLED") === false) {
$useragent = $_SERVER['HTTP_USER_AGENT']; $useragent = $_SERVER['HTTP_USER_AGENT'];
...@@ -173,94 +167,86 @@ class ProxyController extends Controller ...@@ -173,94 +167,86 @@ class ProxyController extends Controller
$mission = json_encode($mission); $mission = json_encode($mission);
Redis::rpush(RequestFetcher::FETCHQUEUE_KEY, $mission); Redis::rpush(RequestFetcher::FETCHQUEUE_KEY, $mission);
$answer = Redis::brpoplpush($hash, $hash, 1); $answer = Redis::brpoplpush($hash, $hash, 10);
$result = HttpParser::parse($answer);
} else { } else {
$result = Cache::get($hash); $answer = Cache::get($hash);
} }
$result = HttpParser::parse($answer);
if ($result === null) { if ($result === null) {
return $this->streamFile($targetUrl); return $this->streamFile($targetUrl);
} else { } else {
$httpcode = $result["header"]["http_code"]; $httpcode = $result["header"]["http_code"];
extract(parse_url($targetUrl));
extract(parse_url($targetUrl)); $base = $scheme . "://" . $host;
# We will parse whether we have a parser for this document type.
$base = $scheme . "://" . $host; # If not, we will not Proxy it:
$contentTypeHeader = $result["header"]["content-type"];
# We will parse whether we have a parser for this document type. $contentType = strpos($result["header"]["content-type"], ";") !== false ? trim(substr($result["header"]["content-type"], 0, strpos($result["header"]["content-type"], ";"))) : trim($result["header"]["content-type"]);
# If not, we will not Proxy it: $contentEncoding = stripos($contentTypeHeader, "charset=") !== false ? trim(substr($contentTypeHeader, stripos($contentTypeHeader, "charset=") + 8)) : null;
$contentTypeHeader = $result["header"]["content-type"]; $contentEncoding = rtrim($contentEncoding, ";");
$contentType = strpos($result["header"]["content-type"], ";") !== false ? trim(substr($result["header"]["content-type"], 0, strpos($result["header"]["content-type"], ";"))) : trim($result["header"]["content-type"]); if (isset($result["header"]["content-disposition"])) {
$contentEncoding = stripos($contentTypeHeader, "charset=") !== false ? trim(substr($contentTypeHeader, stripos($contentTypeHeader, "charset=") + 8)) : null; if (stripos($result["header"]["content-disposition"], "filename=") === false) {
$contentEncoding = rtrim($contentEncoding, ";"); $basename = basename(parse_url($targetUrl, PHP_URL_PATH));
$newHeader = $result["header"]["content-disposition"];
if (isset($result["header"]["content-disposition"])) { $newHeader = trim($newHeader);
if (stripos($result["header"]["content-disposition"], "filename=") === false) { $newHeader = rtrim($newHeader, ";");
$basename = basename(parse_url($targetUrl, PHP_URL_PATH)); $newHeader .= "; filename=" . $basename;
$newHeader = $result["header"]["content-disposition"]; $result["header"]["content-disposition"] = $newHeader;
$newHeader = trim($newHeader);
$newHeader = rtrim($newHeader, ";");
$newHeader .= "; filename=" . $basename;
$result["header"]["content-disposition"] = $newHeader;
}
} }
}
switch ($contentType) { switch ($contentType) {
case 'text/html': case 'text/html':
# It's an html document # It's an html document
$htmlDocument = new HtmlDocument($password, $targetUrl, $result["data"], $contentEncoding); $htmlDocument = new HtmlDocument($password, $targetUrl, $result["data"], $contentEncoding);
$htmlDocument->proxifyContent(); $htmlDocument->proxifyContent();
$result["data"] = $htmlDocument->getResult(); $result["data"] = $htmlDocument->getResult();
break; break;
case 'application/pdf': case 'application/pdf':
if (!isset($result["header"]["content-disposition"])) { if (!isset($result["header"]["content-disposition"])) {
$name = "document.pdf"; $name = "document.pdf";
$basename = basename($targetUrl); $basename = basename($targetUrl);
if (stripos($basename, ".pdf") !== false) { if (stripos($basename, ".pdf") !== false) {
$name = $basename; $name = $basename;
}
$result["header"]["content-disposition"] = "attachment; filename=$name";
} }
// no break $result["header"]["content-disposition"] = "attachment; filename=$name";
case 'image/png': }
case 'image/jpeg': // no break
case 'image/gif': case 'image/png':
case 'application/font-woff': case 'image/jpeg':
case 'application/x-font-woff': case 'image/gif':
case 'application/x-empty': case 'application/font-woff':
case 'font/woff2': case 'application/x-font-woff':
case 'image/svg+xml': case 'application/x-empty':
case 'application/octet-stream': case 'font/woff2':
case 'text/plain': case 'image/svg+xml':
case 'image/x-icon': case 'application/octet-stream':
case 'font/eot': case 'text/plain':
case 'image/vnd.microsoft.icon': case 'image/x-icon':
case 'application/vnd.ms-fontobject': case 'font/eot':
case 'application/x-font-ttf': case 'image/vnd.microsoft.icon':
case 'application/x-www-form-urlencoded': case 'application/vnd.ms-fontobject':
case 'application/zip': case 'application/x-font-ttf':
case 'binary/octet-stream': case 'application/x-www-form-urlencoded':
# Nothing to do with Images: Just return them case 'application/zip':
break; case 'binary/octet-stream':
case 'text/css': # Nothing to do with Images: Just return them
# Css Documents might contain references to External Documents that need to get Proxified break;
$cssDocument = new CssDocument($password, $targetUrl, $result["data"]); case 'text/css':
$cssDocument->proxifyContent(); # Css Documents might contain references to External Documents that need to get Proxified
$result["data"] = $cssDocument->getResult(); $cssDocument = new CssDocument($password, $targetUrl, $result["data"]);
break; $cssDocument->proxifyContent();
default: $result["data"] = $cssDocument->getResult();
# We have no Parser for this one. Let's respond: break;
abort(500, $contentType . " " . $targetUrl); default:
break; # We have no Parser for this one. Let's respond:
} abort(500, $contentType . " " . $targetUrl);
break;
} }
}
if ($result["data"] === false) { if ($result["data"] === false) {
$result["data"] = ""; $result["data"] = "";
} }
return response($result["data"], $httpcode) return response($result["data"], $httpcode)
->withHeaders($result["header"]); ->withHeaders($result["header"]);
} }
......
...@@ -16,13 +16,34 @@ class HttpParser ...@@ -16,13 +16,34 @@ class HttpParser
$header[$tmp[0]] = $tmp[1]; $header[$tmp[0]] = $tmp[1];
} }
} }
if(!isset($header['content-type'])){
$header['content-type'] = 'application/octet-stream';
}
return $header; return $header;
} }
public static function parse($htmldoc){ public static function parse($htmldoc){
$parts = explode("\r\n\r\n", $htmldoc); try{
$out['header'] = self::parseHeader($parts[count($parts)-2]); $parts = explode("\r\n\r\n", $htmldoc);
$out['data'] = $parts[count($parts)-1]; $regex = "/^HTTP/sm";
return $out; $i = 0;
while($i < count($parts)){
if(!preg_match($regex ,$parts[$i])){
break;
}
$i++;
}
$out['header'] = self::parseHeader($parts[$i-1]);
$out['data'] = implode(array_slice($parts,$i));
unset($out['header']['content-encoding']);
unset($out['header']['content-length']);
unset($out['header']['x-frame-options']);
return $out;
} catch(\Exception $e) {
$out['header']['http_code'] = 200;
$out['header']['content-type'] = "text/html";
$out['data'] = 'no-result';
return $out;
}
} }
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment