Commit b9850bfe authored by Davide Aprea's avatar Davide Aprea
Browse files

Merge branch 'development' into...

Merge branch 'development' into '1020-show-information-about-every-search-engine-that-found-a-result'

# Conflicts:
#   .gitignore
parents 1dca30ac dad27b81
......@@ -23,4 +23,5 @@ npm-debug.log
composer.lock
package-lock.json
local.log
browserstack.err
......@@ -25,7 +25,7 @@ ingress:
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/configuration-snippet: |
more_set_headers "Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; script-src-elem 'self' 'unsafe-inline'; script-src-attr 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; style-src-elem 'self' 'unsafe-inline'; style-src-attr 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self'; media-src; object-src; prefetch-src; child-src; frame-src 'self'; worker-src; frame-ancestors 'self' https://scripts.zdv.uni-mainz.de; form-action 'self'; base-uri; manifest-src; plugin-types; report-uri; report-to";
more_set_headers "Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; script-src-elem 'self' 'unsafe-inline'; script-src-attr 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; style-src-elem 'self' 'unsafe-inline'; style-src-attr 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self'; media-src; object-src; prefetch-src; child-src; frame-src 'self'; worker-src; frame-ancestors 'self' https://scripts.zdv.uni-mainz.de; form-action 'self' www.paypal.com; base-uri; manifest-src; plugin-types; report-uri; report-to";
more_set_headers "X-Frame-Options: sameorigin";
more_set_headers "X-Content-Type-Options: nosniff";
more_set_headers "ReferrerPolicy: origin";
......
......@@ -25,7 +25,7 @@ ingress:
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/configuration-snippet: |
more_set_headers "Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; script-src-elem 'self' 'unsafe-inline'; script-src-attr 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; style-src-elem 'self' 'unsafe-inline'; style-src-attr 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self'; media-src; object-src; prefetch-src; child-src; frame-src 'self'; worker-src; frame-ancestors 'self' https://scripts.zdv.uni-mainz.de; form-action 'self'; base-uri; manifest-src; plugin-types; report-uri; report-to";
more_set_headers "Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; script-src-elem 'self' 'unsafe-inline'; script-src-attr 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; style-src-elem 'self' 'unsafe-inline'; style-src-attr 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self'; media-src; object-src; prefetch-src; child-src; frame-src 'self'; worker-src; frame-ancestors 'self' https://scripts.zdv.uni-mainz.de; form-action 'self' www.paypal.com; base-uri; manifest-src; plugin-types; report-uri; report-to";
more_set_headers "X-Frame-Options: sameorigin";
more_set_headers "X-Content-Type-Options: nosniff";
more_set_headers "ReferrerPolicy: origin";
......
......@@ -25,10 +25,9 @@ class RequestFetcher extends Command
protected $shouldRun = true;
protected $multicurl = null;
protected $oldMultiCurl = null;
protected $maxFetchedDocuments = 100000;
protected $fetchedDocuments = 0;
protected $proxyhost, $proxyuser, $proxypassword;
protected $proxyhost;
protected $proxyuser;
protected $proxypassword;
/**
* Create a new command instance.
......@@ -43,7 +42,6 @@ class RequestFetcher extends Command
$this->proxyport = env("PROXY_PORT", "");
$this->proxyuser = env("PROXY_USER", "");
$this->proxypassword = env("PROXY_PASSWORD", "");
}
/**
......@@ -53,7 +51,6 @@ class RequestFetcher extends Command
*/
public function handle()
{
$pidFile = "/tmp/fetcher";
pcntl_signal(SIGINT, [$this, "sig_handler"]);
pcntl_signal(SIGTERM, [$this, "sig_handler"]);
......@@ -80,61 +77,16 @@ class RequestFetcher extends Command
}
try {
$blocking = false;
while ($this->shouldRun) {
$status = curl_multi_exec($this->multicurl, $active);
$currentJobs = [];
if (!$blocking) {
$elements = Redis::pipeline(function ($redis) {
$redis->lrange(\App\MetaGer::FETCHQUEUE_KEY, 0, -1);
$redis->del(\App\MetaGer::FETCHQUEUE_KEY);
});
$currentJobs = $elements[0];
} else {
$currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
if (!empty($currentJob)) {
$currentJobs[] = $currentJob[1];
}
}
if (sizeof($currentJobs) > 0) {
foreach ($currentJobs as $currentJob) {
$currentJob = json_decode($currentJob, true);
$ch = $this->getCurlHandle($currentJob);
if (curl_multi_add_handle($this->multicurl, $ch) !== 0) {
$this->shouldRun = false;
Log::error("Couldn't add Handle to multicurl");
break;
}
$this->fetchedDocuments++;
if ($this->fetchedDocuments > $this->maxFetchedDocuments) {
Log::info("Reinitializing Multicurl after " . $this->fetchedDocuments . " requests.");
$this->oldMultiCurl = $this->multicurl;
$this->multicurl = curl_multi_init();
$this->fetchedDocuments = 0;
}
$blocking = false;
$active = true;
}
}
$answerRead = $this->readMultiCurl($this->multicurl);
if ($this->oldMultiCurl != null) {
$this->readMultiCurl($this->oldMultiCurl);
$messagesLeft = -1;
if (curl_multi_info_read($this->oldMultiCurl, $messagesLeft) === false) {
if ($messagesLeft = 0) {
Log::debug("Removing finished multicurl handle");
curl_multi_close($this->oldMultiCurl);
$this->oldMultiCurl = null;
}
}
}
if (!$active && !$answerRead) {
$blocking = true;
} else {
usleep(50 * 1000);
$operationsRunning = true;
curl_multi_exec($this->multicurl, $operationsRunning);
$status = $this->readMultiCurl($this->multicurl);
$answersRead = $status[0];
$messagesLeft = $status[1];
$newJobs = $this->checkNewJobs($operationsRunning, $messagesLeft);
if ($newJobs === 0 && $answersRead === 0) {
usleep(10 * 1000);
}
}
} finally {
......@@ -143,21 +95,59 @@ class RequestFetcher extends Command
}
}
/**
 * Checks the Redis fetch queue for newly submitted jobs and adds each of
 * them to the current multicurl handle.
 *
 * When the caller reports no running transfers ($operationsRunning === 0)
 * and $messagesLeft still holds -1, the queue is read with a blocking
 * BLPOP (timeout argument 1), which yields at most one job. Otherwise the
 * whole queue is read and deleted in one pipeline so the loop never blocks
 * while transfers are in flight.
 *
 * NOTE(review): LRANGE and DEL are sent in a plain pipeline; whether a job
 * pushed between the two commands can get lost depends on the Redis client
 * configuration - confirm whether a transaction is needed here.
 *
 * @param int $operationsRunning Value compared against 0 to decide whether blocking is allowed
 * @param int $messagesLeft      Value compared against -1 to decide whether blocking is allowed
 * @return int Number of jobs that were added to multicurl by this call
 */
private function checkNewJobs($operationsRunning, $messagesLeft)
{
    $newJobs = [];
    if ($operationsRunning === 0 && $messagesLeft === -1) {
        # Nothing is in flight: wait for the next job instead of polling
        $newJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
        if (!empty($newJob)) {
            # The job payload is taken from index 1 of the BLPOP reply
            $newJobs[] = $newJob[1];
        }
    } else {
        $elements = Redis::pipeline(function ($redis) {
            $redis->lrange(\App\MetaGer::FETCHQUEUE_KEY, 0, -1);
            $redis->del(\App\MetaGer::FETCHQUEUE_KEY);
        });
        # Only trust the LRANGE reply if it really is a list of jobs
        if (isset($elements[0]) && is_array($elements[0])) {
            $newJobs = $elements[0];
        }
    }

    $addedJobs = 0;
    foreach ($newJobs as $newJob) {
        $job = json_decode($newJob, true);
        if ($job === null) {
            # A payload that is not valid JSON cannot be turned into a request
            Log::error("Skipping fetch job that could not be decoded");
            continue;
        }
        $ch = $this->getCurlHandle($job);
        if (curl_multi_add_handle($this->multicurl, $ch) !== 0) {
            # Stop the main loop; the remaining jobs of this batch are not added
            $this->shouldRun = false;
            Log::error("Couldn't add Handle to multicurl");
            break;
        }
        $addedJobs++;
    }

    return $addedJobs;
}
private function readMultiCurl($mc)
{
$answerRead = false;
while (($info = curl_multi_info_read($mc)) !== false) {
$messagesLeft = -1;
$answersRead = 0;
while (($info = curl_multi_info_read($mc, $messagesLeft)) !== false) {
try {
$answerRead = true;
$answersRead++;
$infos = curl_getinfo($info["handle"], CURLINFO_PRIVATE);
$infos = explode(";", $infos);
$resulthash = $infos[0];
$cacheDurationMinutes = intval($infos[1]);
$name = $infos[2];
$responseCode = curl_getinfo($info["handle"], CURLINFO_HTTP_CODE);
$body = "";
$body = "no-result";
$totalTime = curl_getinfo($info["handle"], CURLINFO_TOTAL_TIME);
$totalTime = curl_getinfo($info["handle"], CURLINFO_TOTAL_TIME);
\App\PrometheusExporter::Duration($totalTime, $name);
$error = curl_error($info["handle"]);
......@@ -166,13 +156,14 @@ class RequestFetcher extends Command
}
if ($responseCode !== 200) {
Log::debug($resulthash);
Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($info["handle"], CURLINFO_EFFECTIVE_URL) . "\n");
} else {
$body = \curl_multi_getcontent($info["handle"]);
}
Redis::pipeline(function ($pipe) use ($resulthash, $body, $cacheDurationMinutes) {
$pipe->set($resulthash, $body);
$pipe->lpush($resulthash, $body);
$pipe->expire($resulthash, 60);
});
......@@ -187,7 +178,7 @@ class RequestFetcher extends Command
\curl_multi_remove_handle($mc, $info["handle"]);
}
}
return $answerRead;
return [$answersRead, $messagesLeft];
}
private function getCurlHandle($job)
......@@ -234,7 +225,6 @@ class RequestFetcher extends Command
/**
 * Handler registered via pcntl_signal() for SIGINT and SIGTERM.
 * Clears the shouldRun flag that the fetch loop checks and prints a
 * termination notice.
 *
 * @param int $sig Signal argument supplied to the handler; not used here
 */
public function sig_handler($sig)
{
$this->shouldRun = false;
echo ("Terminating Process\n");
echo("Terminating Process\n");
}
}
......@@ -44,6 +44,9 @@ class AdminSpamController extends Controller
/**
 * Returns the data produced by getQueries() as a JSON response.
 */
public function jsonQueries()
{
    # json_encode fails on invalid UTF-8, so the data is converted from
    # UTF-8 to UTF-8 first to get rid of invalid byte sequences.
    $sanitized = mb_convert_encoding($this->getQueries(), "UTF-8", "UTF-8");

    return response()->json($sanitized);
}
......@@ -77,8 +80,10 @@ class AdminSpamController extends Controller
];
}
# JSON encoding will fail if invalid UTF-8 characters are in this data;
# mb_convert_encoding will substitute these invalid characters for us
$resultData = mb_convert_encoding($resultData, "UTF-8", "UTF-8");
return response()->json($resultData);
}
private function getQueries()
......
......@@ -12,9 +12,11 @@ use View;
class MetaGerSearch extends Controller
{
public function search(Request $request, MetaGer $metager, $timing = false)
{
if ($request->filled("chrome-plugin")) {
return redirect(LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), "/plugin"));
}
$timings = null;
if ($timing) {
$timings = ['starttime' => microtime(true)];
......@@ -74,12 +76,23 @@ class MetaGerSearch extends Controller
return response($responseContent);
}
$quicktips = $metager->createQuicktips();
if (!empty($timings)) {
$timings["createQuicktips"] = microtime(true) - $time;
}
# Suche für alle zu verwendenden Suchmaschinen als Job erstellen,
# auf Ergebnisse warten und die Ergebnisse laden
$metager->createSearchEngines($request, $timings);
$metager->startSearch($timings);
# Versuchen die Ergebnisse der Quicktips zu laden
$quicktipResults = $quicktips->loadResults();
if (!empty($timings)) {
$timings["Loaded Quicktip Results"] = microtime(true) - $time;
}
$metager->waitForMainResults();
if (!empty($timings)) {
$timings["waitForMainResults"] = microtime(true) - $time;
......@@ -126,7 +139,7 @@ class MetaGerSearch extends Controller
}
# Die Ausgabe erstellen:
$resultpage = $metager->createView();
$resultpage = $metager->createView($quicktipResults);
if ($spamEntry !== null) {
try {
Cache::put('spam.' . $metager->getFokus() . "." . md5($spamEntry), $resultpage->render(), 604800);
......@@ -153,7 +166,7 @@ class MetaGerSearch extends Controller
// This might speed up page view time for users with slow network
$responseArray = str_split($resultpage->render(), 1024);
foreach ($responseArray as $responsePart) {
echo ($responsePart);
echo($responsePart);
flush();
}
$requestTime = microtime(true) - $time;
......@@ -179,7 +192,6 @@ class MetaGerSearch extends Controller
if ($request->filled('loadMore') && $request->filled('script') && $request->input('script') === "yes") {
return $this->loadMoreJS($request);
}
}
private function loadMoreJS(Request $request)
......@@ -293,6 +305,10 @@ class MetaGerSearch extends Controller
],
"engines" => $metager->getEngines(),
], 1 * 60);
# JSON encoding will fail if invalid UTF-8 characters are in this data;
# mb_convert_encoding will substitute these invalid characters for us
$result = mb_convert_encoding($result, "UTF-8", "UTF-8");
return response()->json($result);
}
......
......@@ -56,11 +56,22 @@ class SettingsController extends Controller
$cookies = Cookie::get();
$settingActive = false;
foreach ($cookies as $key => $value) {
if (\starts_with($key, [$fokus . "_engine_", $fokus . "_setting_"])) {
if (\starts_with($key, [$fokus . "_engine_", $fokus . "_setting_"]) || strpos($key, $fokus . '_blpage') === 0) {
$settingActive = true;
}
}
# Reading cookies for black list entries
$blacklist = [];
foreach($cookies as $key => $value){
if(stripos($key, 'blpage') !== false && stripos($key, $fokus) !== false){
$blacklist[$key] = $value;
}
}
# Generating link with set cookies
$cookieLink = LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), route('loadSettings', $cookies));
return view('settings.index')
->with('title', trans('titles.settings', ['fokus' => $fokusName]))
->with('fokus', $request->input('fokus', ''))
......@@ -69,7 +80,9 @@ class SettingsController extends Controller
->with('sumas', $sumas)
->with('filter', $filters)
->with('settingActive', $settingActive)
->with('url', $url);
->with('url', $url)
->with('blacklist', $blacklist)
->with('cookieLink', $cookieLink);
}
private function getSumas($fokus)
......@@ -232,6 +245,7 @@ class SettingsController extends Controller
Cookie::queue($key, "", 0, $cookiePath, null, false, false);
}
}
$this->clearBlacklist($request);
return redirect(LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), route('settings', ["fokus" => $fokus, "url" => $url])));
}
......@@ -268,4 +282,116 @@ class SettingsController extends Controller
}
return redirect($request->input('url', 'https://metager.de'));
}
/**
 * Adds a host to the blacklist of the given fokus by queueing a cookie
 * named "<fokus>_blpage<N>" and redirects back to the settings page.
 *
 * The submitted value is reduced to its host part (protocol, a leading
 * "www." and everything from the first "/" on are dropped) and is only
 * stored if it matches the host pattern, is not already on the list and
 * is at most 255 bytes long.
 *
 * @param Request $request Reads the inputs "fokus", "url" and "blacklist"
 * @return mixed Redirect to the localized settings route
 */
public function newBlacklist(Request $request)
{
    $fokus = $request->input('fokus', '');
    $url = $request->input('url', '');

    # Strip an optional protocol and a literal leading "www." (the dot is
    # escaped so that e.g. "wwwa.example.com" is left untouched)
    $regexProtocol = '#^([a-z]{0,5}://)?(www\.)?#';
    $input = $request->input('blacklist');
    # Anything but a string (missing field, array input) cannot be a host
    $blacklist = is_string($input) ? preg_filter($regexProtocol, '', $input) : '';

    # Only the host is stored, so cut off the path
    $slashPosition = strpos($blacklist, '/');
    if ($slashPosition !== false) {
        $blacklist = substr($blacklist, 0, $slashPosition);
    }

    $regexUrl = '#^(\*\.)?[a-z0-9]+(\.[a-z0-9]+)?(\.[a-z0-9]{2,})$#';
    if (preg_match($regexUrl, $blacklist) === 1) {
        $path = \Request::path();
        # Cookie path is the request path up to and including "meta/"
        $cookiePath = "/" . substr($path, 0, strpos($path, "meta/") + 5);
        $cookies = Cookie::get();

        $cookiePrefix = $fokus . '_blpage';
        $usedIndexes = [];
        $noduplicate = true;

        foreach ($cookies as $key => $value) {
            if (stripos($key, $cookiePrefix) === 0) {
                if ($value === $blacklist) {
                    $noduplicate = false;
                    break;
                }
                # Remember the numeric suffix of every existing entry
                $usedIndexes[(int) substr($key, strlen($cookiePrefix))] = true;
            }
        }

        # Take the smallest free index. Collecting the used indexes first
        # avoids depending on the (string) sort order of the cookie names,
        # which placed "...blpage10" before "...blpage2" and made an
        # existing entry get overwritten once the list had eleven entries.
        $cookieCounter = 0;
        while (isset($usedIndexes[$cookieCounter])) {
            $cookieCounter++;
        }

        # The host pattern above already guarantees a non-empty value
        if ($noduplicate && strlen($blacklist) <= 255) {
            $cookieName = $cookiePrefix . $cookieCounter;
            Cookie::queue($cookieName, $blacklist, 0, $cookiePath, null, false, false);
        }
    }

    return redirect(LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), route('settings', ["fokus" => $fokus, "url" => $url])));
}
/**
 * Removes a single blacklist entry by queueing an empty cookie under the
 * submitted cookie name and redirects back to the settings page.
 *
 * @param Request $request Reads the inputs "fokus", "url" and "cookieKey"
 * @return mixed Redirect to the localized settings route
 */
public function deleteBlacklist(Request $request)
{
    $fokus = $request->input('fokus', '');
    $url = $request->input('url', '');
    $cookieKey = $request->input('cookieKey');

    # A cookie cannot be created without a name, so a missing or malformed
    # cookieKey is ignored instead of failing the whole request
    if (is_string($cookieKey) && $cookieKey !== '') {
        $path = \Request::path();
        # Cookie path is the request path up to and including "meta/"
        $cookiePath = "/" . substr($path, 0, strpos($path, "meta/") + 5);

        Cookie::queue($cookieKey, "", 0, $cookiePath, null, false, false);
    }

    return redirect(LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), route('settings', ["fokus" => $fokus, "url" => $url])));
}
/**
 * Clears the whole blacklist of the given fokus: every cookie whose name
 * starts with "<fokus>_blpage" (compared case-insensitively) is queued
 * again with an empty value. Afterwards redirects to the settings page.
 *
 * @param Request $request Reads the inputs "fokus" and "url"
 * @return mixed Redirect to the localized settings route
 */
public function clearBlacklist(Request $request)
{
    $fokus = $request->input('fokus', '');
    $url = $request->input('url', '');
    $path = \Request::path();
    # Cookie path is the request path up to and including "meta/"
    $cookiePath = "/" . substr($path, 0, strpos($path, "meta/") + 5);
    $cookiePrefix = $fokus . '_blpage';

    foreach (Cookie::get() as $key => $value) {
        if (stripos($key, $cookiePrefix) === 0) {
            Cookie::queue($key, "", 0, $cookiePath, null, false, false);
        }
    }

    return redirect(LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), route('settings', ["fokus" => $fokus, "url" => $url])));
}
/**
 * Restores settings from the request parameters by queueing them as
 * cookies, then redirects to the localized start page.
 *
 * Two kinds of parameters are accepted:
 *  - blacklist entries: the key starts with "<fokus>_blpage" for one of the
 *    foki of the language file and the value matches the host pattern
 *  - filter settings: key and value are identical to an entry of
 *    "filter" -> "parameter-filter" in the language file
 * Everything else is ignored.
 *
 * @param Request $request All request parameters are inspected
 * @return mixed Redirect to the localized start page
 */
public function loadSettings(Request $request)
{
    $path = \Request::path();
    # Cookie path is the request path up to and including "meta/"
    $cookiePath = "/" . substr($path, 0, strpos($path, "meta/") + 5);

    $sumaFile = MetaGer::getLanguageFile();
    $sumaFile = json_decode(file_get_contents($sumaFile), true);

    $foki = array_keys($sumaFile['foki']);
    $regexUrl = '#^(\*\.)?[a-z0-9]+(\.[a-z0-9]+)?(\.[a-z0-9]{2,})$#';

    $cookies = $request->all();

    foreach ($cookies as $key => $value) {
        $blpage = false;
        foreach ($foki as $fokus) {
            # Request values may be arrays (e.g. "key[]=x"); only strings can
            # be checked against the host pattern
            if (strpos($key, $fokus . '_blpage') === 0 && is_string($value) && preg_match($regexUrl, $value) === 1) {
                Cookie::queue($key, $value, 0, $cookiePath, null, false, false);
                $blpage = true;
            }
        }
        if ($blpage) {
            continue;
        }
        # NOTE(review): the value is compared with the complete filter entry;
        # confirm that the entries of "parameter-filter" are plain strings
        foreach ($sumaFile['filter']['parameter-filter'] as $suma => $filter) {
            if ($key === $suma && $value === $filter) {
                Cookie::queue($key, $value, 0, $cookiePath, null, false, false);
            }
        }
    }

    return redirect(LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), url('/')));
}
}
......@@ -45,7 +45,7 @@ class StartpageController extends Controller
return view('index')
->with('title', trans('titles.index'))
->with('homeIcon')
->with('browser', (new Agent())->browser())
->with('agent', new Agent())
->with('navbarFocus', 'suche')
->with('focus', $request->input('focus', 'web'))
->with('time', $request->input('param_time', '1500'))
......
......@@ -5,6 +5,7 @@ namespace App\Http\Middleware;
use Closure;
use Illuminate\Support\Facades\Redis;
use Jenssegers\Agent\Agent;
use Cache;
class BrowserVerification
{
......@@ -59,12 +60,12 @@ class BrowserVerification
$key = md5($request->ip() . microtime(true));
echo (view('layouts.resultpage.verificationHeader')->with('key', $key)->render());
echo(view('layouts.resultpage.verificationHeader')->with('key', $key)->render());
flush();
$answer = Redis::connection("cache")->blpop($key, 2);
if ($answer !== null) {
echo (view('layouts.resultpage.resources')->render());
echo(view('layouts.resultpage.resources')->render());
flush();
$request->request->add(["headerPrinted" => true, "jskey" => $key]);
return $next($request);
......@@ -74,9 +75,8 @@ class BrowserVerification
$params["mgv"] = $key;
$url = route("resultpage", $params);
echo (view('layouts.resultpage.unverifiedResultPage')
echo(view('layouts.resultpage.unverifiedResultPage')
->with('url', $url)
->render());
}
}
......@@ -5,6 +5,7 @@ namespace App;
use App;
use Cache;
use Carbon;
use Cookie;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Redis;
use Jenssegers\Agent\Agent;
......@@ -72,7 +73,10 @@ class MetaGer
protected $verificationId;
protected $verificationCount;
protected $searchUid;
protected $redisResultWaitingKey, $redisResultEngineList, $redisEngineResult, $redisCurrentResultList;
protected $redisResultWaitingKey;
protected $redisResultEngineList;
protected $redisEngineResult;
protected $redisCurrentResultList;
public $starttime;
public function __construct($hash = "")
......@@ -131,7 +135,7 @@ class MetaGer
}
# Erstellt aus den gesammelten Ergebnissen den View
public function createView()
public function createView($quicktipResults = [])
{
# Hiermit werden die evtl. ausgewählten SuMas extrahiert, damit die Input-Boxen richtig gesetzt werden können
$focusPages = [];
......@@ -171,7 +175,7 @@ class MetaGer
->with('apiAuthorized', $this->apiAuthorized)
->with('metager', $this)
->with('browser', (new Agent())->browser())
->with('quicktips', action('MetaGerSearch@quicktips', ["search" => $this->eingabe]))
->with('quicktips', $quicktipResults)
->with('focus', $this->fokus)
->with('resultcount', count($this->results));
}
......@@ -244,7 +248,7 @@ class MetaGer
->with('apiAuthorized', $this->apiAuthorized)
->with('metager', $this)
->with('browser', (new Agent())->browser())
->with('quicktips', action('MetaGerSearch@quicktips', ["search" => $this->eingabe, "quotes" => $this->sprueche]))
->with('quicktips', $quicktipResults)
->with('resultcount', count($this->results))
->with('focus', $this->fokus);
break;
......@@ -257,7 +261,7 @@ class MetaGer
$engines = $this->engines;
// combine
$this->combineResults($engines);
if(!empty($timings)){
if (!empty($timings)) {
$timings["prepareResults"]["combined results"] = microtime(true) - $timings["starttime"];
}
// misc (WiP)
......@@ -279,7 +283,7 @@ class MetaGer
return ($a->getRank() < $b->getRank()) ? 1 : -1;
});
}
if(!empty($timings)){
if (!empty($timings)) {
$timings["prepareResults"]["sorted results"] = microtime(true) - $timings["starttime"];
}
# Validate Results
......@@ -290,7 +294,7 @@ class MetaGer
}
}
$this->results = $newResults;
if(!empty($timings)){
if (!empty($timings)) {
$timings["prepareResults"]["validated results"] = microtime(true) - $timings["starttime"];
}
......@@ -310,7 +314,7 @@ class MetaGer
$newResults[] = $ad;
}
$this->ads = $newResults;
if(!empty($timings)){
if (!empty($timings)) {
$timings["prepareResults"]["validated ads"] = microtime(true) - $timings["starttime"];
}