Commit 4001e042 authored by Dominik Hebeler

Merge branch '859-filter-options-for-metager' into 'development'

Resolve "Filter Options for MetaGer"

Closes #859, #793, #849, #846, #848, #844, #836, #841, #835, #830, #828, #838, #831, #834, #825, #820, #816, #813, #821, #823, #819, #818, #805, #809, #817, #810, #812, #814, #811, #707, #731, #699, #808, #754, #800, #778, #804, #799, #798, #801, #796, #795, #794, #788, #789, #767, #786, #782, #785, #784, #783, #781, #773, #776, #777, #770, #780, #772, #769, #774, #763, #759, #760, #761, #758, #746, #753, #756, #748, #629, #750, #751, #744, #747, #652, #676, #693, #742, #745, #584, #743, #739, #697, #732, #733, #734, #736, #737, #738, #729, #712, #704, #701, #710, #728, #727, #711, #703, #688, #638, #690, #691, #689, #646, #677, #684, #671, #670, #663, #659, #660, #658, #657, #654, #651, #653, #648, #647, #622, #595, #634, #632, #627, #618, #624, #621, #613, #608, #570, #603, #600, #588, #593, #594, #590, #591, #587, #216, #503, #574, #582, #581, #550, #359, #580, #579, #578, #576, #573, #575, #566, #568, #572, #564, #562, #561, #560, #530, #557, #556, #554, #558, #540, #555, #552, #553, #549, #551, #468, #546, #542, #545, #541, #538, #539, #537, #536, #535, #534, #525, #533, #531, #526, #504, #524, #519, #521, #517, #513, #408, #516, #515, #514, #512, #495, #499, #510, #509, #508, #507, #483, #493, #466, #494, #506, #505, #500, #487, #501, #496, #498, #485, #489, #488, #486, #481, #477, #470, #461, #480, #474, #449, #471, #237, #446, #462, #467, #465, #417, #460, #457, #456, #452, #454, #453, #450, #433, #448, #447, #361, #445, #441, #440, #438, #439, #432, #437, #435, #347, #425, #429, #428, #430, #363, #407, #406, #245, #157, #421, #405, #351, #414, #411, #410, #409, #398, #397, #403, #402, #399, #393, #396, #395, #394, #392, #391, #387, #386, #385, #318, #365, #384, #382, #381, #380, #352, #379, #377, #348, #350, #375, #376, #374, #373, #372, #311, #357, #362, #358, #370, #366, #356, #360, #354, #341, #319, #345, #343, #344, #310, #89, #342, #333, #264, #334, #330, #329, #328, #326, #321, #327, #324, #320, #317, #316, #313, #309, #306, #295, #305, #301, #303, 
#299, #296, #293, #292, #290, #285, #287, #272, #284, #280, #226, #278, #273, #276, #274, #233, #271, #269, #266, #261, #265, #263, #257, #235, #255, #229, #254, #252, #250, #249, #247, #248, #246, #244, #242, #243, #219, #238, #236, #234, #228, #222, #221, #211, #225, #187, #206, #87, #215, #214, #212, #210, #208, #207, #204, #203, #201, #202, #182, #200, #197, #199, #195, #194, #192, #193, #191, #190, #189, #188, #137, #162, #96, #160, #179, #177, #174, #175, #169, #173, #172, #170, #166, #168, #165, #164, #159, #155, #153, #97, #156, #134, #142, #124, #154, #135, #146, #151, #150, #149, #125, #92, #145, #140, #98, #133, #105, #90, #138, #126, #128, #132, #129, #123, #121, #37, #108, #119, #117, #116, #113, #112, #107, #109, #106, #50, #99, #103, #102, #72, #74, #38, #94, #76, #75, #41, #40, #73, #71, #70, #66, #57, #67, #65, #64, #63, #55, #56, #59, #61, and #35

See merge request !1365
parents 2ee13bc2 b9683f8e
......@@ -18,14 +18,14 @@ update(144.76.113.134):
- cd MetaGer_neu
- composer install
- scp -P 63824 metager@metager3.de:~/.env .
- scp -P 63824 metager@metager3.de:~/sumas.xml config/
- scp -P 63824 metager@metager3.de:~/sumasEn.xml config/
- scp -P 63824 metager@metager3.de:~/sumas.json config/
- scp -P 63824 metager@metager3.de:~/sumasEn.json config/
- scp -P 63824 metager@metager3.de:~/blacklistUrl.txt config/
- scp -P 63824 metager@metager3.de:~/blacklistDomains.txt config/
- touch storage/logs/laravel.log
- touch storage/logs/worker.log
- touch database/metager.sqlite
- chmod 777 config/sumas.xml config/sumasEn.xml database/metager.sqlite
- chmod 777 config/sumas.json config/sumasEn.json database/metager.sqlite
- chmod -R 777 storage
- chmod -R 777 bootstrap/cache
- npm install
......@@ -58,14 +58,14 @@ update(metager2):
- cd MetaGer_neu
- composer install
- scp -P 63824 metager@metager3.de:~/.env .
- scp -P 63824 metager@metager3.de:~/sumas.xml config/
- scp -P 63824 metager@metager3.de:~/sumasEn.xml config/
- scp -P 63824 metager@metager3.de:~/sumas.json config/
- scp -P 63824 metager@metager3.de:~/sumasEn.json config/
- scp -P 63824 metager@metager3.de:~/blacklistUrl.txt config/
- scp -P 63824 metager@metager3.de:~/blacklistDomains.txt config/
- touch storage/logs/laravel.log
- touch storage/logs/worker.log
- touch database/metager.sqlite
- chmod 777 config/sumas.xml config/sumasEn.xml database/metager.sqlite
- chmod 777 config/sumas.json config/sumasEn.json database/metager.sqlite
- chmod -R 777 storage
- chmod -R 777 bootstrap/cache
- npm install
......@@ -98,14 +98,14 @@ update(metager3.de):
- cd MetaGer_neu
- composer install
- scp -P 63824 metager@metager3.de:~/.env .
- scp -P 63824 metager@metager3.de:~/sumas.xml config/
- scp -P 63824 metager@metager3.de:~/sumasEn.xml config/
- scp -P 63824 metager@metager3.de:~/sumas.json config/
- scp -P 63824 metager@metager3.de:~/sumasEn.json config/
- scp -P 63824 metager@metager3.de:~/blacklistUrl.txt config/
- scp -P 63824 metager@metager3.de:~/blacklistDomains.txt config/
- touch storage/logs/laravel.log
- touch storage/logs/worker.log
- touch database/metager.sqlite
- chmod 777 config/sumas.xml config/sumasEn.xml database/metager.sqlite
- chmod 777 config/sumas.json config/sumasEn.json database/metager.sqlite
- chmod -R 777 storage
- chmod -R 777 bootstrap/cache
- npm install
......
......@@ -161,6 +161,7 @@ class Searcher implements ShouldQueue
// Set this URL to the Curl handle
curl_setopt($this->ch, CURLOPT_URL, $url);
$result = curl_exec($this->ch);
$this->connectionInfo = curl_getinfo($this->ch);
return $result;
}
......@@ -203,10 +204,13 @@ class Searcher implements ShouldQueue
}
if ($this->headers !== null) {
$headers = [];
foreach ($this->headers as $key => $value) {
$headers[] = $key . ":" . $value;
}
# Headers are in the Form:
# <key>:<value>;<key>:<value>
$headerArray = explode(";", $this->headers);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headerArray);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
}
return $ch;
......
......@@ -17,12 +17,11 @@ class MetaGer
protected $fokus;
protected $eingabe;
protected $q;
protected $category;
protected $time;
protected $page;
protected $lang;
protected $cache = "";
protected $site;
protected $time = 2000;
protected $hostBlacklist = [];
protected $domainBlacklist = [];
private $urlBlacklist = [];
......@@ -30,6 +29,8 @@ class MetaGer
protected $phrases = [];
protected $engines = [];
protected $results = [];
protected $queryFilter = [];
protected $parameterFilter = [];
protected $ads = [];
protected $warnings = [];
protected $errors = [];
......@@ -49,7 +50,6 @@ class MetaGer
protected $mobile;
protected $resultCount;
protected $sprueche;
protected $maps;
protected $newtab;
protected $domainsBlacklisted = [];
protected $urlsBlacklisted = [];
......@@ -309,7 +309,6 @@ class MetaGer
if ($this->validated) {
$this->ads = [];
$this->maps = false;
}
if (count($this->results) <= 0) {
......@@ -531,100 +530,57 @@ class MetaGer
return;
}
$xml = simplexml_load_file($this->sumaFile);
$sumas = $xml->xpath("suma");
$enabledSearchengines = [];
$this->enabledSearchengines = [];
$overtureEnabled = false;
$sumaCount = 0;
/*
* Erstellt eine Liste mit Foki, die verfügbar sind
*/
$this->availableFoki = [];
foreach ($sumas as $suma) {
$foki = explode(",", trim($suma["type"]));
foreach ($foki as $fokus) {
if (!empty($fokus)) {
$this->availableFoki[$fokus] = "available";
}
}
}
$isCustomSearch = $this->startsWith($this->fokus, 'focus_');
# Im Falle einer Custom-Suche ohne mindestens einer selektierter Suchmaschine wird eine Web-Suche durchgeführt
if ($isCustomSearch && !$this->atLeastOneSearchengineSelected($request)) {
$isCustomSearch = false;
$this->fokus = 'web';
# Check if selected focus is valid
if (empty($this->sumaFile->foki->{$this->fokus})) {
$this->fokus = "web";
}
/* Erstellt die Liste der eingestellten Sumas
* Der einzige Unterschied bei angepasstem Suchfokus ist,
* dass nicht nach den Typen einer Suma,
* sondern den im Request mitgegebenen Typen entschieden wird.
* Ansonsten wird genau das selbe geprüft und gemacht:
* Handelt es sich um spezielle Suchmaschinen die immer an sein müssen
* Wenn es Overture ist vermerken dass Overture an ist
* Suma Zähler erhöhen
* Zu Liste hinzufügen
*/
foreach ($sumas as $suma) {
if (($this->sumaIsSelected($suma, $request, $isCustomSearch)
|| (!$this->isBildersuche()
&& $this->sumaIsAdsuche($suma, $overtureEnabled)))
&& (!$this->sumaIsDisabled($suma))) {
if ($this->sumaIsOverture($suma)) {
$overtureEnabled = true;
}
if ($this->sumaIsNotAdsuche($suma)) {
$sumaCount += 1;
}
$enabledSearchengines[] = $suma;
foreach ($this->sumaFile->foki->{$this->fokus}->sumas as $suma) {
# Check if this engine is disabled and can't be used
$disabled = empty($suma->disabled) ? false : $suma->disabled;
if ($disabled) {
continue;
}
}
# Sonderregelung für alle Suchmaschinen, die zu den Minisuchern gehören. Diese können alle gemeinsam über einen Link abgefragt werden
$subcollections = [];
$tmp = [];
// Es gibt den Schalter "minism=on" Dieser soll bewirken, dass alle Minisucher angeschaltet werden.
// Wenn also "minism=on" ist, dann durchsuchen wir statt den tatsächlich angeschalteten Suchmaschinen,
// alle Suchmaschinen nach "minismCollection"
if ($request->input("minism", "off") === "on") {
// Wir laden alle Minisucher
foreach ($sumas as $engine) {
if (isset($engine["minismCollection"])) {
$subcollections[] = $engine["minismCollection"]->__toString();
# Check if this engine can use eventually defined query-filter
$valid = true;
foreach ($this->queryFilter as $queryFilter => $filter) {
if (empty($this->sumaFile->filter->{"query-filter"}->$queryFilter->sumas->$suma)) {
$valid = false;
break;
}
}
# Nur noch alle eventuell angeschalteten Minisucher deaktivieren
foreach ($enabledSearchengines as $index => $engine) {
if (!isset($engine["minismCollection"])) {
$tmp[] = $engine;
# Check if this engine can use eventually defined parameter-filter
if ($valid) {
foreach ($this->parameterFilter as $filterName => $filter) {
if (empty($filter->sumas->$suma)) {
$valid = false;
break;
}
}
}
} else {
// Wir schalten eine Teilmenge, oder aber gar keine an
foreach ($enabledSearchengines as $engine) {
if (isset($engine['minismCollection'])) {
$subcollections[] = $engine['minismCollection']->__toString();
} else {
$tmp[] = $engine;
}
# If it can we add it
if ($valid) {
$this->enabledSearchengines[$suma] = $this->sumaFile->sumas->{$suma};
}
}
$enabledSearchengines = $tmp;
if (sizeof($subcollections) > 0) {
$enabledSearchengines[] = $this->loadMiniSucher($xml, $subcollections);
}
if ($sumaCount <= 0) {
$this->errors[] = trans('metaGer.settings.noneSelected');
if (sizeof($this->enabledSearchengines) === 0) {
$filter = "";
foreach ($this->queryFilter as $queryFilter => $filterPhrase) {
$filter .= trans($this->sumaFile->filter->{"query-filter"}->{$queryFilter}->name) . ",";
}
$filter = rtrim($filter, ",");
$error = trans('metaGer.engines.noSpecialSearch', ['fokus' => trans($this->sumaFile->foki->{$this->fokus}->{"display-name"}),
'filter' => $filter]);
$this->errors[] = $error;
}
$engines = [];
# Wenn eine Sitesearch durchgeführt werden soll, überprüfen wir ob überhaupt eine der Suchmaschinen eine Sitesearch unterstützt
$siteSearchFailed = $this->checkCanNotSitesearch($enabledSearchengines);
$engines = [];
$typeslist = [];
$counter = 0;
......@@ -635,7 +591,7 @@ class MetaGer
$engine->setResultHash($this->getHashCode());
}
} else {
$engines = $this->actuallyCreateSearchEngines($enabledSearchengines, $siteSearchFailed);
$engines = $this->actuallyCreateSearchEngines($this->enabledSearchengines);
}
# Wir starten alle Suchen
......@@ -684,52 +640,54 @@ class MetaGer
return false;
}
public function actuallyCreateSearchEngines($enabledSearchengines, $siteSearchFailed)
public function actuallyCreateSearchEngines($enabledSearchengines)
{
$engines = [];
foreach ($enabledSearchengines as $engine) {
foreach ($enabledSearchengines as $engineName => $engine) {
# Wenn diese Suchmaschine gar nicht eingeschaltet sein soll
if (!$siteSearchFailed
&& strlen($this->site) > 0
&& (!isset($engine['hasSiteSearch'])
|| $engine['hasSiteSearch']->__toString() === "0")) {
continue;
}
if (!isset($engine["package"])) {
if (!isset($engine->{"parser-class"})) {
die(var_dump($engine));
}
# Setze Pfad zu Parser
$path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
$path = "App\\Models\\parserSkripte\\" . $engine->{"parser-class"};
# Prüfe ob Parser vorhanden
if (!file_exists(app_path() . "/Models/parserSkripte/" . ucfirst($engine["package"]->__toString()) . ".php")) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert");
$this->errors[] = trans('metaGer.engines.noParser', ['engine' => $engine["name"]]);
if (!file_exists(app_path() . "/Models/parserSkripte/" . $engine->{"parser-class"} . ".php")) {
Log::error("Konnte " . $engine->{"display-name"} . " nicht abfragen, da kein Parser existiert");
$this->errors[] = trans('metaGer.engines.noParser', ['engine' => $engine->{"display-name"}]);
continue;
}
# Es wird versucht die Suchengine zu erstellen
$time = microtime();
try {
$tmp = new $path($engine, $this);
$tmp = new $path($engineName, $engine, $this);
} catch (\ErrorException $e) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen. " . var_dump($e));
Log::error("Konnte " . $engine->{"display-name"} . " nicht abfragen. " . var_dump($e));
continue;
}
# Ausgabe bei Debug-Modus
if ($tmp->enabled && isset($this->debug)) {
$this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime() - $time) / 1000);
}
$engines[] = $tmp;
}
return $engines;
}
# Wenn die neu erstellte Engine eingeschaltet ist, wird sie der Liste hinzugefügt
if ($tmp->isEnabled()) {
$engines[] = $tmp;
public function getAvailableParameterFilter()
{
$parameterFilter = $this->sumaFile->filter->{"parameter-filter"};
$availableFilter = [];
foreach ($parameterFilter as $filterName => $filter) {
# Check if any of the enabled search engines provide this filter
foreach ($this->enabledSearchengines as $engineName => $engine) {
if (!empty($filter->sumas->$engineName)) {
$availableFilter[$filterName] = $filter;
}
}
}
return $engines;
return $availableFilter;
}
public function isBildersuche()
......@@ -790,16 +748,6 @@ class MetaGer
return $engines;
}
/**
 * Prepares the combined "minism" engine entry for the given subcollections.
 *
 * Looks up the <suma name="minism"> node, stores the selected subcollections
 * on it and fills the <<SUBCOLLECTIONS>> and <<COUNT>> placeholders of its
 * formData attribute.
 *
 * @param mixed $xml            Parsed suma file offering xpath() (presumably a SimpleXMLElement - verify against caller)
 * @param array $subcollections Names of the subcollections to query
 * @return mixed The first node matched by the xpath query, modified in place
 */
public function loadMiniSucher($xml, $subcollections)
{
    $matchingNodes = $xml->xpath('suma[@name="minism"]');
    $miniEngine = $matchingNodes[0];

    # Human readable list of the collections that are queried
    $miniEngine["subcollections"] = implode(", ", $subcollections);

    # URL-encoded OR-expression that is inserted into the form data
    $orExpression = "(" . implode(" OR ", $subcollections) . ")";
    $encodedExpression = urlencode($orExpression);
    $withCollections = str_replace("<<SUBCOLLECTIONS>>", $encodedExpression, $miniEngine["formData"]);
    $miniEngine["formData"] = $withCollections;

    # Ten results are requested per subcollection
    $requestedCount = count($subcollections) * 10;
    $withCount = str_replace("<<COUNT>>", $requestedCount, $miniEngine["formData"]);
    $miniEngine["formData"] = $withCount;

    return $miniEngine;
}
# Passt den Suchfokus an, falls für einen Fokus genau alle vorhandenen Sumas eingeschaltet sind
public function adjustFocus($sumas, $enabledSearchengines)
{
......@@ -864,26 +812,6 @@ class MetaGer
}
}
/**
 * Checks whether a requested site search cannot be served by the enabled engines.
 *
 * When a site is set and no engine has hasSiteSearch equal to "1", an error
 * is recorded and true is returned. When at least one engine does, a warning
 * is recorded and false is returned. Without a site nothing is recorded.
 *
 * @param iterable $enabledSearchengines Engine entries that may carry a 'hasSiteSearch' value
 * @return bool true only if a site search was requested but no engine supports it
 */
public function checkCanNotSitesearch($enabledSearchengines)
{
    # No site search requested: nothing to check
    if (strlen($this->site) <= 0) {
        return false;
    }

    # Look for the first engine that supports site search
    $siteSearchSupported = false;
    foreach ($enabledSearchengines as $candidate) {
        if (!isset($candidate['hasSiteSearch'])) {
            continue;
        }
        if ($candidate['hasSiteSearch']->__toString() === "1") {
            $siteSearchSupported = true;
            break;
        }
    }

    if ($siteSearchSupported) {
        $this->warnings[] = trans('metaGer.sitesearch.success', ['site' => $this->site]);
        return false;
    }

    $this->errors[] = trans('metaGer.sitesearch.failed', ['site' => $this->site, 'searchLink' => $this->generateSearchLink("web", false)]);
    return true;
}
public function waitForResults($enginesToLoad, $overtureEnabled, $canBreak)
{
......@@ -944,13 +872,6 @@ class MetaGer
}
}
# Nicht fertige Engines verwefen
foreach ($engines as $engine) {
if (!$engine->loaded) {
$engine->shutdown();
}
}
$this->engines = $engines;
}
......@@ -977,12 +898,14 @@ class MetaGer
$this->fokus = $request->input('focus', 'web');
# Suma-File
if (App::isLocale("en")) {
$this->sumaFile = config_path() . "/sumasEn.xml";
$this->sumaFile = config_path() . "/sumasEn.json";
} else {
$this->sumaFile = config_path() . "/sumas.xml";
$this->sumaFile = config_path() . "/sumas.json";
}
if (!file_exists($this->sumaFile)) {
die(trans('metaGer.formdata.cantLoad'));
} else {
$this->sumaFile = json_decode(file_get_contents($this->sumaFile));
}
# Sucheingabe
$this->eingabe = trim($request->input('eingabe', ''));
......@@ -1001,10 +924,7 @@ class MetaGer
} else {
$this->language = "";
}
# Category
$this->category = $request->input('category', '');
# Request Times
$this->time = $request->input('time', 1500);
# Page
$this->page = 1;
# Lang
......@@ -1022,12 +942,7 @@ class MetaGer
} else {
$this->sprueche = false;
}
$this->maps = $request->input('maps', 'off');
if ($this->maps === "on") {
$this->maps = true;
} else {
$this->maps = false;
}
$this->newtab = $request->input('newtab', 'on');
if ($this->newtab === "on") {
$this->newtab = "_blank";
......@@ -1038,14 +953,7 @@ class MetaGer
$this->theme = preg_replace("/[^[:alnum:][:space:]]/u", '', $request->input('theme', 'default'));
# Ergebnisse pro Seite:
$this->resultCount = $request->input('resultCount', '20');
# Manchmal müssen wir Parameter anpassen um den Sucheinstellungen gerecht zu werden:
if ($request->filled('dart')) {
$this->time = 10000;
$this->warnings[] = trans('metaGer.formdata.dartEurope');
}
if ($this->time <= 500 || $this->time > 20000) {
$this->time = 1000;
}
if ($request->filled('minism') && ($request->filled('fportal') || $request->filled('harvest'))) {
$input = $request->all();
$newInput = [];
......@@ -1075,6 +983,7 @@ class MetaGer
$this->quicktips = true;
}
$this->queryFilter = [];
$this->verificationId = $request->input('verification_id', null);
$this->verificationCount = intval($request->input('verification_count', '0'));
$this->apiKey = $request->input('key', '');
......@@ -1117,7 +1026,39 @@ class MetaGer
public function checkSpecialSearches(Request $request)
{
$this->searchCheckPhrase();
$this->searchCheckSitesearch($request);
# Check for query-filter (i.e. Sitesearch, etc.):
foreach ($this->sumaFile->filter->{"query-filter"} as $filterName => $filter) {
if (!empty($filter->{"optional-parameter"}) && $request->filled($filter->{"optional-parameter"})) {
$this->queryFilter[$filterName] = $request->input($filter->{"optional-parameter"});
} else if (preg_match_all("/" . $filter->regex . "/si", $this->q, $matches) > 0) {
switch ($filter->match) {
case "last":
$this->queryFilter[$filterName] = $matches[$filter->save][sizeof($matches[$filter->save]) - 1];
$toDelete = preg_quote($matches[$filter->delete][sizeof($matches[$filter->delete]) - 1], "/");
$this->q = preg_replace('/(' . $toDelete . '(?!.*' . $toDelete . '))/si', '', $this->q);
break;
default: # First occurrence
$this->queryFilter[$filterName] = $matches[$filter->save][0];
$toDelete = preg_quote($matches[$filter->delete][0], "/");
$this->q = preg_replace('/' . $toDelete . '/si', '', $this->q, 1);
}
}
}
# Check for parameter-filter (i.e. SafeSearch)
$this->parameterFilter = [];
$usedParameters = [];
foreach ($this->sumaFile->filter->{"parameter-filter"} as $filterName => $filter) {
if (!empty($usedParameters[$filter->{"get-parameter"}])) {
die("Der Get-Parameter \"" . $filter->{"get-parameter"} . "\" wird mehrfach verwendet!");
} else {
$usedParameters[$filter->{"get-parameter"}] = true;
}
if ($request->filled($filter->{"get-parameter"})) {
$this->parameterFilter[$filterName] = $filter;
}
}
$this->searchCheckHostBlacklist($request);
$this->searchCheckDomainBlacklist($request);
$this->searchCheckUrlBlacklist();
......@@ -1143,19 +1084,6 @@ class MetaGer