diff --git a/app/Jobs/Searcher.php b/app/Jobs/Searcher.php index 5605492bd3c6cb9bc19a804cf2827aabcfd650e6..ec82937b42b85994833d0d930a46ef08deeb002c 100644 --- a/app/Jobs/Searcher.php +++ b/app/Jobs/Searcher.php @@ -161,6 +161,7 @@ class Searcher implements ShouldQueue // Set this URL to the Curl handle curl_setopt($this->ch, CURLOPT_URL, $url); $result = curl_exec($this->ch); + $this->connectionInfo = curl_getinfo($this->ch); return $result; } @@ -203,10 +204,13 @@ class Searcher implements ShouldQueue } if ($this->headers !== null) { + $headers = []; + foreach ($this->headers as $key => $value) { + $headers[] = $key . ":" . $value; + } # Headers are in the Form: # <key>:<value>;<key>:<value> - $headerArray = explode(";", $this->headers); - curl_setopt($ch, CURLOPT_HTTPHEADER, $headerArray); + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); } return $ch; diff --git a/app/MetaGer.php b/app/MetaGer.php index ad21554bceb399445c78dabb0dedfb3a58fbff59..899ccc84227649b632dbd6e2d17ad1a7978de201 100644 --- a/app/MetaGer.php +++ b/app/MetaGer.php @@ -17,12 +17,11 @@ class MetaGer protected $fokus; protected $eingabe; protected $q; - protected $category; - protected $time; protected $page; protected $lang; protected $cache = ""; protected $site; + protected $time = 2000; protected $hostBlacklist = []; protected $domainBlacklist = []; private $urlBlacklist = []; @@ -49,7 +48,6 @@ class MetaGer protected $mobile; protected $resultCount; protected $sprueche; - protected $maps; protected $newtab; protected $domainsBlacklisted = []; protected $urlsBlacklisted = []; @@ -309,7 +307,6 @@ class MetaGer if ($this->validated) { $this->ads = []; - $this->maps = false; } if (count($this->results) <= 0) { @@ -531,99 +528,48 @@ class MetaGer return; } - $xml = simplexml_load_file($this->sumaFile); - $sumas = $xml->xpath("suma"); $enabledSearchengines = []; $overtureEnabled = false; - $sumaCount = 0; - /* - * Erstellt eine Liste mit Foki, die verfügbar sind - */ - $this->availableFoki = []; - foreach ($sumas as $suma) { - $foki = explode(",", trim($suma["type"])); - foreach ($foki as $fokus) { - if (!empty($fokus)) { - $this->availableFoki[$fokus] = "available"; - } - - } + # Check if selected focus is valid + if (empty($this->sumaFile->foki->{$this->fokus})) { + $this->fokus = "web"; } - $isCustomSearch = $this->startsWith($this->fokus, 'focus_'); - - # Im Falle einer Custom-Suche ohne mindestens einer selektierter Suchmaschine wird eine Web-Suche durchgeführt - if ($isCustomSearch && !$this->atLeastOneSearchengineSelected($request)) { - $isCustomSearch = false; - $this->fokus = 'web'; - } - - /* Erstellt die Liste der eingestellten Sumas - * Der einzige Unterschied bei angepasstem Suchfokus ist, - * dass nicht nach den Typen einer Suma, - * sondern den im Request mitgegebenen Typen entschieden wird. - * Ansonsten wird genau das selbe geprüft und gemacht: - * Handelt es sich um spezielle Suchmaschinen die immer an sein müssen - * Wenn es Overture ist vermerken dass Overture an ist - * Suma Zähler erhöhen - * Zu Liste hinzufügen - */ - foreach ($sumas as $suma) { - if (($this->sumaIsSelected($suma, $request, $isCustomSearch) - || (!$this->isBildersuche() - && $this->sumaIsAdsuche($suma, $overtureEnabled))) - && (!$this->sumaIsDisabled($suma))) { - if ($this->sumaIsOverture($suma)) { - $overtureEnabled = true; - } - if ($this->sumaIsNotAdsuche($suma)) { - $sumaCount += 1; - } - $enabledSearchengines[] = $suma; + foreach ($this->sumaFile->foki->{$this->fokus}->sumas as $suma) { + # Check if this engine is disabled and can't be used + $disabled = empty($suma->disabled) ? false : $suma->disabled; + if ($disabled) { + continue; } - } - # Sonderregelung für alle Suchmaschinen, die zu den Minisuchern gehören. Diese können alle gemeinsam über einen Link abgefragt werden - $subcollections = []; - - $tmp = []; - // Es gibt den Schalter "minism=on" Dieser soll bewirken, dass alle Minisucher angeschaltet werden. - // Wenn also "minism=on" ist, dann durchsuchen wir statt den tatsächlich angeschalteten Suchmaschinen, - // alle Suchmaschinen nach "minismCollection" - if ($request->input("minism", "off") === "on") { - // Wir laden alle Minisucher - foreach ($sumas as $engine) { - if (isset($engine["minismCollection"])) { - $subcollections[] = $engine["minismCollection"]->__toString(); - } - } - # Nur noch alle eventuell angeschalteten Minisucher deaktivieren - foreach ($enabledSearchengines as $index => $engine) { - if (!isset($engine["minismCollection"])) { - $tmp[] = $engine; + # Check if this engine can use eventually defined query-filter + $valid = true; + foreach ($this->queryFilter as $queryFilter => $filter) { + if (empty($this->sumaFile->filter->$queryFilter->sumas->$suma)) { + $valid = false; + break; } } - } else { - // Wir schalten eine Teilmenge, oder aber gar keine an - foreach ($enabledSearchengines as $engine) { - if (isset($engine['minismCollection'])) { - $subcollections[] = $engine['minismCollection']->__toString(); - } else { - $tmp[] = $engine; - } + # If it can we add it + if ($valid) { + $enabledSearchengines[$suma] = $this->sumaFile->sumas->{$suma}; } + } - $enabledSearchengines = $tmp; - if (sizeof($subcollections) > 0) { - $enabledSearchengines[] = $this->loadMiniSucher($xml, $subcollections); - } - if ($sumaCount <= 0) { - $this->errors[] = trans('metaGer.settings.noneSelected'); + + if (sizeof($enabledSearchengines) === 0) { + $filter = ""; + foreach ($this->queryFilter as $queryFilter => $filterPhrase) { + $filter .= trans($this->sumaFile->filter->{$queryFilter}->name) . ","; + } + $filter = rtrim($filter, ","); + $error = trans('metaGer.engines.noSpecialSearch', ['fokus' => trans($this->sumaFile->foki->{$this->fokus}->{"display-name"}), + 'filter' => $filter]); + $this->errors[] = $error; } + $engines = []; - # Wenn eine Sitesearch durchgeführt werden soll, überprüfen wir ob überhaupt eine der Suchmaschinen eine Sitesearch unterstützt - $siteSearchFailed = $this->checkCanNotSitesearch($enabledSearchengines); $typeslist = []; $counter = 0; @@ -635,9 +581,8 @@ class MetaGer $engine->setResultHash($this->getHashCode()); } } else { - $engines = $this->actuallyCreateSearchEngines($enabledSearchengines, $siteSearchFailed); + $engines = $this->actuallyCreateSearchEngines($enabledSearchengines); } - # Wir starten alle Suchen foreach ($engines as $engine) { $engine->startSearch($this); @@ -684,50 +629,34 @@ class MetaGer return false; } - public function actuallyCreateSearchEngines($enabledSearchengines, $siteSearchFailed) + public function actuallyCreateSearchEngines($enabledSearchengines) { $engines = []; - foreach ($enabledSearchengines as $engine) { + foreach ($enabledSearchengines as $engineName => $engine) { - # Wenn diese Suchmaschine gar nicht eingeschaltet sein soll - if (!$siteSearchFailed - && strlen($this->site) > 0 - && (!isset($engine['hasSiteSearch']) - || $engine['hasSiteSearch']->__toString() === "0")) { - continue; - } - - if (!isset($engine["package"])) { + if (!isset($engine->{"parser-class"})) { die(var_dump($engine)); } # Setze Pfad zu Parser - $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString()); + $path = "App\\Models\\parserSkripte\\" . $engine->{"parser-class"}; # Prüfe ob Parser vorhanden - if (!file_exists(app_path() . "/Models/parserSkripte/" . ucfirst($engine["package"]->__toString()) . ".php")) { - Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert"); - $this->errors[] = trans('metaGer.engines.noParser', ['engine' => $engine["name"]]); + if (!file_exists(app_path() . "/Models/parserSkripte/" . $engine->{"parser-class"} . ".php")) { + Log::error("Konnte " . $engine->{"display-name"} . " nicht abfragen, da kein Parser existiert"); + $this->errors[] = trans('metaGer.engines.noParser', ['engine' => $engine->{"display-name"}]); continue; } # Es wird versucht die Suchengine zu erstellen $time = microtime(); try { - $tmp = new $path($engine, $this); + $tmp = new $path($engineName, $engine, $this); } catch (\ErrorException $e) { - Log::error("Konnte " . $engine["name"] . " nicht abfragen. " . var_dump($e)); + Log::error("Konnte " . $engine->{"display-name"} . " nicht abfragen. " . var_dump($e)); continue; } - # Ausgabe bei Debug-Modus - if ($tmp->enabled && isset($this->debug)) { - $this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime() - $time) / 1000); - } - - # Wenn die neu erstellte Engine eingeschaltet ist, wird sie der Liste hinzugefügt - if ($tmp->isEnabled()) { - $engines[] = $tmp; - } + $engines[] = $tmp; } return $engines; } @@ -790,16 +719,6 @@ class MetaGer return $engines; } - public function loadMiniSucher($xml, $subcollections) - { - $minisucherEngine = $xml->xpath('suma[@name="minism"]')[0]; - $minisucherEngine["subcollections"] = implode(", ", $subcollections); - $subcollectionsString = urlencode("(" . implode(" OR ", $subcollections) . ")"); - $minisucherEngine["formData"] = str_replace("<<SUBCOLLECTIONS>>", $subcollectionsString, $minisucherEngine["formData"]); - $minisucherEngine["formData"] = str_replace("<<COUNT>>", sizeof($subcollections) * 10, $minisucherEngine["formData"]); - return $minisucherEngine; - } - # Passt den Suchfokus an, falls für einen Fokus genau alle vorhandenen Sumas eingeschaltet sind public function adjustFocus($sumas, $enabledSearchengines) { @@ -864,26 +783,6 @@ class MetaGer } } - public function checkCanNotSitesearch($enabledSearchengines) - { - if (strlen($this->site) > 0) { - $enginesWithSite = 0; - foreach ($enabledSearchengines as $engine) { - if (isset($engine['hasSiteSearch']) && $engine['hasSiteSearch']->__toString() === "1") { - $enginesWithSite++; - } - } - if ($enginesWithSite === 0) { - $this->errors[] = trans('metaGer.sitesearch.failed', ['site' => $this->site, 'searchLink' => $this->generateSearchLink("web", false)]); - return true; - } else { - $this->warnings[] = trans('metaGer.sitesearch.success', ['site' => $this->site]); - return false; - } - } - return false; - } - public function waitForResults($enginesToLoad, $overtureEnabled, $canBreak) { @@ -977,12 +876,14 @@ class MetaGer $this->fokus = $request->input('focus', 'web'); # Suma-File if (App::isLocale("en")) { - $this->sumaFile = config_path() . "/sumasEn.xml"; + $this->sumaFile = config_path() . "/sumasEn.json"; } else { - $this->sumaFile = config_path() . "/sumas.xml"; + $this->sumaFile = config_path() . "/sumas.json"; } if (!file_exists($this->sumaFile)) { die(trans('metaGer.formdata.cantLoad')); + } else { + $this->sumaFile = json_decode(file_get_contents($this->sumaFile)); } # Sucheingabe $this->eingabe = trim($request->input('eingabe', '')); @@ -1001,10 +902,7 @@ class MetaGer } else { $this->language = ""; } - # Category - $this->category = $request->input('category', ''); - # Request Times - $this->time = $request->input('time', 1500); + # Page $this->page = 1; # Lang @@ -1022,12 +920,7 @@ class MetaGer } else { $this->sprueche = false; } - $this->maps = $request->input('maps', 'off'); - if ($this->maps === "on") { - $this->maps = true; - } else { - $this->maps = false; - } + $this->newtab = $request->input('newtab', 'on'); if ($this->newtab === "on") { $this->newtab = "_blank"; @@ -1038,14 +931,7 @@ class MetaGer $this->theme = preg_replace("/[^[:alnum:][:space:]]/u", '', $request->input('theme', 'default')); # Ergebnisse pro Seite: $this->resultCount = $request->input('resultCount', '20'); - # Manchmal müssen wir Parameter anpassen um den Sucheinstellungen gerecht zu werden: - if ($request->filled('dart')) { - $this->time = 10000; - $this->warnings[] = trans('metaGer.formdata.dartEurope'); - } - if ($this->time <= 500 || $this->time > 20000) { - $this->time = 1000; - } + if ($request->filled('minism') && ($request->filled('fportal') || $request->filled('harvest'))) { $input = $request->all(); $newInput = []; @@ -1075,6 +961,7 @@ class MetaGer $this->quicktips = true; } + $this->queryFilter = []; $this->verificationId = $request->input('verification_id', null); $this->verificationCount = intval($request->input('verification_count', '0')); $this->apiKey = $request->input('key', ''); @@ -1117,7 +1004,29 @@ class MetaGer public function checkSpecialSearches(Request $request) { $this->searchCheckPhrase(); - $this->searchCheckSitesearch($request); + + # Check for query-filter (i.e. Sitesearch, etc.): + foreach ($this->sumaFile->filter as $filterName => $filter) { + if ($filter->type !== "query-filter") { + continue; + } + if (!empty($filter->{"optional-parameter"}) && $request->filled($filter->{"optional-parameter"})) { + $this->queryFilter[$filterName] = $request->input($filter->{"optional-parameter"}); + } else if (preg_match_all("/" . $filter->regex . "/si", $this->q, $matches) > 0) { + switch ($filter->match) { + case "last": + $this->queryFilter[$filterName] = $matches[$filter->save][sizeof($matches[$filter->save]) - 1]; + $toDelete = preg_quote($matches[$filter->delete][sizeof($matches[$filter->delete]) - 1], "/"); + $this->q = preg_replace('/(' . $toDelete . '(?!.*' . $toDelete . '))/si', '', $this->q); + break; + default: # First occurence + $this->queryFilter[$filterName] = $matches[$filter->save][0]; + $toDelete = preg_quote($matches[$filter->delete][0], "/"); + $this->q = preg_replace('/' . $toDelete . '/si', '', $this->q, 1); + } + } + + } $this->searchCheckHostBlacklist($request); $this->searchCheckDomainBlacklist($request); $this->searchCheckUrlBlacklist(); @@ -1143,19 +1052,6 @@ class MetaGer } } - private function searchCheckSitesearch($request) - { - // matches '[... ]site:test.de[ ...]' - while (preg_match("/(^|.*?\s)site:(\S+)(\s.*|$)/si", $this->q, $match)) { - $this->site = $match[2]; - $this->q = $match[1] . $match[3]; - } - # Overwrite Setting if it's submitted via Parameter - if ($request->has('site')) { - $this->site = $request->input('site'); - } - } - private function searchCheckHostBlacklist($request) { // matches '[... ]-site:test.de[ ...]' @@ -1573,10 +1469,6 @@ class MetaGer { return $this->url; } - public function getTime() - { - return $this->time; - } public function getLanguage() { @@ -1598,16 +1490,6 @@ class MetaGer return $this->sprueche; } - public function getMaps() - { - return $this->maps; - } - - public function getCategory() - { - return $this->category; - } - public function getPhrases() { return $this->phrases; @@ -1622,6 +1504,16 @@ class MetaGer return $this->sumaFile; } + public function getQueryFilter() + { + return $this->queryFilter; + } + + public function getTime() + { + return $this->time; + } + public function getUserHostBlacklist() { return $this->hostBlacklist; diff --git a/app/Models/Result.php b/app/Models/Result.php index 03349b5dfd1db2dfa1e3ffa1e20907d6d9bb62b7..529788464d4021f4f3739c7181e594243059e58d 100644 --- a/app/Models/Result.php +++ b/app/Models/Result.php @@ -31,7 +31,6 @@ class Result # Erstellt ein neues Ergebnis public function __construct($provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $gefVonLink, $sourceRank, $additionalInformation = []) { - $provider = simplexml_load_string($provider); $this->titel = strip_tags(trim($titel)); $this->link = trim($link); $this->anzeigeLink = trim($anzeigeLink); @@ -52,8 +51,8 @@ class Result $this->sourceRank = 20; } $this->sourceRank = 20 - $this->sourceRank; - if (isset($provider["engineBoost"])) { - $this->engineBoost = floatval($provider["engineBoost"]->__toString()); + if (isset($provider->{"engine-boost"})) { + $this->engineBoost = floatval($provider->{"engine-boost"}); } else { $this->engineBoost = 1; } @@ -275,22 +274,6 @@ class Result } } */ - /* Der Host-Filter der sicherstellt, - * dass von jedem Host maximal 3 Links angezeigt werden. - * Diese Überprüfung führen wir unter bestimmten Bedingungen nicht durch. - */ - if (($metager->getSite() === "" || $metager->getSite() === null) && - strpos($this->strippedHost, "ncbi.nlm.nih.gov") === false && - strpos($this->strippedHost, "twitter.com") === false && - strpos($this->strippedHost, "www.ladenpreis.net") === false && - strpos($this->strippedHost, "ncbi.nlm.nih.gov") === false && - strpos($this->strippedHost, "www.onenewspage.com") === false && - $this->gefVon !== "Shopzilla") { - $count = $metager->getHostCount($this->strippedHost); - if ($count >= 3) { - return false; - } - } /* Der Dublettenfilter, der sicher stellt, * dass wir nach Möglichkeit keinen Link doppelt in der Ergebnisliste haben. diff --git a/app/Models/Searchengine.php b/app/Models/Searchengine.php index b6be56e91867530b6dc2d816a7cb353718ad360c..239e0ba67acd718014aa843d3d27f9b2fcc32599 100644 --- a/app/Models/Searchengine.php +++ b/app/Models/Searchengine.php @@ -12,10 +12,8 @@ abstract class Searchengine { use DispatchesJobs; - public $ch; # Curl Handle zum erhalten der Ergebnisse public $getString = ""; # Der String für die Get-Anfrage public $engine; # Die ursprüngliche Engine XML - public $enabled = true; # true, wenn die Suchmaschine nicht explizit disabled ist public $results = []; # Die geladenen Ergebnisse public $ads = []; # Die geladenen Werbungen public $products = []; # Die geladenen Produkte @@ -31,7 +29,7 @@ abstract class Searchengine public $startTime; # Die Zeit der Erstellung dieser Suchmaschine public $hash; # Der Hash-Wert dieser Suchmaschine - private $user; # Username für HTTP-Auth (falls angegeben) + private $username; # Username für HTTP-Auth (falls angegeben) private $password; # Passwort für HTTP-Auth (falls angegeben) private $headers; # Headers to add @@ -42,58 +40,40 @@ abstract class Searchengine public $write_time = 0; # Wird eventuell für Artefakte benötigt public $connection_time = 0; # Wird eventuell für Artefakte benötigt - public function __construct(\SimpleXMLElement $engine, MetaGer $metager) + public function __construct($name, \stdClass $engine, MetaGer $metager) { - # Versucht möglichst viele attribute aus dem engine XML zu laden - foreach ($engine->attributes() as $key => $value) { - $this->$key = $value->__toString(); - } - - # Standardhomepage metager.de - if (!isset($this->homepage)) { - $this->homepage = "https://metager.de"; - } - - # Speichert die XML der Engine - $this->engine = $engine->asXML(); + $this->engine = $engine; + $this->name = $name; # Cache Standarddauer 60 if (!isset($this->cacheDuration)) { $this->cacheDuration = 60; } - - $this->enabled = true; - - # Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab: - if (isset($this->disabled) && strtotime($this->disabled) <= time()) { - # In diesem Fall ist der Timeout der Suchmaschine abgelaufen. - $this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet."); - } elseif (isset($this->disabled) && strtotime($this->disabled) > time()) { - $this->enabled = false; - return; - } - $this->useragent = $metager->getUserAgent(); $this->ip = $metager->getIp(); $this->startTime = microtime(); + # check for http Auth + if (!empty($this->engine->{"http-auth-credentials"}->username) && !empty($this->engine->{"http-auth-credentials"}->password)) { + $this->username = $this->engine->{"http-auth-credentials"}->username; + $this->password = $this->engine->{"http-auth-credentials"}->password; + } - # Suchstring generieren - $q = ""; - if (isset($this->hasSiteSearch) && $this->hasSiteSearch === "1") { - if (strlen($metager->getSite()) === 0) { - $q = $metager->getQ(); - } else { - $q = $metager->getQ() . " site:" . $metager->getSite(); - } + $this->headers = $this->engine->{"request-header"}; - } else { - $q = $metager->getQ(); + # Suchstring generieren + $q = $metager->getQ(); + $filters = $metager->getSumaFile()->filter; + foreach ($metager->getQueryFilter() as $queryFilter => $filter) { + $filterOptions = $filters->$queryFilter; + $filterOptionsEngine = $filterOptions->sumas->{$this->name}; + $query = $filterOptionsEngine->prefix . $filter . $filterOptionsEngine->suffix; + $q = $query . " " . $q; } - $this->getString = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory()); - $this->hash = md5($this->host . $this->getString . $this->port . $this->name); + + $this->getString = $this->generateGetString($q); + $this->hash = md5($this->engine->host . $this->getString . $this->engine->port . $this->name); $this->resultHash = $metager->getHashCode(); $this->canCache = $metager->canCache(); - if (!isset($this->additionalHeaders)) {$this->additionalHeaders = "";} } abstract public function loadResults($result); @@ -107,6 +87,7 @@ abstract class Searchengine # Prüft, ob die Suche bereits gecached ist, ansonsted wird sie als Job dispatched public function startSearch(\App\MetaGer $metager) { + if ($this->canCache && Cache::has($this->hash)) { $this->cached = true; $this->retrieveResults($metager); @@ -118,18 +99,20 @@ abstract class Searchengine // <ResultHash>;<URL to fetch> // With <ResultHash> being the Hash Value where the fetcher will store the result. // and <URL to fetch> being the full URL to the searchengine + $url = ""; - if ($this->port === "443") { + if ($this->engine->port === 443) { $url = "https://"; } else { $url = "http://"; } - $url .= $this->host; - if ($this->port !== 80 && $this->port !== 443) { - $url .= ":" . $this->port; + $url .= $this->engine->host; + if ($this->engine->port !== 80 && $this->engine->port !== 443) { + $url .= ":" . $this->engine->port; } $url .= $this->getString; $url = base64_encode($url); + $mission = $this->resultHash . ";" . $url . ";" . $metager->getTime(); // Submit this mission to the corresponding Redis Queue // Since each Searcher is dedicated to one specific search engine @@ -176,7 +159,7 @@ abstract class Searchengine } if ($needSearcher && Redis::get($this->name) !== "locked") { Redis::set($this->name, "locked"); - $this->dispatch(new Searcher($this->name, $this->user, $this->password, $this->headers)); + $this->dispatch(new Searcher($this->name, $this->username, $this->password, $this->headers)); } } } @@ -189,46 +172,11 @@ abstract class Searchengine } } - # Magic ??? - private function setStatistic($key, $val) - { - - $oldVal = floatval(Redis::hget($this->name, $key)) * $this->uses; - $newVal = ($oldVal + max($val, 0)) / $this->uses; - Redis::hset($this->name, $key, $newVal); - $this->$key = $newVal; - } - - # Entfernt wenn gesetzt das disabled="1" für diese Suchmaschine aus der sumas.xml - public function enable($sumaFile, $message) - { - $xml = simplexml_load_file($sumaFile); - unset($xml->xpath("//sumas/suma[@name='" . $this->name . "']")['0']['disabled']); - $xml->saveXML($sumaFile); - $this->enabled = true; - } - public function setResultHash($hash) { $this->resultHash = $hash; } - public function closeFp() - { - fclose($this->fp); - } - - # Öffnet einen neuen Socket für diese Engine - public function getSocket() - { - $number = Redis::hget('search.' . $this->hash, $this->name); - if ($number === null) { - return null; - } else { - return pfsockopen($this->getHost() . ":" . $this->port . "/$number", $this->port, $errstr, $errno, 1); - } - } - # Fragt die Ergebnisse von Redis ab und lädt Sie public function retrieveResults(MetaGer $metager) { @@ -256,71 +204,34 @@ abstract class Searchengine } } - public function shutdown() - { - Redis::del($this->host . "." . $this->socketNumber); - } - - # Erstellt den für die Get-Anfrage genutzten Host-Link - protected function getHost() - { - $return = ""; - if ($this->port === "443") { - $return .= "tls://"; - } else { - $return .= "tcp://"; - } - $return .= $this->host; - return $return; - } - # Erstellt den für die Get-Anfrage genutzten String - private function generateGetString($query, $url, $language, $category) + protected function generateGetString($query) { $getString = ""; # Skript: - if (strlen($this->skript) > 0) { - $getString .= $this->skript; + if (!empty($this->engine->path)) { + $getString .= $this->engine->path; } else { $getString .= "/"; } - # FormData: - if (strlen($this->formData) > 0) { - $getString .= "?" . $this->formData; - } - - # Wir müssen noch einige Platzhalter in dem GET-String ersetzen: - # Useragent - if (strpos($getString, "<<USERAGENT>>")) { - $getString = str_replace("<<USERAGENT>>", $this->urlEncode($this->useragent), $getString); - } - - # Query - if (strpos($getString, "<<QUERY>>")) { - $getString = str_replace("<<QUERY>>", $this->urlEncode($query), $getString); + $getString .= "?"; + $parameter = []; + foreach ($this->engine->{"get-parameter"} as $key => $value) { + $parameter[] = $this->urlEncode($key) . "=" . $this->urlEncode($value); } + $getString .= implode("&", $parameter); - # IP - if (strpos($getString, "<<IP>>")) { - $getString = str_replace("<<IP>>", $this->urlEncode($this->ip), $getString); - } - - # Language - if (strpos($getString, "<<LANGUAGE>>")) { - $getString = str_replace("<<LANGUAGE>>", $this->urlEncode($language), $getString); - } - - # Category - if (strpos($getString, "<<CATEGORY>>")) { - $getString = str_replace("<<CATEGORY>>", $this->urlEncode($category), $getString); - } + # Append the Query String + $getString .= "&" . $this->engine->{"query-parameter"} . "=" . $this->urlEncode($query); +/* +die(var_dump($getString)); - # Affildata - if (strpos($getString, "<<AFFILDATA>>")) { - $getString = str_replace("<<AFFILDATA>>", $this->getOvertureAffilData($url), $getString); - } +# Affildata +if (strpos($getString, "<<AFFILDATA>>")) { +$getString = str_replace("<<AFFILDATA>>", $this->getOvertureAffilData($url), $getString); +}*/ return $getString; } @@ -333,43 +244,4 @@ abstract class Searchengine return urlencode($string); } } - - # Liefert Sonderdaten für Yahoo - private function getOvertureAffilData($url) - { - $affil_data = 'ip=' . $this->ip; - $affil_data .= '&ua=' . $this->useragent; - $affilDataValue = $this->urlEncode($affil_data); - # Wir benötigen die ServeUrl: - $serveUrl = $this->urlEncode($url); - - return "&affilData=" . $affilDataValue . "&serveUrl=" . $serveUrl; - } - - public function isEnabled() - { - return $this->enabled; - } - - # Artefaktmethoden - - public function getCurlInfo() - { - return curl_getinfo($this->ch); - } - - public function getCurlErrors() - { - return curl_errno($this->ch); - } - - public function addCurlHandle($mh) - { - curl_multi_add_handle($mh, $this->ch); - } - - public function removeCurlHandle($mh) - { - curl_multi_remove_handle($mh, $this->ch); - } } diff --git a/app/Models/parserSkripte/Bing.php b/app/Models/parserSkripte/Bing.php deleted file mode 100644 index af6ebc9fe882d9889349159903906f631163f430..0000000000000000000000000000000000000000 --- a/app/Models/parserSkripte/Bing.php +++ /dev/null @@ -1,46 +0,0 @@ -<?php - -namespace app\Models\parserSkripte; - -use App\Models\Searchengine; -use Symfony\Component\DomCrawler\Crawler; -use Log; - -class Bing extends Searchengine -{ - public $results = []; - - public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager) - { - parent::__construct($engine, $metager); - } - - public function loadResults($result) - { - try { - $crawler = new Crawler($result); - $crawler->filter('ol#b_results > li.b_algo')->each(function (Crawler $node, $i) { - $title = $node->filter('li h2 > a')->text(); - $link = $node->filter('li h2 > a')->attr('href'); - $anzeigeLink = $link; - $descr = $node->filter('li div > p')->text(); - - $this->counter++; - $this->results[] = new \App\Models\Result( - $this->engine, - $title, - $link, - $anzeigeLink, - $descr, - $this->displayName,$this->homepage, - $this->counter - ); - }); - } catch (\Exception $e) { - Log::error("A problem occurred parsing results from $this->name:"); - Log::error($e->getMessage()); - return; - } - - } -} diff --git a/app/Models/parserSkripte/BingBilder.php b/app/Models/parserSkripte/BingBilder.php index cd360e2fe165aee30af9eb3cdc7f0aa990c0f8d0..1587e230fb4a14c015940a25239a8969c60e1421 100644 --- a/app/Models/parserSkripte/BingBilder.php +++ b/app/Models/parserSkripte/BingBilder.php @@ -9,9 +9,9 @@ class BingBilder extends Searchengine { public $results = []; - public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager) + public function __construct($name, \stdClass $engine, \App\MetaGer $metager) { - parent::__construct($engine, $metager); + parent::__construct($name, $engine, $metager); } public function loadResults($result) @@ -33,7 +33,7 @@ class BingBilder extends Searchengine $link, $anzeigeLink, $descr, - $this->displayName, $this->homepage, + $this->engine->{"display-name"}, $this->engine->homepage, $this->counter, ['image' => $image] ); @@ -59,12 +59,9 @@ class BingBilder extends Searchengine return; } - $next = new BingBilder(simplexml_load_string($this->engine), $metager); - if (\str_contains($next->getString, "&offset=")) { - $next->getString = preg_replace("/&offset=.*/si", "", $next->getString); - } - $next->getString .= "&offset=" . $nextOffset; - $next->hash = md5($next->host . $next->getString . $next->port . $next->name); + $newEngine = unserialize(serialize($this->engine)); + $newEngine->{"get-parameter"}->offset = $nextOffset; + $next = new BingBilder($this->name, $newEngine, $metager); $this->next = $next; } catch (\Exception $e) { diff --git a/app/Models/parserSkripte/Bing_bilder.php b/app/Models/parserSkripte/Bing_bilder.php deleted file mode 100644 index 56dc4fd21ed1ff81bb938dab7bff5017cdf74f68..0000000000000000000000000000000000000000 --- a/app/Models/parserSkripte/Bing_bilder.php +++ /dev/null @@ -1,41 +0,0 @@ -<?php - -namespace app\Models\parserSkripte; - -use App\Models\Searchengine; -use Symfony\Component\DomCrawler\Crawler; - -class Bing_bilder extends Searchengine -{ - public $results = []; - - public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager) - { - parent::__construct($engine, $metager); - } - - public function loadResults($result) - { - $crawler = new Crawler($result); - $crawler->filter('div#b_content div.item')->each(function (Crawler $node, $i) { - $title = $node->filter('div.meta > a.tit')->text(); - $link = $node->filter('div.meta > a.tit')->attr("href"); - $anzeigeLink = $link; - $descr = $node->filter('div.meta > div.des')->text(); - $image = $node->filter('a.thumb img')->attr("src"); - - $this->counter++; - $this->results[] = new \App\Models\Result( - $this->engine, - $title, - $link, - $anzeigeLink, - $descr, - $this->displayName,$this->homepage, - $this->counter, - ['image' => $image] - ); - }); - - } -} diff --git a/app/Models/parserSkripte/Overture.php b/app/Models/parserSkripte/Overture.php index c54d531f2732facdd035c9dcab06b5e00e39faf4..c8351132db27d09e398edcec27137e82652e1b85 100644 --- a/app/Models/parserSkripte/Overture.php +++ b/app/Models/parserSkripte/Overture.php @@ -9,9 +9,12 @@ class Overture extends Searchengine { public $results = []; - public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager) + public function __construct($name, \stdClass $engine, \App\MetaGer $metager) { - parent::__construct($engine, $metager); + parent::__construct($name, $engine, $metager); + # We need some Affil-Data for the advertisements + $this->getString .= $this->getOvertureAffilData($metager->getUrl()); + $this->hash = md5($this->engine->host . $this->getString . $this->engine->port . $this->name); } public function loadResults($result) @@ -36,8 +39,8 @@ class Overture extends Searchengine $link, $anzeigeLink, $descr, - $this->displayName, - $this->homepage, + $this->engine->{"display-name"}, + $this->engine->homepage, $this->counter ); } @@ -56,7 +59,7 @@ class Overture extends Searchengine $link, $anzeigeLink, $descr, - $this->displayName, $this->homepage, + $this->engine->{"display-name"}, $this->engine->homepage, $this->counter ); } @@ -112,9 +115,21 @@ class Overture extends Searchengine } # Erstellen des neuen Suchmaschinenobjekts und anpassen des GetStrings: - $next = new Overture(simplexml_load_string($this->engine), $metager); + $next = new Overture($this->name, $this->engine, $metager); $next->getString = preg_replace("/&Keywords=.*?&/si", "&", $next->getString) . "&" . $nextArgs; - $next->hash = md5($next->host . $next->getString . $next->port . $next->name); + $next->hash = md5($next->engine->host . $next->getString . $next->engine->port . $next->name); $this->next = $next; } + + # Liefert Sonderdaten für Yahoo + private function getOvertureAffilData($url) + { + $affil_data = 'ip=' . $this->ip; + $affil_data .= '&ua=' . $this->useragent; + $affilDataValue = $this->urlEncode($affil_data); + # Wir benötigen die ServeUrl: + $serveUrl = $this->urlEncode($url); + + return "&affilData=" . $affilDataValue . "&serveUrl=" . $serveUrl; + } } diff --git a/app/Models/parserSkripte/Scopia.php b/app/Models/parserSkripte/Scopia.php index 8516eacb0d032c6ee774755c0fc7f3485effa08a..29a1b5b7cd56f7b9ebab66fec0e36fb30dffc006 100644 --- a/app/Models/parserSkripte/Scopia.php +++ b/app/Models/parserSkripte/Scopia.php @@ -9,9 +9,9 @@ class Scopia extends Searchengine { public $results = []; - public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager) + public function __construct($name, \stdClass $engine, \App\MetaGer $metager) { - parent::__construct($engine, $metager); + parent::__construct($name, $engine, $metager); } public function loadResults($result) @@ -30,6 +30,7 @@ class Scopia extends Searchengine $results = $content->xpath('//results/result'); foreach ($results as $result) { + $title = $result->title->__toString(); $link = $result->url->__toString(); $anzeigeLink = $link; @@ -41,8 +42,8 @@ class Scopia extends Searchengine $link, $anzeigeLink, $descr, - $this->displayName, - $this->homepage, + $this->engine->{"display-name"}, + $this->engine->homepage, $this->counter ); } @@ -76,9 +77,9 @@ class Scopia extends Searchengine $results = $content->xpath('//results/result'); $number = $results[sizeof($results) - 1]->number->__toString(); # Erstellen des neuen Suchmaschinenobjekts und anpassen des GetStrings: - $next = new Scopia(simplexml_load_string($this->engine), $metager); - $next->getString = preg_replace("/\\?s=.*?&/si", "?s=" . $number, $next->getString); - $next->hash = md5($next->host . $next->getString . $next->port . $next->name); + $newEngine = unserialize(serialize($this->engine)); + $newEngine->{"get-parameter"}->s = $number; + $next = new Scopia($this->name, $newEngine, $metager); $this->next = $next; } diff --git a/resources/lang/de/metaGer.php b/resources/lang/de/metaGer.php index 4668449ec948619ed7210db39f7b6893e21ccc33..dff2750917df77dcac326c89c9771a82e2dd71ba 100644 --- a/resources/lang/de/metaGer.php +++ b/resources/lang/de/metaGer.php @@ -7,6 +7,7 @@ return [ 'settings.noneSelected' => 'Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.', 'engines.noParser' => 'Beim Abfragen von :engine ist ein Fehler aufgetreten. Bitte benachrichtigen Sie uns unter: office@suma-ev.de', + 'engines.noSpecialSearch' => 'Für eine Ihrer Filteroptionen gab es keine Suchmaschine im aktuellen Fokus (:fokus), welche diese unterstützen würde. Sie haben folgende(n) Filter aktiv: ":filter".', 'formdata.cantLoad' => 'Suma-File konnte nicht gefunden werden', 'formdata.noSearch' => 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.', @@ -20,4 +21,5 @@ return [ 'sitesearch.failed' => 'Sie wollten eine Sitesearch auf :site durchführen. Leider unterstützen die eingestellten Suchmaschinen diese nicht. Sie können die Sitesearch im Web-Fokus durchführen. Es werden ihnen Ergebnisse ohne Sitesearch angezeigt.', 'sitesearch.success' => 'Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: ":site" angezeigt.', 'feedback' => 'Nichts Passendes dabei? Geben Sie uns Feedback: ', + 'filter.sitesearch' => 'Sitesearch', ];