diff --git a/app/MetaGer.php.orig b/app/MetaGer.php.orig
new file mode 100644
index 0000000000000000000000000000000000000000..06df2d58fec963a7e1fa9c8dc554798a653c7d6f
--- /dev/null
+++ b/app/MetaGer.php.orig
@@ -0,0 +1,742 @@
+<?php
+namespace App;
+
+use Illuminate\Http\Request;
+use Jenssegers\Agent\Agent;
+use App;
+use Storage;
+use Log;
+use App\lib\TextLanguageDetect\TextLanguageDetect;
+use App\lib\TextLanguageDetect\LanguageDetect\TextLanguageDetectException;
+use Illuminate\Pagination\LengthAwarePaginator;
+use Illuminate\Support\Collection;
+#use \Illuminate\Pagination\Paginator;
+
+/**
+ * State and orchestration of a single meta search request: parses the form
+ * data, queries the engines listed in the suma XML file, merges and ranks
+ * their results and renders the result view.
+ */
+class MetaGer
+{
+    # Search settings
+    protected $fokus;
+    protected $eingabe;
+    protected $q;
+    protected $category;
+    protected $time;
+    protected $page;
+    protected $lang;
+    protected $cache = "";
+    protected $site;
+    protected $hostBlacklist = [];
+    protected $domainBlacklist = [];
+    protected $stopWords = [];
+    protected $engines = [];
+    protected $results = [];
+    protected $ads = [];
+    protected $warnings = [];
+    protected $errors = [];
+    protected $addedHosts = [];
+    # Data about the request
+    protected $ip;
+    protected $language;
+    protected $agent;
+    # Configuration settings:
+    protected $sumaFile;
+    protected $mobile;
+    protected $resultCount;
+    protected $sprueche;
+    protected $domainsBlacklisted = [];
+    protected $urlsBlacklisted = [];
+    protected $url;
+    protected $languageDetect;
+    # These used to be created dynamically on first assignment. Dynamic
+    # properties are public, so they are declared public to stay compatible.
+    public $out;
+    public $tab;
+    public $request;
+    public $debug;
+    public $sockets = [];
+    public $addedLinks = [];
+
+    /**
+     * Defines the CRLF and BUFFER_LENGTH constants, reads the blacklists from
+     * the config directory and sets up the language detection.
+     */
+    public function __construct()
+    {
+        $this->time = microtime();
+        # define() complains when the constant exists already, which happens
+        # as soon as a second instance is created in the same process.
+        if (!defined('CRLF')) {
+            define('CRLF', "\r\n");
+        }
+        if (!defined('BUFFER_LENGTH')) {
+            define('BUFFER_LENGTH', 8192);
+        }
+
+        $domainFile = config_path() . "/blacklistDomains.txt";
+        $urlFile = config_path() . "/blacklistUrl.txt";
+        if (file_exists($domainFile) && file_exists($urlFile)) {
+            # Read the blacklists:
+            $this->domainsBlacklisted = $this->readBlacklist($domainFile);
+            $this->urlsBlacklisted = $this->readBlacklist($urlFile);
+        } else {
+            Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
+        }
+
+        $this->languageDetect = new TextLanguageDetect();
+        $this->languageDetect->setNameMode("2");
+    }
+
+    /**
+     * Reads a blacklist file that holds one entry per line.
+     *
+     * Splits on any line break and drops empty lines, so a trailing newline
+     * or Windows line endings yield no empty or "\r"-terminated entries.
+     *
+     * @param string $path
+     * @return array
+     */
+    private function readBlacklist($path)
+    {
+        $content = file_get_contents($path);
+        if ($content === false) {
+            return [];
+        }
+        return preg_split('/\R/', $content, -1, PREG_SPLIT_NO_EMPTY);
+    }
+
+    /**
+     * Asks every engine to rank its results.
+     */
+    public function rankAll ()
+    {
+        foreach ($this->engines as $engine) {
+            $engine->rank($this);
+        }
+    }
+
+    /**
+     * Renders the view selected by the "out" setting: "metager3results" for
+     * "results", "metager3" for everything else.
+     *
+     * @return mixed whatever view() returns
+     */
+    public function createView()
+    {
+        # Extract the properties of every result and hand them to the view:
+        $viewResults = [];
+        foreach ($this->results as $result) {
+            $viewResults[] = get_object_vars($result);
+        }
+
+        $template = ($this->out === 'results') ? 'metager3results' : 'metager3';
+
+        return view($template)
+            ->with('results', $viewResults)
+            ->with('eingabe', $this->eingabe)
+            ->with('mobile', $this->mobile)
+            ->with('warnings', $this->warnings)
+            ->with('errors', $this->errors)
+            ->with('metager', $this);
+    }
+
+    /**
+     * Calls isValid() on every result.
+     *
+     * The filtered list is not written back (the assignment is commented
+     * out), so $this->results itself stays untouched.
+     */
+    public function removeInvalids ()
+    {
+        $results = [];
+        foreach ($this->results as $result) {
+            if ($result->isValid($this)) {
+                $results[] = $result;
+            }
+        }
+        #$this->results = $results;
+    }
+
+    /**
+     * Collects the valid results and the ads of all engines, sorts the
+     * results by descending rank, drops the ones failing isValid(), numbers
+     * and colours the rest and replaces $this->results by a paginator.
+     */
+    public function combineResults ()
+    {
+        foreach ($this->engines as $engine) {
+            foreach ($engine->results as $result) {
+                if ($result->valid) {
+                    $this->results[] = $result;
+                }
+            }
+            foreach ($engine->ads as $ad) {
+                $this->ads[] = $ad;
+            }
+        }
+        uasort($this->results, function ($a, $b) {
+            if ($a->getRank() == $b->getRank()) {
+                return 0;
+            }
+            return ($a->getRank() < $b->getRank()) ? 1 : -1;
+        });
+        # Validate results
+        $newResults = [];
+        foreach ($this->results as $result) {
+            if ($result->isValid($this)) {
+                $newResults[] = $result;
+            }
+        }
+        $this->results = $newResults;
+
+        $counter = 0;
+        $firstRank = 0;
+        foreach ($this->results as $result) {
+            if ($counter === 0) {
+                $firstRank = $result->rank;
+            }
+            $counter++;
+            $result->number = $counter;
+            # The colour depends on the rank relative to the first result.
+            $confidence = ($firstRank > 0) ? $result->rank / $firstRank : 0;
+            if ($confidence > 0.65) {
+                $result->color = "#FF4000";
+            } elseif ($confidence > 0.4) {
+                $result->color = "#FF0080";
+            } elseif ($confidence > 0.2) {
+                $result->color = "#C000C0";
+            } else {
+                $result->color = "#000000";
+            }
+        }
+
+        // Get the current page from the URL, e.g. &page=6
+        $currentPage = LengthAwarePaginator::resolveCurrentPage();
+        $offset = $currentPage - 1;
+
+        // Create a new Laravel collection from the array data
+        $collection = new Collection($this->results);
+
+        // Define how many items we want to be visible on each page
+        $perPage = $this->resultCount;
+
+        // Slice the collection to get the items to display on the current page
+        $currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
+
+        // Create our paginator and pass it to the view
+        $paginatedSearchResults = new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
+        $paginatedSearchResults->setPath('/meta/meta.ger3');
+        foreach ($this->request->all() as $key => $value) {
+            $paginatedSearchResults->addQuery($key, $value);
+        }
+
+        $this->results = $paginatedSearchResults;
+    }
+
+    /**
+     * Selects the engines from the suma file, instantiates them, waits
+     * 500ms for their answers and reads whatever has arrived by then.
+     *
+     * @param Request $request
+     */
+    public function createSearchEngines (Request $request)
+    {
+        # Check which sumas (search engines) are switched on
+        $xml = simplexml_load_file($this->sumaFile);
+        $enabledSearchengines = [];
+        $overtureEnabled = FALSE;
+
+        foreach ($xml->xpath("suma") as $suma) {
+            if ($this->fokus === "angepasst") {
+                # Custom focus: the engines are named by the request.
+                $selected = $request->has($suma["service"]);
+            } else {
+                $selected = in_array($this->fokus, explode(",", $suma["type"]), true);
+            }
+            if (!$selected && !$this->isAlwaysIncluded($suma, $overtureEnabled)) {
+                continue;
+            }
+            if (isset($suma['disabled']) && $suma['disabled']->__toString() === "1") {
+                continue;
+            }
+            if ($suma["name"]->__toString() === "overture") {
+                $overtureEnabled = TRUE;
+            }
+            $enabledSearchengines[] = $suma;
+        }
+
+        # NOTE(review): the limit of 3 presumably accounts for the engines
+        # that are always included outside the image focus - confirm.
+        if (($this->fokus !== "bilder" && sizeof($enabledSearchengines) <= 3) || ($this->fokus === "bilder" && sizeof($enabledSearchengines) === 0)) {
+            $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.";
+        }
+
+        $engines = [];
+        foreach ($enabledSearchengines as $engine) {
+            # During a site search, skip engines without site search support.
+            if (strlen((string) $this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1")) {
+                continue;
+            }
+
+            $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
+
+            # microtime(true) returns seconds as a float that can be subtracted;
+            # the total ("Insgesamt") is reported in milliseconds.
+            $time = microtime(true);
+            $tmp = new $path($engine, $this);
+
+            if ($tmp->enabled && isset($this->debug)) {
+                $this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime(true) - $time) * 1000);
+            }
+
+            if ($tmp->isEnabled()) {
+                $engines[] = $tmp;
+                $this->sockets[$tmp->name] = $tmp->fp;
+            }
+        }
+
+        # Now comes an essential step: we have to wait for the answers of
+        # the search engines before we can go on, but not forever. At this
+        # point the connections are open and the requests have been sent.
+        # We give the engines up to 500ms to answer.
+        usleep(500000);
+        # Now read everything that has arrived and discard the rest:
+        foreach ($engines as $engine) {
+            $engine->retrieveResults();
+        }
+
+        $this->engines = $engines;
+    }
+
+    /**
+     * Tells whether a suma is one of the engines that are added to every
+     * search outside the image focus ("bilder"): qualigo,
+     * similar_product_ads and - unless overture was enabled before -
+     * overtureAds.
+     *
+     * @param \SimpleXMLElement $suma
+     * @param bool $overtureEnabled
+     * @return bool
+     */
+    private function isAlwaysIncluded($suma, $overtureEnabled)
+    {
+        if ($this->fokus === "bilder") {
+            return false;
+        }
+        $name = $suma["name"]->__toString();
+        return $name === "qualigo"
+            || $name === "similar_product_ads"
+            || (!$overtureEnabled && $name === "overtureAds");
+    }
+
+    /**
+     * Reads all search settings from the request, replaces invalid values
+     * by defaults and collects warnings for the user.
+     *
+     * @param Request $request
+     */
+    public function parseFormData (Request $request)
+    {
+        if ($request->input('encoding', '') !== "utf8") {
+            # Earlier versions had no encoding parameter and transmitted the
+            # data as ISO-8859-1
+            $input = $request->all();
+            foreach ($input as $key => $value) {
+                $input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1");
+            }
+            $request->replace($input);
+        }
+        $this->url = $request->url();
+        # First we check the submitted settings:
+        # FOKUS
+        $this->fokus = trans('fokiNames.' . $request->input('focus', 'web'));
+        # A dot presumably means trans() returned the raw key, i.e. the focus
+        # is unknown: fall back to web. strpos() is compared against false so
+        # that a dot at position 0 is detected as well.
+        if (strpos($this->fokus, ".") !== false) {
+            $this->fokus = trans('fokiNames.web');
+        }
+
+        # SUMA-FILE
+        if (App::isLocale("en")) {
+            $this->sumaFile = config_path() . "/sumasEn.xml";
+        } else {
+            $this->sumaFile = config_path() . "/sumas.xml";
+        }
+        if (!file_exists($this->sumaFile)) {
+            die("Suma-File konnte nicht gefunden werden");
+        }
+
+        # Search input:
+        $this->eingabe = trim($request->input('eingabe', ''));
+        if (strlen($this->eingabe) === 0) {
+            $this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.';
+        }
+        $this->q = $this->eingabe;
+
+        # IP:
+        if (isset($_SERVER['HTTP_FROM'])) {
+            $this->ip = $_SERVER['HTTP_FROM'];
+        } else {
+            $this->ip = "127.0.0.1";
+        }
+        # Language:
+        if (isset($_SERVER['HTTP_LANGUAGE'])) {
+            $this->language = $_SERVER['HTTP_LANGUAGE'];
+        } else {
+            $this->language = "";
+        }
+        # Category
+        $this->category = $request->input('category', '');
+        # Request times:
+        $this->time = $request->input('time', 1);
+        # Page
+        $this->page = $request->input('page', 1);
+        # Lang
+        $this->lang = $request->input('lang', 'all');
+        if ($this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all") {
+            $this->lang = "all";
+        }
+        $this->agent = new Agent();
+        $this->mobile = $this->agent->isMobile();
+        # Sprueche
+        $this->sprueche = $request->input('sprueche', 'on');
+        # Results per page:
+        $this->resultCount = $request->input('resultCount', '20');
+
+        # Sometimes parameters have to be adjusted to suit the search settings:
+        if ($request->has('dart')) {
+            $this->time = 10;
+            $this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt.";
+        }
+        if ($this->time < 0 || $this->time > 20) {
+            $this->time = 1;
+        }
+        if ($request->has('minism') && ($request->has('fportal') || $request->has('harvest'))) {
+            # Drop fportal and harvest from the request data.
+            $input = $request->all();
+            $newInput = [];
+            foreach ($input as $key => $value) {
+                if ($key !== "fportal" && $key !== "harvest") {
+                    $newInput[$key] = $value;
+                }
+            }
+            $request->replace($newInput);
+        }
+        if ($request->has('ebay')) {
+            $this->time = 2;
+            $this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt.";
+        }
+        if (App::isLocale("en")) {
+            $this->sprueche = "off";
+        }
+        # NOTE(review): out-of-range values become 1000 rather than the
+        # default of 20 - confirm that this is intended.
+        if ($this->resultCount <= 0 || $this->resultCount > 200) {
+            $this->resultCount = 1000;
+        }
+        if ($request->has('onenewspageAll') || $request->has('onenewspageGermanyAll')) {
+            $this->time = 5000;
+            $this->cache = "cache";
+        }
+        # "_self" only if "tab" is present with a value other than "1".
+        $this->tab = ($request->has('tab') && $request->input('tab') !== "1") ? "_self" : "_blank";
+        $this->out = $request->input('out', "html");
+        if ($this->out !== "html" && $this->out !== "json" && $this->out !== "results" && $this->out !== "results-with-style") {
+            $this->out = "html";
+        }
+        $this->request = $request;
+    }
+
+    /**
+     * Extracts the "site:", "-host:", "-domain:" and "-word" operators from
+     * the query, stores them and adds a warning describing each kind.
+     *
+     * @param Request $request not used by this method
+     */
+    public function checkSpecialSearches (Request $request)
+    {
+        # Site search:
+        if (preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $match)) {
+            $this->site = $match[2];
+            $this->q = $match[1] . $match[3];
+            $this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt.";
+        }
+        # Hosts given as "-host:*" are not to be shown in the results.
+        while (preg_match("/(.*)(^|\s)-host:(\S+)(.*)/si", $this->q, $match)) {
+            $this->hostBlacklist[] = $match[3];
+            $this->q = $match[1] . $match[4];
+        }
+        if (sizeof($this->hostBlacklist) > 0) {
+            $this->warnings[] = "Ergebnisse von folgenden Hosts werden nicht angezeigt: \"" . implode(", ", $this->hostBlacklist) . "\"";
+        }
+        # Domains given as "-domain:*" are not to be shown in the results.
+        while (preg_match("/(.*)(^|\s)-domain:(\S+)(.*)/si", $this->q, $match)) {
+            $this->domainBlacklist[] = $match[3];
+            $this->q = $match[1] . $match[4];
+        }
+        if (sizeof($this->domainBlacklist) > 0) {
+            $this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . implode(", ", $this->domainBlacklist) . "\"";
+        }
+
+        # All words prefixed with "-" are excluded from the search.
+        while (preg_match("/(.*)(^|\s)-(\S+)(.*)/si", $this->q, $match)) {
+            $this->stopWords[] = $match[3];
+            $this->q = $match[1] . $match[4];
+        }
+        if (sizeof($this->stopWords) > 0) {
+            $this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . implode(", ", $this->stopWords) . "\"";
+        }
+
+        # Notice about a phrase search
+        if (preg_match("/\"(.+)\"/si", $this->q, $match)) {
+            $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $match[1] . "\"";
+        }
+    }
+
+    # Accessors
+    public function getFokus ()
+    {
+        return $this->fokus;
+    }
+
+    public function getIp ()
+    {
+        return $this->ip;
+    }
+
+    public function getEingabe ()
+    {
+        return $this->eingabe;
+    }
+
+    /**
+     * Returns the query with the "site:" operator re-attached if a site
+     * search is active.
+     */
+    public function getQ ()
+    {
+        if (strlen((string) $this->site) > 0) {
+            return $this->q . " site:" . $this->site;
+        }
+        return $this->q;
+    }
+
+    public function getUrl ()
+    {
+        return $this->url;
+    }
+
+    public function getTime ()
+    {
+        return $this->time;
+    }
+
+    public function getLanguage ()
+    {
+        return $this->language;
+    }
+
+    public function getLang ()
+    {
+        return $this->lang;
+    }
+
+    public function getSprueche ()
+    {
+        return $this->sprueche;
+    }
+
+    public function getCategory ()
+    {
+        return $this->category;
+    }
+
+    public function getSumaFile ()
+    {
+        return $this->sumaFile;
+    }
+
+    public function getUserHostBlacklist ()
+    {
+        return $this->hostBlacklist;
+    }
+
+    public function getUserDomainBlacklist ()
+    {
+        return $this->domainBlacklist;
+    }
+
+    public function getDomainBlacklist ()
+    {
+        return $this->domainsBlacklisted;
+    }
+
+    public function getUrlBlacklist ()
+    {
+        return $this->urlsBlacklisted;
+    }
+
+    public function getLanguageDetect ()
+    {
+        return $this->languageDetect;
+    }
+
+    public function getStopWords ()
+    {
+        return $this->stopWords;
+    }
+
+    /**
+     * Returns how often addHostCount() was called for the host.
+     *
+     * The counters are keyed by the md5 hash of the host, so the lookup has
+     * to hash the host as well.
+     *
+     * @param string $host
+     * @return int
+     */
+    public function getHostCount($host)
+    {
+        $hash = md5($host);
+        if (isset($this->addedHosts[$hash])) {
+            return $this->addedHosts[$hash];
+        }
+        return 0;
+    }
+
+    /**
+     * Increments the counter of the host.
+     *
+     * @param string $host
+     */
+    public function addHostCount($host)
+    {
+        $hash = md5($host);
+        if (isset($this->addedHosts[$hash])) {
+            $this->addedHosts[$hash] += 1;
+        } else {
+            $this->addedHosts[$hash] = 1;
+        }
+    }
+
+    public function getSite()
+    {
+        return $this->site;
+    }
+
+    /**
+     * Remembers a link and tells whether it was new.
+     *
+     * @param string $link
+     * @return bool false if the link has been added before, true otherwise
+     */
+    public function addLink($link)
+    {
+        $hash = md5($link);
+        if (isset($this->addedLinks[$hash])) {
+            return false;
+        }
+        $this->addedLinks[$hash] = 1;
+        return true;
+    }
+
+    /**
+     * Builds the link for the current search with another focus; the link
+     * requests the "results" output and drops the page parameter.
+     *
+     * @param string $fokus
+     * @return string
+     */
+    public function generateSearchLink($fokus)
+    {
+        $requestData = $this->request->except('page');
+        $requestData['focus'] = $fokus;
+        $requestData['out'] = "results";
+        return action('MetaGerSearch@search', $requestData);
+    }
+
+    public function generateQuicktipLink()
+    {
+        return action('MetaGerSearch@quicktips');
+    }
+
+    /**
+     * Builds the link that restricts the current search to a host.
+     *
+     * @param string $host
+     * @return string
+     */
+    public function generateSiteSearchLink($host)
+    {
+        return $this->linkWithQuerySuffix(" site:" . urlencode($host), ['focus' => "web"]);
+    }
+
+    /**
+     * Builds the link that repeats the current search without a host.
+     *
+     * @param string $host
+     * @return string
+     */
+    public function generateRemovedHostLink ($host)
+    {
+        return $this->linkWithQuerySuffix(" -host:" . urlencode($host));
+    }
+
+    /**
+     * Builds the link that repeats the current search without a domain.
+     *
+     * @param string $domain
+     * @return string
+     */
+    public function generateRemovedDomainLink ($domain)
+    {
+        return $this->linkWithQuerySuffix(" -domain:" . urlencode($domain));
+    }
+
+    /**
+     * Builds a search link from the current request (minus "page") with a
+     * suffix appended to the search input.
+     *
+     * @param string $suffix text appended to the "eingabe" parameter
+     * @param array $overrides request parameters to set afterwards
+     * @return string
+     */
+    private function linkWithQuerySuffix($suffix, array $overrides = [])
+    {
+        $requestData = $this->request->except('page');
+        # A request without "eingabe" must not raise an undefined index notice.
+        $eingabe = isset($requestData['eingabe']) ? $requestData['eingabe'] : '';
+        $requestData['eingabe'] = $eingabe . $suffix;
+        foreach ($overrides as $key => $value) {
+            $requestData[$key] = $value;
+        }
+        return action('MetaGerSearch@search', $requestData);
+    }
+
+    public function getTab ()
+    {
+        return $this->tab;
+    }
+
+    public function getResults ()
+    {
+        return $this->results;
+    }
+
+    /**
+     * Removes the first ad from the list and returns its properties.
+     *
+     * @return array|null null when no ad is left
+     */
+    public function popAd()
+    {
+        if (count($this->ads) > 0) {
+            return get_object_vars(array_shift($this->ads));
+        }
+        return null;
+    }
+}
\ No newline at end of file
diff --git a/app/MetaGer_BACKUP_9840.php b/app/MetaGer_BACKUP_9840.php
new file mode 100644
index 
0000000000000000000000000000000000000000..06df2d58fec963a7e1fa9c8dc554798a653c7d6f --- /dev/null +++ b/app/MetaGer_BACKUP_9840.php @@ -0,0 +1,728 @@ +<?php +namespace App; + +use Illuminate\Http\Request; +use Jenssegers\Agent\Agent; +use App; +use Storage; +use Log; +use App\lib\TextLanguageDetect\TextLanguageDetect; +use App\lib\TextLanguageDetect\LanguageDetect\TextLanguageDetectException; +use Illuminate\Pagination\LengthAwarePaginator; +use Illuminate\Support\Collection; +#use \Illuminate\Pagination\Paginator; + +class MetaGer +{ + # Einstellungen für die Suche + protected $fokus; + protected $eingabe; + protected $q; + protected $category; + protected $time; + protected $page; + protected $lang; + protected $cache = ""; + protected $site; + protected $hostBlacklist = []; + protected $domainBlacklist = []; + protected $stopWords = []; + protected $engines = []; + protected $results = []; + protected $ads = []; + protected $warnings = []; + protected $errors = []; + protected $addedHosts = []; + # Daten über die Abfrage + protected $ip; + protected $language; + protected $agent; + # Konfigurationseinstellungen: + protected $sumaFile; + protected $mobile; + protected $resultCount; + protected $sprueche; + protected $domainsBlacklisted = []; + protected $urlsBlacklisted = []; + protected $url; + protected $languageDetect; + + function __construct() + { + $this->time = microtime(); + define('CRLF', "\r\n"); + define('BUFFER_LENGTH', 8192); + if( file_exists(config_path() . "/blacklistDomains.txt") && file_exists(config_path() . "/blacklistUrl.txt") ) + { + # Blacklists einlesen: + $tmp = file_get_contents(config_path() . "/blacklistDomains.txt"); + $this->domainsBlacklisted = explode("\n", $tmp); + $tmp = file_get_contents(config_path() . 
"/blacklistUrl.txt"); + $this->urlsBlacklisted = explode("\n", $tmp); + }else + { + Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden"); + } + + $this->languageDetect = new TextLanguageDetect(); + $this->languageDetect->setNameMode("2"); + } + + public function rankAll () + { + foreach( $this->engines as $engine ) + { + $engine->rank($this); + } + } + + public function createView() + { + $viewResults = []; + + # Wir extrahieren alle notwendigen Variablen und geben Sie an unseren View: + foreach($this->results as $result) + { + $viewResults[] = get_object_vars($result); + } + + switch ($this->out) { + case 'results': + return view('metager3results') + ->with('results', $viewResults) + ->with('eingabe', $this->eingabe) + ->with('mobile', $this->mobile) + ->with('warnings', $this->warnings) + ->with('errors', $this->errors) + ->with('metager', $this); + break; + default: + return view('metager3') + ->with('results', $viewResults) + ->with('eingabe', $this->eingabe) + ->with('mobile', $this->mobile) + ->with('warnings', $this->warnings) + ->with('errors', $this->errors) + ->with('metager', $this); + break; + } + } + + public function removeInvalids () + { + $results = []; + foreach($this->results as $result) + { + if($result->isValid($this)) + $results[] = $result; + } + #$this->results = $results; + } + + public function combineResults () + { + foreach($this->engines as $engine) + { + foreach($engine->results as $result) + { + if($result->valid) + $this->results[] = $result; + } + foreach($engine->ads as $ad) + { + $this->ads[] = $ad; + } + } + uasort($this->results, function($a, $b){ + if($a->getRank() == $b->getRank()) + return 0; + return ($a->getRank() < $b->getRank()) ? 
1 : -1; + }); + # Validate Results + $newResults = []; + foreach($this->results as $result) + { + if($result->isValid($this)) + $newResults[] = $result; + } + $this->results = $newResults; + + $counter = 0; + $firstRank = 0; + foreach($this->results as $result) + { + if($counter === 0) + $firstRank = $result->rank; + $counter++; + $result->number = $counter; + $confidence = 0; + if($firstRank > 0) + $confidence = $result->rank/$firstRank; + else + $confidence = 0; + if($confidence > 0.65) + $result->color = "#FF4000"; + elseif($confidence > 0.4) + $result->color = "#FF0080"; + elseif($confidence > 0.2) + $result->color = "#C000C0"; + else + $result->color = "#000000"; + } + + //Get current page form url e.g. &page=6 + $currentPage = LengthAwarePaginator::resolveCurrentPage(); + $offset= $currentPage-1; + + //Create a new Laravel collection from the array data + $collection = new Collection($this->results); + + //Define how many items we want to be visible in each page + $perPage = $this->resultCount; + + //Slice the collection to get the items to display in current page + $currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all(); + + //Create our paginator and pass it to the view + $paginatedSearchResults= new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage); + $paginatedSearchResults->setPath('/meta/meta.ger3'); + foreach($this->request->all() as $key => $value) + { + $paginatedSearchResults->addQuery($key, $value); + } + + $this->results = $paginatedSearchResults; + } + + public function createSearchEngines (Request $request) + { + + #die(SocketRocket::get("tls", "dominik-pfennig.de", "", 443)); + + + # Überprüfe, welche Sumas eingeschaltet sind + $xml = simplexml_load_file($this->sumaFile); + $enabledSearchengines = []; + $overtureEnabled = FALSE; + + if($this->fokus === "angepasst") + { + $sumas = $xml->xpath("suma"); + /**$maxSumas = 30; + $count = 0; + foreach($sumas as $suma) + { + if($maxSumas === 
$count) + break; + $enabledSearchengines[] = $suma; + $count++; + }**/ + foreach($sumas as $suma) + { + if($request->has($suma["service"]) + || ( $this->fokus !== "bilder" + && ($suma["name"]->__toString() === "qualigo" + || $suma["name"]->__toString() === "similar_product_ads" + || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) + ) + ) + #|| 1 === 1 #Todo: entfernen + ){ + + if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1")) + { + if($suma["name"]->__toString() === "overture") + { + $overtureEnabled = TRUE; + } + + $enabledSearchengines[] = $suma; + } + } + } + }else{ + $sumas = $xml->xpath("suma"); + foreach($sumas as $suma){ + $types = explode(",",$suma["type"]); + if(in_array($this->fokus, $types) + || ( $this->fokus !== "bilder" + && ($suma["name"]->__toString() === "qualigo" + || $suma["name"]->__toString() === "similar_product_ads" + || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) + ) + ) + ){ + if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1")) + { + if($suma["name"]->__toString() === "overture") + { + $overtureEnabled = TRUE; + } + $enabledSearchengines[] = $suma; + } + } + } + } + + if( ( $this->fokus !== "bilder" && sizeof($enabledSearchengines) <= 3 ) || ( $this->fokus === "bilder" && sizeof($enabledSearchengines) === 0) ) + { + $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt."; + } + + $engines = []; + foreach($enabledSearchengines as $engine){ + + if(strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1")) + { + continue; + } + # Wenn diese Suchmaschine gar nicht eingeschaltet sein soll + + $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString()); + + $time = microtime(); + $tmp = new $path($engine, $this); + + if($tmp->enabled && isset($this->debug)) + { + $this->warnings[] = $tmp->service . " Connection_Time: " . 
$tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime()-$time)/1000); + } + + if($tmp->isEnabled()) + { + $engines[] = $tmp; + $this->sockets[$tmp->name] = $tmp->fp; + } + } + + # Nun passiert ein elementarer Schritt. + # Wir warten auf die Antwort der Suchmaschinen, da wir vorher nicht weiter machen können. + # aber natürlich nicht ewig. + # Die Verbindung steht zu diesem Zeitpunkt und auch unsere Request wurde schon gesendet. + # Wir geben der Suchmaschine nun bis zu 500ms Zeit zu antworten. + usleep(500000); + # Jetzt lesen wir alles aus, was da ist und verwerfen den Rest: + foreach($engines as $engine) + { + $engine->retrieveResults(); + } + + + $this->engines = $engines; + } + + public function parseFormData (Request $request) + { + if($request->input('encoding', '') !== "utf8") + { + # In früheren Versionen, als es den Encoding Parameter noch nicht gab, wurden die Daten in ISO-8859-1 übertragen + $input = $request->all(); + foreach($input as $key => $value) + { + $input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1"); + } + $request->replace($input); + } + $this->url = $request->url(); + # Zunächst überprüfen wir die eingegebenen Einstellungen: + # FOKUS + $this->fokus = trans('fokiNames.' + . $request->input('focus', 'web')); + if(strpos($this->fokus,".")) + { + $this->fokus = trans('fokiNames.web'); + } + + # SUMA-FILE + if(App::isLocale("en")){ + $this->sumaFile = config_path() . "/sumasEn.xml"; + }else{ + $this->sumaFile = config_path() . "/sumas.xml"; + } + if(!file_exists($this->sumaFile)) + { + die("Suma-File konnte nicht gefunden werden"); + } + + # Sucheingabe: + $this->eingabe = trim($request->input('eingabe', '')); + if(strlen($this->eingabe) === 0) + { + $this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. 
Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.'; + } + $this->q = $this->eingabe; + + # IP: + if( isset($_SERVER['HTTP_FROM']) ) + { + $this->ip = $_SERVER['HTTP_FROM']; + }else + { + $this->ip = "127.0.0.1"; + } + # Language: + if( isset($_SERVER['HTTP_LANGUAGE']) ) + { + $this->language = $_SERVER['HTTP_LANGUAGE']; + }else + { + $this->language = ""; + } + # Category + $this->category = $request->input('category', ''); + # Request Times: + $this->time = $request->input('time', 1); + # Page + $this->page = $request->input('page', 1); + # Lang + $this->lang = $request->input('lang', 'all'); + if ( $this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all" ) + { + $this->lang = "all"; + } + $this->agent = new Agent(); + $this->mobile = $this->agent->isMobile(); + #Sprüche + $this->sprueche = $request->input('sprueche', 'on'); + # Ergebnisse pro Seite: + $this->resultCount = $request->input('resultCount', '20'); + + # Manchmal müssen wir Parameter anpassen um den Sucheinstellungen gerecht zu werden: + if( $request->has('dart') ) + { + $this->time = 10; + $this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt."; + } + if( $this->time < 0 || $this->time > 20 ) + { + $this->time = 1; + } + if( $request->has('minism') && ( $request->has('fportal') || $request->has('harvest') ) ) + { + $input = $request->all(); + $newInput = []; + foreach($input as $key => $value) + { + if( $key !== "fportal" && $key !== "harvest" ) + { + $newInput[$key] = $value; + } + } + $request->replace($newInput); + } + if( $request->has('ebay') ) + { + $this->time = 2; + $this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. 
Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt."; + } + if( App::isLocale("en") ) + { + $this->sprueche = "off"; + } + if($this->resultCount <= 0 || $this->resultCount > 200 ) + { + $this->resultCount = 1000; + } + if( $request->has('onenewspageAll') || $request->has('onenewspageGermanyAll') ) + { + $this->time = 5000; + $this->cache = "cache"; + } + if( $request->has('tab')) + { + if($request->input('tab') === "1") + { + $this->tab = "_blank"; + }else + { + $this->tab = "_self"; + } + }else + { + $this->tab = "_blank"; + } + $this->out = $request->input('out', "html"); + if($this->out !== "html" && $this->out !== "json" && $this->out !== "results" && $this->out !== "results-with-style") + $this->out = "html"; + $this->request = $request; + } + + public function checkSpecialSearches (Request $request) + { + # Site Search: + if(preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $match)) + { + $this->site = $match[2]; + $this->q = $match[1] . $match[3]; + $this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt."; + } + # Wenn die Suchanfrage um das Schlüsselwort "-host:*" ergänzt ist, sollen bestimmte Hosts nicht eingeblendet werden + # Wir prüfen, ob das hier der Fall ist: + while(preg_match("/(.*)(^|\s)-host:(\S+)(.*)/si", $this->q, $match)) + { + $this->hostBlacklist[] = $match[3]; + $this->q = $match[1] . $match[4]; + } + if( sizeof($this->hostBlacklist) > 0 ) + { + $hostString = ""; + foreach($this->hostBlacklist as $host) + { + $hostString .= $host . ", "; + } + $hostString = rtrim($hostString, ", "); + $this->warnings[] = "Ergebnisse von folgenden Hosts werden nicht angezeigt: \"" . $hostString . 
"\""; + } + # Wenn die Suchanfrage um das Schlüsselwort "-domain:*" ergänzt ist, sollen bestimmte Domains nicht eingeblendet werden + # Wir prüfen, ob das hier der Fall ist: + while(preg_match("/(.*)(^|\s)-domain:(\S+)(.*)/si", $this->q, $match)) + { + $this->domainBlacklist[] = $match[3]; + $this->q = $match[1] . $match[4]; + } + if( sizeof($this->domainBlacklist) > 0 ) + { + $domainString = ""; + foreach($this->domainBlacklist as $domain) + { + $domainString .= $domain . ", "; + } + $domainString = rtrim($domainString, ", "); + $this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . $domainString . "\""; + } + + # Alle mit "-" gepräfixten Worte sollen aus der Suche ausgeschlossen werden. + # Wir prüfen, ob das hier der Fall ist: + while(preg_match("/(.*)(^|\s)-(\S+)(.*)/si", $this->q, $match)) + { + $this->stopWords[] = $match[3]; + $this->q = $match[1] . $match[4]; + } + if( sizeof($this->stopWords) > 0 ) + { + $stopwordsString = ""; + foreach($this->stopWords as $stopword) + { + $stopwordsString .= $stopword . ", "; + } + $stopwordsString = rtrim($stopwordsString, ", "); + $this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . $stopwordsString . "\""; + } + + # Meldung über eine Phrasensuche + if(preg_match("/\"(.+)\"/si", $this->q, $match)){ + $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $match[1] . "\""; + } + } + + public function getFokus () + { + return $this->fokus; + } + + public function getIp () + { + return $this->ip; + } + + public function getEingabe () + { + return $this->eingabe; + } + + public function getQ () + { + if(strlen($this->site) > 0) + return $this->q . " site:" . 
$this->site; + else + return $this->q; + } + + public function getUrl () + { + return $this->url; + } + public function getTime () + { + return $this->time; + } + + public function getLanguage () + { + return $this->language; + } + + public function getLang () + { + return $this->lang; + } + + public function getSprueche () + { + return $this->sprueche; + } + + public function getCategory () + { + return $this->category; + } + + public function getSumaFile () + { + return $this->sumaFile; + } + + public function getUserHostBlacklist () + { + return $this->hostBlacklist; + } + + public function getUserDomainBlacklist () + { + return $this->domainBlacklist; + } + + public function getDomainBlacklist () + { + return $this->domainsBlacklisted; + } + + public function getUrlBlacklist () + { + return $this->urlsBlacklisted; + } + public function getLanguageDetect () + { + return $this->languageDetect; + } + public function getStopWords () + { + return $this->stopWords; + } +<<<<<<< HEAD + public function getHostCount($host) +======= + public function getHostCount(String $host) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + if(isset($this->addedHosts[$host])) + { + return $this->addedHosts[$host]; + }else + { + return 0; + } + } +<<<<<<< HEAD + public function addHostCount($host) +======= + public function addHostCount(String $host) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $hash = md5($host); + if(isset($this->addedHosts[$hash])) + { + $this->addedHosts[$hash] += 1; + }else + { + $this->addedHosts[$hash] = 1; + } + } + public function getSite() + { + return $this->site; + } +<<<<<<< HEAD + public function addLink($link) +======= + public function addLink(String $link) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $hash = md5($link); + if(isset($this->addedLinks[$hash])) + { + return false; + }else + { + $this->addedLinks[$hash] = 1; + + return true; + } + } + +<<<<<<< HEAD + public function generateSearchLink($fokus) +======= + public 
/**
 * Builds a link that repeats the current search restricted to one host.
 *
 * All request parameters except 'page' are carried over, " site:<host>" is
 * appended to the search input and the focus is forced to "web".
 *
 * The unresolved merge-conflict markers around the signature are removed;
 * the untyped HEAD signature is kept because it accepts every call the
 * type-hinted variant accepted.
 *
 * @param string $host Host the follow-up search should be limited to.
 * @return string URL produced by action() for MetaGerSearch@search.
 */
public function generateSiteSearchLink($host)
{
    $host = urlencode($host);
    $requestData = $this->request->except('page');

    # The request may not carry 'eingabe' at all; appending with ".=" to a
    # missing key raises an undefined-index notice, so start from "".
    $eingabe = isset($requestData['eingabe']) ? $requestData['eingabe'] : '';
    $requestData['eingabe'] = $eingabe . " site:$host";
    $requestData['focus'] = "web";

    return action('MetaGerSearch@search', $requestData);
}
/**
 * Prepares a fresh MetaGer instance.
 *
 * Records the start time, makes sure the CRLF and BUFFER_LENGTH constants
 * exist, loads the domain and URL blacklists from the config directory
 * (both files must be present, otherwise a warning is logged and both lists
 * stay empty) and sets up the language detector.
 */
public function __construct()
{
    $this->time = microtime();

    # define() is process-wide: creating a second instance must not try to
    # redefine the constants, which would raise "constant already defined".
    if (!defined('CRLF')) {
        define('CRLF', "\r\n");
    }
    if (!defined('BUFFER_LENGTH')) {
        define('BUFFER_LENGTH', 8192);
    }

    $domainFile = config_path() . "/blacklistDomains.txt";
    $urlFile = config_path() . "/blacklistUrl.txt";

    if (file_exists($domainFile) && file_exists($urlFile)) {
        # Read the blacklists
        $this->domainsBlacklisted = $this->readBlacklistFile($domainFile);
        $this->urlsBlacklisted = $this->readBlacklistFile($urlFile);
    } else {
        Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
    }

    $this->languageDetect = new TextLanguageDetect();
    $this->languageDetect->setNameMode("2");
}

/**
 * Reads one blacklist file into a list with one entry per non-empty line.
 *
 * Surrounding whitespace (including the "\r" of CRLF files) is stripped and
 * blank lines are dropped, so a trailing newline no longer produces an
 * empty-string entry.
 *
 * @param string $path Absolute path of the blacklist file.
 * @return array List of entries; empty if the file cannot be read.
 */
private function readBlacklistFile($path)
{
    $content = file_get_contents($path);
    if ($content === false) {
        return [];
    }

    $lines = array_map('trim', explode("\n", $content));

    return array_values(array_filter($lines, 'strlen'));
}
/**
 * Selects the search engines for this request, instantiates them and
 * collects their answers.
 *
 * Selection rules, read from the suma XML file:
 *  - focus "angepasst": an engine is chosen when the request carries a
 *    parameter named like the engine's "service" attribute;
 *  - any other focus: an engine is chosen when the focus is listed in the
 *    engine's comma separated "type" attribute;
 *  - unless the focus is "bilder", the engines "qualigo" and
 *    "similar_product_ads" are always added, and "overtureAds" is added as
 *    long as no "overture" engine has been enabled before it;
 *  - engines flagged disabled="1" are never used.
 *
 * @param Request $request Current HTTP request (consulted for the
 *                         per-engine switches of the custom focus).
 * @return void Result is stored in $this->engines; problems are appended
 *              to $this->errors / $this->warnings.
 */
public function createSearchEngines (Request $request)
{
    # Check which search engines are switched on
    $xml = simplexml_load_file($this->sumaFile);
    $enabledSearchengines = [];
    $overtureEnabled = FALSE;
    $customFocus = ($this->fokus === "angepasst");

    foreach ($xml->xpath("suma") as $suma) {
        $name = (string) $suma["name"];

        if ($customFocus) {
            $selected = $request->has($suma["service"]);
        } else {
            $selected = in_array($this->fokus, explode(",", $suma["type"]), true);
        }

        # Advertising engines ride along with every non-image search.
        $isAdEngine = $this->fokus !== "bilder"
            && ( $name === "qualigo"
                || $name === "similar_product_ads"
                || ( !$overtureEnabled && $name === "overtureAds" ) );

        if (!$selected && !$isAdEngine) {
            continue;
        }
        if (isset($suma['disabled']) && $suma['disabled']->__toString() === "1") {
            continue;
        }

        if ($name === "overture") {
            $overtureEnabled = TRUE;
        }
        $enabledSearchengines[] = $suma;
    }

    # With a non-image focus up to three entries can be the always-on ad
    # engines, so three or fewer is treated as "nothing selected".
    $count = sizeof($enabledSearchengines);
    if ( ( $this->fokus !== "bilder" && $count <= 3 ) || ( $this->fokus === "bilder" && $count === 0 ) ) {
        $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.";
    }

    $engines = [];
    foreach ($enabledSearchengines as $engine) {
        # During a site search skip every engine that cannot do site search
        if (strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1")) {
            continue;
        }

        $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());

        # microtime() without argument returns a "msec sec" string that cannot
        # be subtracted meaningfully; microtime(true) yields float seconds.
        $start = microtime(true);
        $tmp = new $path($engine, $this);

        if ($tmp->enabled && isset($this->debug)) {
            # Elapsed time is reported in milliseconds.
            # NOTE(review): confirm this matches the unit of connection_time / write_time.
            $elapsed = (microtime(true) - $start) * 1000;
            $this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . $elapsed;
        }

        if ($tmp->isEnabled()) {
            $engines[] = $tmp;
            $this->sockets[$tmp->name] = $tmp->fp;
        }
    }

    # Essential step: we have to wait for the engines to answer before we can
    # go on, but not forever. The connections are open and the requests have
    # already been sent at this point, so every engine gets up to 500ms.
    usleep(500000);

    # Read whatever has arrived by now and discard the rest
    foreach ($engines as $engine) {
        $engine->retrieveResults();
    }

    $this->engines = $engines;
}
Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.'; + } + $this->q = $this->eingabe; + + # IP: + if( isset($_SERVER['HTTP_FROM']) ) + { + $this->ip = $_SERVER['HTTP_FROM']; + }else + { + $this->ip = "127.0.0.1"; + } + # Language: + if( isset($_SERVER['HTTP_LANGUAGE']) ) + { + $this->language = $_SERVER['HTTP_LANGUAGE']; + }else + { + $this->language = ""; + } + # Category + $this->category = $request->input('category', ''); + # Request Times: + $this->time = $request->input('time', 1); + # Page + $this->page = $request->input('page', 1); + # Lang + $this->lang = $request->input('lang', 'all'); + if ( $this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all" ) + { + $this->lang = "all"; + } + $this->agent = new Agent(); + $this->mobile = $this->agent->isMobile(); + #Sprüche + $this->sprueche = $request->input('sprueche', 'on'); + # Ergebnisse pro Seite: + $this->resultCount = $request->input('resultCount', '20'); + + # Manchmal müssen wir Parameter anpassen um den Sucheinstellungen gerecht zu werden: + if( $request->has('dart') ) + { + $this->time = 10; + $this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt."; + } + if( $this->time < 0 || $this->time > 20 ) + { + $this->time = 1; + } + if( $request->has('minism') && ( $request->has('fportal') || $request->has('harvest') ) ) + { + $input = $request->all(); + $newInput = []; + foreach($input as $key => $value) + { + if( $key !== "fportal" && $key !== "harvest" ) + { + $newInput[$key] = $value; + } + } + $request->replace($newInput); + } + if( $request->has('ebay') ) + { + $this->time = 2; + $this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. 
/**
 * Strips special operators from the query ($this->q) and records them.
 *
 * Handled operators:
 *  - "site:<host>"    restricts the search to one site ($this->site)
 *  - "-host:<host>"   hides results from a host ($this->hostBlacklist)
 *  - "-domain:<dom>"  hides results from a domain ($this->domainBlacklist)
 *  - "-<word>"        excludes results containing the word ($this->stopWords)
 * A quoted phrase only produces an informational warning.
 * For every operator found a message is appended to $this->warnings.
 *
 * The lists are joined with implode(); the former rtrim($list, ", ") also
 * removed commas that belonged to the last term itself.
 *
 * @param Request $request Current HTTP request (not used here).
 * @return void
 */
public function checkSpecialSearches (Request $request)
{
    # Site search
    if (preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $match)) {
        $this->site = $match[2];
        $this->q = $match[1] . $match[3];
        $this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt.";
    }

    # "-host:*" hides results of the given hosts
    foreach ($this->extractExcludedTerms("host:") as $host) {
        $this->hostBlacklist[] = $host;
    }
    if (sizeof($this->hostBlacklist) > 0) {
        $this->warnings[] = "Ergebnisse von folgenden Hosts werden nicht angezeigt: \"" . implode(", ", $this->hostBlacklist) . "\"";
    }

    # "-domain:*" hides results of the given domains
    foreach ($this->extractExcludedTerms("domain:") as $domain) {
        $this->domainBlacklist[] = $domain;
    }
    if (sizeof($this->domainBlacklist) > 0) {
        $this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . implode(", ", $this->domainBlacklist) . "\"";
    }

    # Every remaining word prefixed with "-" is excluded from the search.
    # This has to run after the "-host:" / "-domain:" passes, otherwise it
    # would swallow those operators as plain stop words.
    foreach ($this->extractExcludedTerms("") as $stopword) {
        $this->stopWords[] = $stopword;
    }
    if (sizeof($this->stopWords) > 0) {
        $this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . implode(", ", $this->stopWords) . "\"";
    }

    # Notice about a phrase search
    if (preg_match("/\"(.+)\"/si", $this->q, $match)) {
        $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $match[1] . "\"";
    }
}

/**
 * Removes every "-<keyword><term>" operator from $this->q and returns the terms.
 *
 * The leading greedy group makes the pattern match the last occurrence
 * first, so terms come back in reverse order of appearance.
 *
 * @param string $keyword Literal operator name including its colon
 *                        ("host:", "domain:") or "" for plain stop words.
 * @return array List of extracted terms.
 */
private function extractExcludedTerms($keyword)
{
    $terms = [];
    while (preg_match('/(.*)(^|\s)-' . $keyword . '(\S+)(.*)/si', $this->q, $match)) {
        $terms[] = $match[3];
        $this->q = $match[1] . $match[4];
    }

    return $terms;
}
/**
 * Returns how many results have been counted for the given host so far.
 *
 * addHostCount() keeps its counters under md5($host), so the lookup must
 * hash the host the same way. Looking the raw host name up never matched
 * and the method always answered 0.
 *
 * @param string $host Host name exactly as it is passed to addHostCount().
 * @return int Number of recorded hits, 0 if the host has not been counted.
 */
public function getHostCount($host)
{
    $hash = md5($host);

    return isset($this->addedHosts[$hash]) ? $this->addedHosts[$hash] : 0;
}
/**
 * Builds a link that repeats the current search without results from one host.
 *
 * All request parameters except 'page' are carried over and
 * " -host:<host>" is appended to the search input.
 *
 * @param string $host Host whose results should be hidden.
 * @return string URL produced by action() for MetaGerSearch@search.
 */
public function generateRemovedHostLink ($host)
{
    $host = urlencode($host);
    $requestData = $this->request->except('page');

    # parseFormData() reads 'eingabe' with a default, i.e. it may be absent;
    # appending with ".=" to a missing key raises an undefined-index notice.
    $eingabe = isset($requestData['eingabe']) ? $requestData['eingabe'] : '';
    $requestData['eingabe'] = $eingabe . " -host:$host";

    return action('MetaGerSearch@search', $requestData);
}
/**
 * Prepares a fresh MetaGer instance.
 *
 * Records the start time, makes sure the CRLF and BUFFER_LENGTH constants
 * exist, loads the domain and URL blacklists from the config directory
 * (both files must be present, otherwise a warning is logged and both lists
 * stay empty) and sets up the language detector.
 */
public function __construct()
{
    $this->time = microtime();

    # define() is process-wide: creating a second instance must not try to
    # redefine the constants, which would raise "constant already defined".
    if (!defined('CRLF')) {
        define('CRLF', "\r\n");
    }
    if (!defined('BUFFER_LENGTH')) {
        define('BUFFER_LENGTH', 8192);
    }

    $domainFile = config_path() . "/blacklistDomains.txt";
    $urlFile = config_path() . "/blacklistUrl.txt";

    if (file_exists($domainFile) && file_exists($urlFile)) {
        # Read the blacklists
        $this->domainsBlacklisted = $this->readBlacklistFile($domainFile);
        $this->urlsBlacklisted = $this->readBlacklistFile($urlFile);
    } else {
        Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
    }

    $this->languageDetect = new TextLanguageDetect();
    $this->languageDetect->setNameMode("2");
}

/**
 * Reads one blacklist file into a list with one entry per non-empty line.
 *
 * Surrounding whitespace (including the "\r" of CRLF files) is stripped and
 * blank lines are dropped, so a trailing newline no longer produces an
 * empty-string entry.
 *
 * @param string $path Absolute path of the blacklist file.
 * @return array List of entries; empty if the file cannot be read.
 */
private function readBlacklistFile(string $path)
{
    $content = file_get_contents($path);
    if ($content === false) {
        return [];
    }

    $lines = array_map('trim', explode("\n", $content));

    return array_values(array_filter($lines, 'strlen'));
}
/**
 * Merges the results and ads of all engines into one ranked, paginated list.
 *
 * Steps: gather every result flagged valid plus all ads, order the results
 * by descending rank, keep only those that pass isValid(), number them and
 * colour them by their rank relative to the top result, then wrap the list
 * in a LengthAwarePaginator that carries the request parameters.
 *
 * @return void $this->results ends up holding the paginator.
 */
public function combineResults ()
{
    foreach ($this->engines as $searchEngine) {
        foreach ($searchEngine->results as $candidate) {
            if ($candidate->valid) {
                $this->results[] = $candidate;
            }
        }
        foreach ($searchEngine->ads as $advert) {
            $this->ads[] = $advert;
        }
    }

    # Highest rank first
    uasort($this->results, function ($left, $right) {
        $leftRank = $left->getRank();
        $rightRank = $right->getRank();
        if ($leftRank == $rightRank) {
            return 0;
        }
        return ($leftRank < $rightRank) ? 1 : -1;
    });

    # Validate results (in rank order)
    $validated = [];
    foreach ($this->results as $candidate) {
        if ($candidate->isValid($this)) {
            $validated[] = $candidate;
        }
    }
    $this->results = $validated;

    # Number the results and colour them by confidence, i.e. their rank
    # relative to the rank of the first (best) result.
    $topRank = 0;
    foreach ($this->results as $position => $entry) {
        if ($position === 0) {
            $topRank = $entry->rank;
        }
        $entry->number = $position + 1;

        $confidence = ($topRank > 0) ? $entry->rank / $topRank : 0;

        if ($confidence > 0.65) {
            $entry->color = "#FF4000";
        } elseif ($confidence > 0.4) {
            $entry->color = "#FF0080";
        } elseif ($confidence > 0.2) {
            $entry->color = "#C000C0";
        } else {
            $entry->color = "#000000";
        }
    }

    # Current page comes from the URL, e.g. &page=6
    $pageIndex = LengthAwarePaginator::resolveCurrentPage() - 1;

    # Items per page
    $pageSize = $this->resultCount;

    # Cut the slice belonging to the current page out of the full list
    $allResults = new Collection($this->results);
    $pageItems = $allResults->slice($pageIndex * $pageSize, $pageSize)->all();

    # Build the paginator and let its links carry every request parameter
    $paginator = new LengthAwarePaginator($pageItems, count($allResults), $pageSize);
    $paginator->setPath('/meta/meta.ger3');
    foreach ($this->request->all() as $param => $paramValue) {
        $paginator->addQuery($param, $paramValue);
    }

    $this->results = $paginator;
}
+ if(in_array($this->fokus, $types) + || ( $this->fokus !== "bilder" + && ($suma["name"]->__toString() === "qualigo" + || $suma["name"]->__toString() === "similar_product_ads" + || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) + ) + ) + ){ + if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1")) + { + if($suma["name"]->__toString() === "overture") + { + $overtureEnabled = TRUE; + } + $enabledSearchengines[] = $suma; + } + } + } + } + + if( ( $this->fokus !== "bilder" && sizeof($enabledSearchengines) <= 3 ) || ( $this->fokus === "bilder" && sizeof($enabledSearchengines) === 0) ) + { + $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt."; + } + + $engines = []; + foreach($enabledSearchengines as $engine){ + + if(strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1")) + { + continue; + } + # Wenn diese Suchmaschine gar nicht eingeschaltet sein soll + + $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString()); + + $time = microtime(); + $tmp = new $path($engine, $this); + + if($tmp->enabled && isset($this->debug)) + { + $this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime()-$time)/1000); + } + + if($tmp->isEnabled()) + { + $engines[] = $tmp; + $this->sockets[$tmp->name] = $tmp->fp; + } + } + + # Nun passiert ein elementarer Schritt. + # Wir warten auf die Antwort der Suchmaschinen, da wir vorher nicht weiter machen können. + # aber natürlich nicht ewig. + # Die Verbindung steht zu diesem Zeitpunkt und auch unsere Request wurde schon gesendet. + # Wir geben der Suchmaschine nun bis zu 500ms Zeit zu antworten. 
/**
 * Reads all search settings from the request and stores them on the instance.
 *
 * Covers encoding normalisation, focus, suma file selection, search input,
 * client data taken from $_SERVER, category, timing, paging, language,
 * device detection, result count, link target and output format. Invalid
 * values fall back to defaults; hints for the user go to $this->warnings.
 *
 * Fixes compared with the previous version:
 *  - array-valued inputs are converted element by element instead of being
 *    forced through a string cast;
 *  - non-numeric 'time' / 'resultCount' values fall back to the defaults
 *    instead of slipping through the range checks;
 *  - the "unknown focus" check no longer relies on strpos() truthiness.
 *
 * @param Request $request Current HTTP request; may be modified via replace().
 * @return void
 */
public function parseFormData (Request $request)
{
    if ($request->input('encoding', '') !== "utf8") {
        # Earlier versions had no encoding parameter and sent their data as ISO-8859-1
        $input = $request->all();
        array_walk_recursive($input, function (&$value) {
            if (is_string($value)) {
                $value = mb_convert_encoding($value, "UTF-8", "ISO-8859-1");
            }
        });
        $request->replace($input);
    }
    $this->url = $request->url();

    # First validate the submitted settings
    # Focus
    $this->fokus = trans('fokiNames.' . $request->input('focus', 'web'));
    # Presumably an unknown focus comes back from trans() as the untranslated
    # "fokiNames.…" key, which contains a dot — then use the web focus.
    if (strpos($this->fokus, ".") !== false) {
        $this->fokus = trans('fokiNames.web');
    }

    # Suma file
    if (App::isLocale("en")) {
        $this->sumaFile = config_path() . "/sumasEn.xml";
    } else {
        $this->sumaFile = config_path() . "/sumas.xml";
    }
    if (!file_exists($this->sumaFile)) {
        die("Suma-File konnte nicht gefunden werden");
    }

    # Search input
    $this->eingabe = trim($request->input('eingabe', ''));
    if (strlen($this->eingabe) === 0) {
        $this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.';
    }
    $this->q = $this->eingabe;

    # IP (taken from the "From" request header, 127.0.0.1 if absent)
    $this->ip = isset($_SERVER['HTTP_FROM']) ? $_SERVER['HTTP_FROM'] : "127.0.0.1";
    # Language (taken from the "Language" request header)
    $this->language = isset($_SERVER['HTTP_LANGUAGE']) ? $_SERVER['HTTP_LANGUAGE'] : "";

    # Category
    $this->category = $request->input('category', '');
    # Request time
    $this->time = $request->input('time', 1);
    # Page
    $this->page = $request->input('page', 1);
    # Lang
    $this->lang = $request->input('lang', 'all');
    if ($this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all") {
        $this->lang = "all";
    }
    $this->agent = new Agent();
    $this->mobile = $this->agent->isMobile();
    # Quotes ("Sprüche")
    $this->sprueche = $request->input('sprueche', 'on');
    # Results per page
    $this->resultCount = $request->input('resultCount', '20');

    # Some parameters have to be adjusted to match the chosen search settings
    if ($request->has('dart')) {
        $this->time = 10;
        $this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt.";
    }
    if (!is_numeric($this->time) || $this->time < 0 || $this->time > 20) {
        $this->time = 1;
    }
    if ($request->has('minism') && ( $request->has('fportal') || $request->has('harvest') )) {
        # With 'minism' active the 'fportal' and 'harvest' switches are dropped
        $newInput = [];
        foreach ($request->all() as $key => $value) {
            if ($key !== "fportal" && $key !== "harvest") {
                $newInput[$key] = $value;
            }
        }
        $request->replace($newInput);
    }
    if ($request->has('ebay')) {
        # NOTE(review): this also lowers the 10 seconds set for 'dart' — confirm intended.
        $this->time = 2;
        $this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt.";
    }
    if (App::isLocale("en")) {
        $this->sprueche = "off";
    }
    if (!is_numeric($this->resultCount) || $this->resultCount <= 0 || $this->resultCount > 200) {
        # NOTE(review): out-of-range values become 1000, above the accepted maximum of 200 — confirm intended.
        $this->resultCount = 1000;
    }
    if ($request->has('onenewspageAll') || $request->has('onenewspageGermanyAll')) {
        $this->time = 5000;
        $this->cache = "cache";
    }

    # Link target: only an explicit tab value other than "1" opens results in the same tab
    if ($request->has('tab') && $request->input('tab') !== "1") {
        $this->tab = "_self";
    } else {
        $this->tab = "_blank";
    }

    # Output format, restricted to the known values
    $this->out = $request->input('out', "html");
    if (!in_array($this->out, ["html", "json", "results", "results-with-style"], true)) {
        $this->out = "html";
    }
    $this->request = $request;
}
"\""; + } + # Wenn die Suchanfrage um das Schlüsselwort "-domain:*" ergänzt ist, sollen bestimmte Domains nicht eingeblendet werden + # Wir prüfen, ob das hier der Fall ist: + while(preg_match("/(.*)(^|\s)-domain:(\S+)(.*)/si", $this->q, $match)) + { + $this->domainBlacklist[] = $match[3]; + $this->q = $match[1] . $match[4]; + } + if( sizeof($this->domainBlacklist) > 0 ) + { + $domainString = ""; + foreach($this->domainBlacklist as $domain) + { + $domainString .= $domain . ", "; + } + $domainString = rtrim($domainString, ", "); + $this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . $domainString . "\""; + } + + # Alle mit "-" gepräfixten Worte sollen aus der Suche ausgeschlossen werden. + # Wir prüfen, ob das hier der Fall ist: + while(preg_match("/(.*)(^|\s)-(\S+)(.*)/si", $this->q, $match)) + { + $this->stopWords[] = $match[3]; + $this->q = $match[1] . $match[4]; + } + if( sizeof($this->stopWords) > 0 ) + { + $stopwordsString = ""; + foreach($this->stopWords as $stopword) + { + $stopwordsString .= $stopword . ", "; + } + $stopwordsString = rtrim($stopwordsString, ", "); + $this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . $stopwordsString . "\""; + } + + # Meldung über eine Phrasensuche + if(preg_match("/\"(.+)\"/si", $this->q, $match)){ + $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $match[1] . "\""; + } + } + + public function getFokus () + { + return $this->fokus; + } + + public function getIp () + { + return $this->ip; + } + + public function getEingabe () + { + return $this->eingabe; + } + + public function getQ () + { + if(strlen($this->site) > 0) + return $this->q . " site:" . 
/**
 * Returns how many results of the given host have been registered so far.
 *
 * addHostCount() stores its counters under md5($host). The lookup therefore
 * has to hash the host in the same way; indexing $this->addedHosts with the
 * raw host name never finds an entry and always yields 0.
 *
 * @param string $host host name exactly as it is passed to addHostCount()
 * @return int number of registered results for this host, 0 if there are none
 */
public function getHostCount(String $host)
{
    $hash = md5($host);
    if (isset($this->addedHosts[$hash])) {
        return $this->addedHosts[$hash];
    }

    return 0;
}
/**
 * Removes the first advertisement from the queue and returns its properties.
 *
 * @return array|null the accessible properties of the removed ad as an
 *                    associative array, or null when no ad is left
 */
public function popAd()
{
    if (count($this->ads) <= 0) {
        return null;
    }

    $nextAd = array_shift($this->ads);

    return get_object_vars($nextAd);
}
/**
 * Calculates the rank of this result and stores it in $this->rank.
 *
 * The rank is the sum of
 *  - the position bonus derived from $this->sourceRank,
 *  - a URL boost based on the characters of the query,
 *  - a boost for query words found in title and description,
 * multiplied by the engine boost when that is greater than zero.
 *
 * Changes compared to the previous version:
 *  - the substring fallback uses the raw query word; it used to search for
 *    the preg_quote()-escaped word, which never matches words that contain
 *    regex metacharacters such as "c++",
 *  - empty query terms are skipped; they used to produce the pattern
 *    "/\b\b/" (matches almost everything) and an empty-needle strpos(),
 *  - the URL boost loop is skipped for an empty normalised query.
 *
 * @param \App\MetaGer $metager provides the current query via getQ()
 */
public function rank (\App\MetaGer $metager)
{
    # Position bonus from the source engine.
    $rank = 0;
    $rank += ($this->sourceRank * 0.02);

    # URL boost
    $link = $this->anzeigeLink;
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }
    $link = @parse_url($link, PHP_URL_HOST) . @parse_url($link, PHP_URL_PATH);
    $tmpEingabe = $metager->getQ();
    $count = 0;
    $tmpLink = "";

    $regex = [
        "/\s+/si",
        "/http:/si",
        "/https:/si",
        "/www\./si",
        "/\//si",
        "/\./si",
        "/-/si"
    ];
    # The complete pattern list is applied once per list entry, as before.
    # Repeated passes also remove tokens that only appear after separators
    # were stripped (e.g. "ht/tp:").
    foreach ($regex as $reg) {
        $link = preg_replace($regex, "", $link);
        $tmpEingabe = preg_replace($regex, "", $tmpEingabe);
    }

    # NOTE(review): $tmpLink is never filled and "strpos(...) >= 0" is true
    # even when strpos() returns false, so $count simply counts the loop
    # iterations. This looks unintended, but correcting it shifts every rank;
    # it is kept as is until the intended formula is confirmed.
    if ($tmpEingabe !== null && $tmpEingabe !== "") {
        foreach (str_split($tmpEingabe) as $char) {
            if (strpos(strtolower($tmpLink), strtolower($char)) >= 0) {
                $count++;
                $tmpLink = str_replace(urlencode($char), "", $tmpLink);
            }
            if (strlen($this->descr) > 80 && strlen($link) > 0) {
                $rank += $count / ((strlen($link)) * 60);
            }
        }
    }

    # Boost for occurrences of the search words.
    $maxRank = 0.1;
    $tmpTitle = $this->titel;
    $tmpDescription = $this->descr;
    $tmpRank = 0;
    $tmpEingabe = $metager->getQ();
    # Words of up to three characters are ignored.
    $tmpEingabe = preg_replace("/\b\w{1,3}\b/si", "", $tmpEingabe);
    $tmpEingabe = preg_replace("/\s+/si", " ", $tmpEingabe);
    $terms = explode(" ", trim($tmpEingabe));
    foreach ($terms as $el) {
        if ($el === "") {
            continue;
        }
        $wordPattern = "/\b" . preg_quote($el, "/") . "\b/si";

        # A whole-word hit counts more than a plain substring hit; the title
        # carries 60% of the boost, the description 40%.
        if (preg_match($wordPattern, $tmpTitle)) {
            $tmpRank += .7 * .6 * $maxRank;
        } elseif (strpos($tmpTitle, $el) !== false) {
            $tmpRank += .3 * .6 * $maxRank;
        }

        if (preg_match($wordPattern, $tmpDescription)) {
            $tmpRank += .7 * .4 * $maxRank;
        } elseif (strpos($tmpDescription, $el) !== false) {
            $tmpRank += .3 * .4 * $maxRank;
        }
    }
    # explode() always returns at least one element, so the divisor is never 0.
    $tmpRank /= sizeof($terms) * 10;
    $rank += $tmpRank;

    if ($this->engineBoost > 0) {
        $rank *= floatval($this->engineBoost);
    }

    $this->rank = $rank;
}
/**
 * Returns the host part of a link without a leading "www.".
 *
 * The signature was left with unresolved merge-conflict markers, which is a
 * parse error. The untyped variant is kept so the method accepts whatever the
 * constructor passes in.
 *
 * @param string $link link with or without scheme; "http://" is prepended
 *                     when the link does not start with "http"
 * @return string|null host without the "www." prefix
 */
private function getStrippedHost ($link)
{
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }
    # parse_url() problems are silenced on purpose, as before.
    $link = @parse_url($link, PHP_URL_HOST);
    $link = preg_replace("/^www\./si", "", $link);

    return $link;
}
/**
 * Sets up the engine from its XML definition and immediately sends the
 * search request.
 *
 * Steps: copy all XML attributes onto the object, count the usage in Redis,
 * honour or lift a temporary deactivation, prepare user agent, client IP and
 * the "found by" link, build the GET string, fetch a socket and write the
 * request. When no socket could be obtained the engine is disabled via
 * disable().
 *
 * @param \SimpleXMLElement $engine  XML node describing this search engine
 * @param MetaGer           $metager current search, source of query, IP, etc.
 */
function __construct(\SimpleXMLElement $engine, MetaGer $metager)
{
    foreach ($engine->attributes() as $attribute => $xmlValue) {
        $this->$attribute = $xmlValue->__toString();
    }
    $this->engine = $engine;

    # Register the usage of this search engine.
    $this->uses = intval(Redis::hget($this->name, "uses")) + 1;
    Redis::hset($this->name, "uses", $this->uses);

    # An engine can be switched off temporarily after connection problems.
    if (isset($this->disabled)) {
        if (strtotime($this->disabled) <= time()) {
            # The timeout has expired, switch the engine back on.
            $this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet.");
        } elseif (strtotime($this->disabled) > time()) {
            $this->enabled = false;
            return;
        }
    }

    # User agent: forward the client's one, fall back to a fixed Firefox string.
    $this->useragent = isset($_SERVER['HTTP_USER_AGENT'])
        ? $_SERVER['HTTP_USER_AGENT']
        : "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1";
    $this->ip = $metager->getIp();
    $this->gefVon = "<a href=\"" . $this->homepage . "\" target=\"_blank\">" . $this->displayName . "</a>";
    $this->startTime = microtime();

    $this->getString = $this->generateGetString(
        $metager->getQ(),
        $metager->getUrl(),
        $metager->getLanguage(),
        $metager->getCategory()
    );

    # We need a free socket to talk to the engine.
    # NOTE(review): microtime(true) is in seconds, so dividing the difference
    # by 1000000 looks like a unit mix-up; kept unchanged, confirm the
    # intended unit of the statistics.
    $connectStart = microtime(true);
    $this->fp = $this->getFreeSocket();
    $this->setStatistic("connection_time", ((microtime(true) - $connectStart) / 1000000));

    if ($this->fp) {
        $writeStart = microtime(true);
        $this->writeRequest();
        $this->setStatistic("write_time", ((microtime(true) - $writeStart) / 1000000));
    } else {
        $this->disable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde für 1h deaktiviert, weil keine Verbindung aufgebaut werden konnte");
    }
}
" senden"); + } + + if($sent >= strlen($out)) + break; + } + } + + public function rank (\App\MetaGer $metager) + { + foreach($this->results as $result) + { + $result->rank($metager); + } + } + + private function getFreeSocket() + { + # Je nach Auslastung des Servers ( gleichzeitige Abfragen ), kann es sein, dass wir mehrere Sockets benötigen um die Abfragen ohne Wartezeit beantworten zu können. + # pfsockopen öffnet dabei einen persistenten Socket, der also auch zwischen den verschiedenen php Prozessen geteilt werden kann. + # Wenn der Hostname mit einem bereits erstellten Socket übereinstimmt, wird die Verbindung also aufgegriffen und fortgeführt. + # Allerdings dürfen wir diesen nur verwenden, wenn er nicht bereits von einem anderen Prozess zur Kommunikation verwendet wird. + # Wenn dem so ist, probieren wir den nächsten Socket zu verwenden. + # Dies festzustellen ist komplizierter, als man sich das vorstellt. Folgendes System sollte funktionieren: + # 1. Stelle fest, ob dieser Socket neu erstellt wurde, oder ob ein existierender geöffnet wurde. + $counter = 0; $fp = null; + do + { + + if( intval(Redis::exists($this->host . ".$counter")) === 0 ) + { + Redis::set($this->host . ".$counter", 1); + Redis::expire($this->host . ".$counter", 5); + $this->socketNumber = $counter; + + try + { + $fp = pfsockopen($this->getHost() . ":" . $this->port . "/$counter", $this->port, $errstr, $errno, 1); + }catch(\ErrorException $e) + { + break; + } + # Wir gucken, ob der Lesepuffer leer ist: + stream_set_blocking($fp, 0); + if(fgets($fp, BUFFER_LENGTH) !== false) + { + Log::error("Der Lesepuffer von: " . $this->name . " war nach dem Erstellen nicht leer. Musste den Socket neu starten."); + fclose($fp); + $fp = pfsockopen($this->getHost() . ":" . $this->port . "/$counter", $this->port, $errstr, $errno, 1); + } + header($this->name . ": " . $counter . "_" . 
/**
 * Folds a new measurement into the statistic stored for this engine.
 *
 * The value currently stored in the Redis hash of this engine is multiplied
 * by $this->uses, the new measurement (negative values count as 0) is added
 * and the sum is divided by $this->uses again. The result is written back to
 * Redis and mirrored into the property named $key.
 *
 * The signature was left with unresolved merge-conflict markers, which is a
 * parse error; the variant with an untyped $key is kept.
 *
 * @param string $key name of the statistic, e.g. "connection_time"
 * @param float  $val new measurement
 */
private function setStatistic($key, float $val)
{
    $oldVal = floatval(Redis::hget($this->name, $key)) * $this->uses;
    $newVal = ($oldVal + max($val, 0)) / $this->uses;
    Redis::hset($this->name, $key, $newVal);
    $this->$key = $newVal;
}
"']")['0']['disabled']); + $xml->saveXML($sumaFile); + } + + public function closeFp() + { + fclose($this->fp); + } + + public function retrieveResults() + { + $time = microtime(true); + $headers = ''; + $body = ''; + $length = 0; + if(!$this->fp) + { + return; + } + // get headers FIRST + $c = 0; + stream_set_blocking($this->fp, 0); + do + { + // use fgets() not fread(), fgets stops reading at first newline + // or buffer which ever one is reached first + $data = fgets($this->fp, BUFFER_LENGTH); + // a sincle CRLF indicates end of headers + if ($data === false || $data == CRLF || feof($this->fp) || ((microtime()-$time)/1000000) > 100 ) { + // break BEFORE OUTPUT + break; + } + if( sizeof(($tmp = explode(": ", $data))) === 2 ) + $headers[trim($tmp[0])] = trim($tmp[1]); + $c++; + } + while (true); + // end of headers + if(sizeof($headers) > 1){ + $bodySize = 0; + if( isset($headers["Transfer-Encoding"]) && $headers["Transfer-Encoding"] === "chunked" ) + { + $body = $this->readChunked(); + + }elseif( isset($headers['Content-Length']) ) + { + $length = trim($headers['Content-Length']); + if(is_numeric($length) && $length >= 1) + $body = $this->readBody($length); + $bodySize = strlen($body); + }else + { + die("Konnte nicht herausfinden, wie ich die Serverantwort von: " . $this->name . " auslesen soll. Header war: " . print_r($headers)); + } + } + + Redis::del($this->host . "." . $this->socketNumber); + $this->setStatistic("read_time", ((microtime(true)-$time) / 1000000)); + if( isset($headers["Content-Encoding"]) && $headers['Content-Encoding'] === "gzip") + { + $body = $this->gunzip($body); + } + + #print_r($headers); + #print($body); + #print("\r\n". $bodySize); + #exit; + #die(print_r($headers)); + // $body and $headers should contain your stream data + $this->loadResults($body); + #print(print_r($headers, TRUE) . 
/**
 * Reads a response body of known length from the engine socket.
 *
 * The socket is switched to non-blocking mode and polled line by line until
 * one of these happens: end of stream, a line consisting only of "0", the
 * one-second window measured with time() has passed, or the remaining length
 * has reached exactly 0.
 *
 * @param int $length number of bytes announced by the server
 * @return string everything that was read
 */
private function readBody(int $length)
{
    $collected = '';
    $sawZeroLine = false;
    stream_set_blocking($this->fp, 0);
    $startedAt = time();

    while (true) {
        if (feof($this->fp) || $sawZeroLine || !(($startedAt + 1) > time()) || $length === 0) {
            break;
        }

        # Short pause between polls of the non-blocking socket.
        usleep(100);
        $piece = fgets($this->fp, BUFFER_LENGTH);
        $collected .= $piece;
        $length -= strlen($piece);
        $sawZeroLine = (trim($piece) === '0');
    }

    return $collected;
}
/**
 * Inflates a gzip-compressed response body.
 *
 * The two gzip magic bytes are skipped when present. If the following byte
 * is 0x08, the next eight bytes are skipped as well and the remainder is
 * passed to gzinflate(). An exception during inflating ends the request
 * with HTTP 500.
 *
 * @param string $zipped compressed data
 * @return string the inflated data, or "Unknown Format" when the input does
 *                not carry the expected method byte
 */
private function gunzip($zipped) {
    $hasMagic = (substr($zipped, 0, 2) == "\x1f\x8b");
    $offset = $hasMagic ? 2 : 0;

    if (substr($zipped, $offset, 1) != "\x08") {
        return "Unknown Format";
    }

    try {
        $payload = substr($zipped, $offset + 8);
        return gzinflate($payload);
    } catch (\Exception $e) {
        abort(500, "Fehler beim unzip des Ergebnisses von folgendem Anbieter: " . $this->name);
    }

    return "Unknown Format";
}
/**
 * URL-encodes a string for this engine.
 *
 * When the engine definition sets an "inputEncoding", the string is first
 * converted to that encoding with mb_convert_encoding().
 *
 * @param string $string raw value
 * @return string URL-encoded value
 */
protected function urlEncode($string)
{
    $prepared = isset($this->inputEncoding)
        ? mb_convert_encoding($string, $this->inputEncoding)
        : $string;

    return urlencode($prepared);
}
/**
 * Result parser for the Allesklar search engine (HTML response).
 */
class Allesklar extends Searchengine
{
    protected $tds = "";

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Extracts the results from the HTML answer and appends them to
     * $this->results.
     *
     * The signature was left with unresolved merge-conflict markers, which is
     * a parse error. The untyped variant is kept: an untyped parameter is the
     * form that stays closest to the parent declaration whichever way that
     * one is resolved.
     *
     * @param string $result raw response body of the engine
     */
    public function loadResults ($result)
    {
        $crawler = new Crawler(utf8_decode($result));
        # Only tables of width 585 are considered; the first five of them are
        # dropped by the reducer (returning false removes a node).
        $crawler = $crawler
            ->filter('table[width=585]')
            ->reduce(function(Crawler $node, $i) {
                if($i < 5)
                {
                    return false;
                }
            });

        $this->counter = 0;
        $crawler->filter('table')->each(function (Crawler $node, $i)
        {
            try {
                $this->string = "";
                $titleTag = $node->filter('tr > td > a')->first();
                $title = trim($titleTag->filter('a')->text());
                $link = $titleTag->filter('a')->attr('href');
                # The description cell sits one position further back in the
                # first table than in all following ones.
                if($i === 0)
                {
                    $descr = trim($node->filter('tr > td.bodytext')->eq(3)->text());
                }else
                {
                    $descr = trim($node->filter('tr > td.bodytext')->eq(2)->text());
                }
                $this->counter++;
                $this->results[] = new \App\Models\Result(
                    $this->engine,
                    $title,
                    $link,
                    $link,
                    $descr,
                    $this->gefVon,
                    $this->counter
                );
            } catch (\InvalidArgumentException $e)
            {
                # Tables that lack the expected nodes are not results; skip them.
            }

        });
    }

}
/**
 * Placeholder parser for the BASE search engine.
 *
 * The request is still sent by the parent constructor, but no response
 * format is parsed yet, so this engine never contributes results.
 */
class BASE extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine,\App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Accepts the engine response without producing results.
     *
     * The signature was left with unresolved merge-conflict markers, which is
     * a parse error; the untyped variant is kept. Unused locals and the
     * commented-out draft have been removed, the observable behaviour (no
     * results are added) is unchanged.
     *
     * @param string $result raw response body of the engine (ignored)
     */
    public function loadResults ($result)
    {
        # Parser not implemented yet.
    }
}
/**
 * Placeholder parser for the Dmoznebel search engine.
 *
 * The request is still sent by the parent constructor, but no response
 * format is parsed yet, so this engine never contributes results.
 */
class Dmoznebel extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Accepts the engine response without producing results.
     *
     * Two defects were fixed here:
     *  - unresolved merge-conflict markers around the signature (parse
     *    error); the untyped variant is kept,
     *  - a leftover debugging die($result) that printed the raw response and
     *    terminated the whole search request. The statements behind it were
     *    unreachable and would only have added one empty result, so the
     *    method is now a no-op until a real parser exists.
     *
     * @param string $result raw response body of the engine (ignored)
     */
    public function loadResults ($result)
    {
        # Parser not implemented yet.
    }
}
/**
 * Result parser for the Ebay search engine (RSS/XML response).
 */
class Ebay extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Parses the RSS answer and appends its items to $this->results.
     *
     * Changes compared to the previous version:
     *  - unresolved merge-conflict markers around the signature (parse
     *    error) are resolved; the untyped variant is kept,
     *  - a failed xpath() call (false) is no longer iterated,
     *  - the loop variable no longer overwrites the $result parameter.
     *
     * @param string $result raw response body of the engine
     */
    public function loadResults ($result)
    {
        try {
            $content = simplexml_load_string($result);
        } catch (\Exception $e) {
            abort(500, "$result is not a valid xml string");
        }

        if(!$content)
        {
            return;
        }

        $items = $content->xpath('//rss/channel/item');
        if(!$items)
        {
            return;
        }

        $count = 0;
        foreach($items as $item)
        {
            # NOTE(review): this lets items 0..10 through, i.e. up to eleven
            # results - confirm whether ten were intended.
            if($count > 10)
                break;
            $title = $item->{"title"}->__toString();
            $link = $item->{"link"}->__toString();
            $anzeigeLink = $link;
            $descr = strip_tags($item->{"description"}->__toString());
            $this->counter++;
            $this->results[] = new \App\Models\Result(
                $this->engine,
                $title,
                $link,
                $anzeigeLink,
                $descr,
                $this->gefVon,
                $this->counter
            );
            $count++;
        }
    }
}
\App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + + $title = ""; + $link = ""; + $anzeigeLink = $link; + $descr = ""; + + #die($result); + + /*$this->counter++; + $this->results[] = new \App\Models\Result( + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); */ + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Fastbot.php.orig b/app/Models/parserSkripte/Fastbot.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..49dab80458f1bb10d1a2582872a841d5d37dcc85 --- /dev/null +++ b/app/Models/parserSkripte/Fastbot.php.orig @@ -0,0 +1,52 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Fastbot extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + if ( strpos($this->urlEncode($metager->getEingabe()), "%") !== FALSE ) + { + $this->enabled = false; + return null; + } + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $result = utf8_encode($result); + $counter = 0; + foreach( explode("\n", $result) as $line ) + { + $line = trim($line); + if( strlen($line) > 0 ){ + # Hier bekommen wir jedes einzelne Ergebnis + $result = explode("|:|", $line); + $link = $result[1]; + $link = substr($link, strpos($link, "href=\"") + 6); + $link = substr($link, 0, strpos($link, "\"")); + $counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + trim(strip_tags($result[1])), + $link, + $result[3], + $result[2], + $this->gefVon, + $counter + ); + } + + } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Goyax.php.orig 
b/app/Models/parserSkripte/Goyax.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..7a5281cacb2c8d5c836d34b0b8ca4ea01c0999cd --- /dev/null +++ b/app/Models/parserSkripte/Goyax.php.orig @@ -0,0 +1,51 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Goyax extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + die($result); + $results = trim($result); + + foreach( explode("\n", $results) as $result ) + { + + $res = explode("|", $result); + if(sizeof($res) < 3) + { + continue; + } + $title = $res[0]; + $link = $res[2]; + $anzeigeLink = $link; + $descr = $res[1]; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Mg_hochsch_de.php.orig b/app/Models/parserSkripte/Mg_hochsch_de.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..175848d87c6a4cb008816b761cbb03db41c9a0c9 --- /dev/null +++ b/app/Models/parserSkripte/Mg_hochsch_de.php.orig @@ -0,0 +1,50 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Mg_hochsch_de extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + die($result); + $results = trim($result); + + foreach( explode("\n", $results) as $result ) + { + $res = explode("|", 
$result); + if(sizeof($res) < 3) + { + continue; + } + $title = $res[0]; + $link = $res[2]; + $anzeigeLink = $link; + $descr = $res[1]; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Minisucher.php.orig b/app/Models/parserSkripte/Minisucher.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..27886382c2143fc3706176a0476329f2751b5dd8 --- /dev/null +++ b/app/Models/parserSkripte/Minisucher.php.orig @@ -0,0 +1,60 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class Minisucher extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine,\App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($content) +======= + public function loadResults (String $content) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $content = simplexml_load_string($content); + if(!$content) + { + return; + } + $results = $content->xpath('//response/result/doc'); + + $string = ""; + + $counter = 0; + foreach($results as $result) + { + $counter++; + $result = simplexml_load_string($result->saveXML()); + $title = $result->xpath('//doc/arr[@name="title"]/str')[0]->__toString(); + $link = $result->xpath('//doc/str[@name="url"]')[0]->__toString(); + $anzeigeLink = $link; + $descr = ""; + $descriptions = $content->xpath("//response/lst[@name='highlighting']/lst[@name='$link']/arr[@name='content']/str"); + foreach($descriptions as $description) + { + $descr .= $description->__toString(); + } + $descr = strip_tags($descr); + $provider = $result->xpath('//doc/str[@name="subcollection"]')[0]->__toString(); + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $link, + $descr, + $this->gefVon, + $counter + ); + } + + + } + +} \ No 
newline at end of file diff --git a/app/Models/parserSkripte/Mnogosearch.php.orig b/app/Models/parserSkripte/Mnogosearch.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..7820714ff772302b03b58b79d1f89de613c468d0 --- /dev/null +++ b/app/Models/parserSkripte/Mnogosearch.php.orig @@ -0,0 +1,55 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; +use Symfony\Component\DomCrawler\Crawler; + +class Mnogosearch extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $counter = 0; + $crawler = new Crawler($result); + $crawler->filter('table[width=600]') + ->reduce(function (Crawler $node, $i) + { + if(strpos($node->text(), "Result pages:") !== FALSE) + { + return false; + } + }) + ->each(function(Crawler $node, $i) + { + $title = $node->filter('table > tr > td ')->eq(1)->filter('td > div')->text(); + $title = preg_replace("/\s+/si", " ", $title); + + $link = $node->filter('table > tr > td ')->eq(1)->filter('td > div > a')->attr('href'); + $anzeigeLink = $link; + $descr = $node->filter('table > tr > td ')->eq(1)->filter('td > div')->eq(1)->text(); + $this->counter++; + + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + }); + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Nebel.php.orig b/app/Models/parserSkripte/Nebel.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..aa73500da3f1b5446c0ab30b33d610350f4d3f8f --- /dev/null +++ b/app/Models/parserSkripte/Nebel.php.orig @@ -0,0 +1,46 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Nebel extends 
Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine,\App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $results = trim($result); + foreach( explode("\n", $results) as $result ) + { + $res = explode("|", $result); + if(sizeof($res) < 3) + continue; + $title = $res[2]; + $link = $res[0]; + $anzeigeLink = $link; + $descr = $res[1]; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Onenewspage.php.orig b/app/Models/parserSkripte/Onenewspage.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..52acfe7438f7270e6a9394d792ab55a7bb98be49 --- /dev/null +++ b/app/Models/parserSkripte/Onenewspage.php.orig @@ -0,0 +1,49 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Onenewspage extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $results = trim($result); + + foreach( explode("\n", $results) as $result ) + { + $res = explode("|", $result); + if(sizeof($res) < 3) + { + continue; + } + $title = $res[0]; + $link = $res[2]; + $anzeigeLink = $link; + $descr = $res[1]; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git 
a/app/Models/parserSkripte/Onenewspagegermany.php.orig b/app/Models/parserSkripte/Onenewspagegermany.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..c29b4adb1e6b8bdf130b0fd114d54c650ad99a1a --- /dev/null +++ b/app/Models/parserSkripte/Onenewspagegermany.php.orig @@ -0,0 +1,48 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; +use App\Models\Result; + +class Onenewspagegermany extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $counter = 0; + foreach( explode("\n", $result) as $line ) + { + $line = trim($line); + if( strlen($line) > 0 ){ + # Hier bekommen wir jedes einzelne Ergebnis + $result = explode("|", $line); + if(sizeof($result) < 3) + { + continue; + } + $counter++; + $this->results[] = new Result( + $this->engine, + trim(strip_tags($result[0])), + $result[2], + $result[2], + $result[1], + $this->gefVon, + $counter + ); + } + + } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Onenewspagevideo.php.orig b/app/Models/parserSkripte/Onenewspagevideo.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..25ae4fcba31f853437c66e7e298c4a5b0bba5e78 --- /dev/null +++ b/app/Models/parserSkripte/Onenewspagevideo.php.orig @@ -0,0 +1,48 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Onenewspagevideo extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + 
$results = trim($result); + foreach( explode("\n", $results) as $result ) + { + $res = explode("|", $result); + if(sizeof($res) < 3) + { + continue; + } + $title = $res[0]; + $link = $res[2]; + $anzeigeLink = $link; + $descr = $res[1]; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Opencrawlastronomie.php.orig b/app/Models/parserSkripte/Opencrawlastronomie.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..b787953e19b3a9867189807ac4d492978ea70435 --- /dev/null +++ b/app/Models/parserSkripte/Opencrawlastronomie.php.orig @@ -0,0 +1,56 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Opencrawlastronomie extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $results = $content->xpath('//rss/channel/item'); + $count = 0; + foreach($results as $result) + { + if($count > 10) + break; + $title = $result->{"title"}->__toString(); + $link = $result->{"link"}->__toString(); + $anzeigeLink = $link; + $descr = strip_tags($result->{"description"}->__toString()); + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + $count++; + + } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Opencrawlregengergie.php.orig 
b/app/Models/parserSkripte/Opencrawlregengergie.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..f0cbace76faa7ba543742fc7047d0891f46dcb3c --- /dev/null +++ b/app/Models/parserSkripte/Opencrawlregengergie.php.orig @@ -0,0 +1,56 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Opencrawlregengergie extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $results = $content->xpath('//rss/channel/item'); + $count = 0; + foreach($results as $result) + { + if($count > 10) + break; + $title = $result->{"title"}->__toString(); + $link = $result->{"link"}->__toString(); + $anzeigeLink = $link; + $descr = strip_tags($result->{"description"}->__toString()); + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + $count++; + + } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Opencrawltauchen.php.orig b/app/Models/parserSkripte/Opencrawltauchen.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..48a97eab27655898e169830de670ff64bd72d9aa --- /dev/null +++ b/app/Models/parserSkripte/Opencrawltauchen.php.orig @@ -0,0 +1,55 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Opencrawltauchen extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + 
+<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $results = $content->xpath('//rss/channel/item'); + $count = 0; + foreach($results as $result) + { + if($count > 10) + break; + $title = $result->{"title"}->__toString(); + $link = $result->{"link"}->__toString(); + $anzeigeLink = $link; + $descr = strip_tags($result->{"description"}->__toString()); + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + $count++; + + } + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Overture.php.orig b/app/Models/parserSkripte/Overture.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..8f4ad68a3cf801bf546a33dc49e7d7a60deeecb1 --- /dev/null +++ b/app/Models/parserSkripte/Overture.php.orig @@ -0,0 +1,72 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; +use Log; + +class Overture extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $result = preg_replace("/\r\n/si", "", $result); + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $results = $content->xpath('//Results/ResultSet[@id="inktomi"]/Listing'); + foreach($results as $result) + { + $title = $result["title"]; + $link = 
$result->{"ClickUrl"}->__toString(); + $anzeigeLink = $result["siteHost"]; + $descr = $result["description"]; + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + # Nun noch die Werbeergebnisse: + $ads = $content->xpath('//Results/ResultSet[@id="searchResults"]/Listing'); + foreach($ads as $ad) + { + $title = $ad["title"]; + $link = $ad->{"ClickUrl"}->__toString(); + $anzeigeLink = $ad["siteHost"]; + $descr = $ad["description"]; + $this->counter++; + $this->ads[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/OvertureAds.php.orig b/app/Models/parserSkripte/OvertureAds.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..53ccb1887df4c7211052093a7459cf15028a54d5 --- /dev/null +++ b/app/Models/parserSkripte/OvertureAds.php.orig @@ -0,0 +1,53 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class OvertureAds extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $result = preg_replace("/\r\n/si", "", $result); + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + + $ads = $content->xpath('//Results/ResultSet[@id="searchResults"]/Listing'); + foreach($ads as $ad) + { + $title = $ad["title"]; + $link = $ad->{"ClickUrl"}->__toString(); + $anzeigeLink = $ad["siteHost"]; + $descr = $ad["description"]; + $this->counter++; + $this->ads[] = new \App\Models\Result( + 
$this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } + +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Qip.php.orig b/app/Models/parserSkripte/Qip.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..5ad9022baa17fe52edf421d778499afc89c1c2ff --- /dev/null +++ b/app/Models/parserSkripte/Qip.php.orig @@ -0,0 +1,50 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Qip extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + die($result); + $results = trim($result); + + foreach( explode("\n", $results) as $result ) + { + $res = explode("|", $result); + if(sizeof($res) < 3) + { + continue; + } + $title = $res[0]; + $link = $res[2]; + $anzeigeLink = $link; + $descr = $res[1]; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Qualigo.php.orig b/app/Models/parserSkripte/Qualigo.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..d0818cd5d5505be91b45bd515be707fd9e3fa5c6 --- /dev/null +++ b/app/Models/parserSkripte/Qualigo.php.orig @@ -0,0 +1,51 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class Qualigo extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($results) +======= + public function loadResults (String $results) +>>>>>>> 
e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + try { + $content = simplexml_load_string($results); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $results = $content->xpath('//RL/RANK'); + foreach($results as $result) + { + $title = $result->{"TITLE"}->__toString(); + $link = $result->{"URL"}->__toString(); + $anzeigeLink = $result->{"ORIGURL"}->__toString(); + $descr = $result->{"ABSTRACT"}->__toString(); + $this->counter++; + $this->ads[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } + +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Similar_product.php.orig b/app/Models/parserSkripte/Similar_product.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..21872bfa1d8b57e9bd0d07e6e90cde06372bcedf --- /dev/null +++ b/app/Models/parserSkripte/Similar_product.php.orig @@ -0,0 +1,42 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Similar_product extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $results = json_decode($result); + + foreach($results->{"products"} as $result) + { + $title = $result->{"title"}; + $link = $result->{"product_url"}; + $anzeigeLink = $link; + $descr = $result->{"description"}; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Similar_product_ads.php.orig b/app/Models/parserSkripte/Similar_product_ads.php.orig new file mode 
100644 index 0000000000000000000000000000000000000000..bfa8c961f9c5e5d66a4c4f2ec9e6a9c7ee4ead3b --- /dev/null +++ b/app/Models/parserSkripte/Similar_product_ads.php.orig @@ -0,0 +1,49 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class Similar_product_ads extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + $tmp = $metager->getEingabe(); + $tmp = preg_replace("/\W/si", "", $tmp); + if(strlen($tmp) < 3) + { + $this->removeCurlHandle($mh); + } + } + +<<<<<<< HEAD + public function loadResults ($results) +======= + public function loadResults (String $results) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $results = json_decode($result); + + foreach($results->{"products"} as $result) + { + $title = $result->{"title"}; + $link = $result->{"product_url"}; + $anzeigeLink = $link; + $descr = $result->{"description"}; + + $this->counter++; + $this->ads[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } + +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Suchticker.php.orig b/app/Models/parserSkripte/Suchticker.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..b74a8de1894434f93c7f6b7c8f4b4d16aed5c1b4 --- /dev/null +++ b/app/Models/parserSkripte/Suchticker.php.orig @@ -0,0 +1,49 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Suchticker extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $results = trim($result); + + foreach( explode("\n", $results) as $result ) + { + $res = 
explode("';'", $result); + if(sizeof($res) < 3) + { + continue; + } + $title = trim($res[0], "'"); + $link = trim($res[1], "'"); + $anzeigeLink = $link; + $descr = trim($res[2], "'"); + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Tuhh.php.orig b/app/Models/parserSkripte/Tuhh.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..6c5c2911b12e5728db36b69625602764ff284175 --- /dev/null +++ b/app/Models/parserSkripte/Tuhh.php.orig @@ -0,0 +1,54 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Tuhh extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $count = 0; + foreach($content->{"entry"} as $result) + { + if($count > 10) + break; + $title = $result->{"title"}->__toString(); + $link = $result->{"link"}["href"]->__toString(); + $anzeigeLink = $link; + $descr = strip_tags($result->{"summary"}->__toString()); + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + $count++; + } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Wikipedia.php.orig b/app/Models/parserSkripte/Wikipedia.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..23dd5f13481c65557ded7ce98fb892affa8119a9 --- /dev/null +++ 
b/app/Models/parserSkripte/Wikipedia.php.orig @@ -0,0 +1,35 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Wikipedia extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine,\App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $result = utf8_decode($result); + $counter = 0; + + #die($crawler); + $this->results[] = new \App\Models\Result( + $this->engine, + trim(strip_tags($result[1])), + $link, + $result[3], + $result[2], + $this->gefVon, + $counter + ); + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Witch.php.orig b/app/Models/parserSkripte/Witch.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..a5d0aea249a8342f6bb7019abf9fe7c9b079d05c --- /dev/null +++ b/app/Models/parserSkripte/Witch.php.orig @@ -0,0 +1,53 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Witch extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + $result = html_entity_decode(trim(utf8_encode($result))); + + $results = explode("\n", $result); + + array_shift($results); + foreach($results as $res) + { + + $res = explode(";", $res); + if(sizeof($res) !== 4) + { + continue; + } + $title = trim($res[0], "'"); + $link = trim($res[2], "'"); + $anzeigeLink = $link; + $descr = trim($res[1], "'"); + + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); 
+ } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Yacy.php.orig b/app/Models/parserSkripte/Yacy.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..f203f821f18f5187a003f1b8de18e57247c6c28d --- /dev/null +++ b/app/Models/parserSkripte/Yacy.php.orig @@ -0,0 +1,53 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Yacy extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + + # die($result); + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + return; + $results = $content->xpath("//rss/channel/item"); + if(!$results) + return; + foreach($results as $res) + { + $title = $res->{"title"}; + $link = $res->{"link"}; + $anzeigeLink = $link; + $descr = $res->{"description"}; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Yacyunih.php.orig b/app/Models/parserSkripte/Yacyunih.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..48258f1dfa7d9392e8af3f2bd94c2429497a7a92 --- /dev/null +++ b/app/Models/parserSkripte/Yacyunih.php.orig @@ -0,0 +1,58 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Yacyunih extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public 
function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + if(!$content) + { + return; + } + $results = $content->xpath('//rss/channel/item'); + if(!$results) + return; + $count = 0; + foreach($results as $result) + { + if($count > 10) + break; + $title = $result->{"title"}->__toString(); + $link = $result->{"link"}->__toString(); + $anzeigeLink = $link; + $descr = strip_tags($result->{"description"}->__toString()); + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + $count++; + + } + + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Yandex.php.orig b/app/Models/parserSkripte/Yandex.php.orig new file mode 100644 index 0000000000000000000000000000000000000000..ef2d7711ca002e4a64838d0a4c7bc37b1a40488f --- /dev/null +++ b/app/Models/parserSkripte/Yandex.php.orig @@ -0,0 +1,39 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Yandex extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + + $title = ""; + $link = ""; + $anzeigeLink = $link; + $descr = ""; + + #die($result); + + /*$this->counter++; + $this->results[] = new \App\Models\Result( + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + );*/ + } +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Zeitde.php.orig b/app/Models/parserSkripte/Zeitde.php.orig new file mode 100644 index 
0000000000000000000000000000000000000000..53bfd4488858c70c768466227490c07b1279ecce --- /dev/null +++ b/app/Models/parserSkripte/Zeitde.php.orig @@ -0,0 +1,47 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Zeitde extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + +<<<<<<< HEAD + public function loadResults ($result) +======= + public function loadResults (String $result) +>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a + { + + $results = json_decode($result); + if(!$results) + return; + foreach( $results->{"matches"} as $result ) + { + $title = $result->{"title"}; + $link = $result->{"href"}; + $anzeigeLink = $link; + $descr = $result->{"snippet"}; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + + + + } +} \ No newline at end of file