From 992bf2a688a97753d49feec227775a5b3cb00ef9 Mon Sep 17 00:00:00 2001 From: Dominik Pfennig <dominik@suma-ev.de> Date: Thu, 19 May 2016 08:45:54 +0200 Subject: [PATCH] =?UTF-8?q?Bis=20zu=20dem=20Punkt,=20an=20dem=20die=20Such?= =?UTF-8?q?maschinen=20geladen=20sind,=20ist=20jegliche=20Logik=20=C3=BCbe?= =?UTF-8?q?rnommen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Http/Controllers/MetaGerSearch.php | 7 +- app/MetaGer.php | 124 ++++++++++++++---- app/Models/Searchengine.php | 46 +++++-- app/Models/parserSkripte/Fastbot.php | 9 +- .../parserSkripte/Onenewspagegermany.php | 4 +- app/Models/parserSkripte/OvertureAds.php | 21 +++ app/Models/parserSkripte/Qualigo.php | 20 +++ .../parserSkripte/Similar_product_ads.php | 26 ++++ config/.gitignore | 3 +- 9 files changed, 214 insertions(+), 46 deletions(-) create mode 100644 app/Models/parserSkripte/OvertureAds.php create mode 100644 app/Models/parserSkripte/Qualigo.php create mode 100644 app/Models/parserSkripte/Similar_product_ads.php diff --git a/app/Http/Controllers/MetaGerSearch.php b/app/Http/Controllers/MetaGerSearch.php index 6ba0b6099..79213bfe4 100644 --- a/app/Http/Controllers/MetaGerSearch.php +++ b/app/Http/Controllers/MetaGerSearch.php @@ -18,8 +18,11 @@ class MetaGerSearch extends Controller { # Mit gelieferte Formulardaten parsen und abspeichern: $metager->parseFormData($request); - # Nach Spezialsuchen überprüfen: - $metager->checkSpecialSearches($request); + if($metager->getFokus() !== "bilder" ) + { + # Nach Spezialsuchen überprüfen: + $metager->checkSpecialSearches($request); + } # Alle Suchmaschinen erstellen $metager->createSearchEngines($request); # Ergebnisse der Suchmaschinen kombinieren: diff --git a/app/MetaGer.php b/app/MetaGer.php index 40f875a77..2131b3028 100644 --- a/app/MetaGer.php +++ b/app/MetaGer.php @@ -4,6 +4,8 @@ namespace App; use Illuminate\Http\Request; use Jenssegers\Agent\Agent; use App; +use Storage; +use Log; class MetaGer { @@ -23,6 +25,7 @@ class MetaGer protected $engines = []; protected $results = []; protected $warnings = []; + protected $errors = []; # Daten über die Abfrage protected $ip; protected $language; @@ -32,10 +35,23 @@ class MetaGer protected $mobile; protected $resultCount; protected $sprueche; + protected $domainsBlacklisted = []; + protected $urlsBlacklisted = []; + protected $url; function __construct() { - #$this->eingabe = Input::get('eingabe'); + if( file_exists(config_path() . "/blacklistDomains.txt") && file_exists(config_path() . "/blacklistUrl.txt") ) + { + # Blacklists einlesen: + $tmp = file_get_contents(config_path() . "/blacklistDomains.txt"); + $this->domainsBlacklisted = explode("\n", $tmp); + $tmp = file_get_contents(config_path() . "/blacklistUrl.txt"); + $this->urlsBlacklisted = explode("\n", $tmp); + }else + { + Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden"); + } } public function createView() @@ -49,7 +65,8 @@ class MetaGer return view('metager3') ->with('results', $viewResults) ->with('eingabe', $this->eingabe) - ->with('warnings', $this->warnings); + ->with('warnings', $this->warnings) + ->with('errors', $this->errors); } public function combineResults () @@ -76,18 +93,22 @@ class MetaGer foreach($sumas as $suma) { if($request->has($suma["service"]) - # || ( FOKUS !== "bilder" - # && ($suma["name"]->__toString() === "qualigo" - # || $suma["name"]->__toString() === "similar_product_ads" - # || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) - # ) - # ) + || ( $this->fokus !== "bilder" + && ($suma["name"]->__toString() === "qualigo" + || $suma["name"]->__toString() === "similar_product_ads" + || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) + ) + ) ){ - if($suma["name"]->__toString() === "overture") - { - $overtureEnabled = TRUE; - } - $enabledSearchengines[] = $suma; + + if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1")) + { + if($suma["name"]->__toString() === "overture") + { + $overtureEnabled = TRUE; + } + $enabledSearchengines[] = $suma; + } } } }else{ @@ -95,26 +116,38 @@ class MetaGer foreach($sumas as $suma){ $types = explode(",",$suma["type"]); if(in_array($this->fokus, $types) - # || ( FOKUS !== "bilder" - # && ($suma["name"]->__toString() === "qualigo" - # || $suma["name"]->__toString() === "similar_product_ads" - # || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) - # ) - # ) + || ( $this->fokus !== "bilder" + && ($suma["name"]->__toString() === "qualigo" + || $suma["name"]->__toString() === "similar_product_ads" + || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" ) + ) + ) ){ - if($suma["name"]->__toString() === "overture") - { - $overtureEnabled = TRUE; - } - $enabledSearchengines[] = $suma; + if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1")) + { + if($suma["name"]->__toString() === "overture") + { + $overtureEnabled = TRUE; + } + $enabledSearchengines[] = $suma; + } } } } + if( ( $this->fokus !== "bilder" && sizeof($enabledSearchengines) <= 3 ) || ( $this->fokus === "bilder" && sizeof($enabledSearchengines) === 0) ) + { + $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt."; + } + $engines = []; foreach($enabledSearchengines as $engine){ $path = "App\Models\parserSkripte\\" . ucfirst($engine["name"]->__toString()); - $engines[] = new $path($engine, $mh, $this->q, $this->time); + $tmp = new $path($engine, $mh, $this); + if($tmp) + { + $engines[] = $tmp; + } } # Nun führen wir die Get-Requests aus und warten auf alle Ergebnisse: @@ -133,6 +166,12 @@ class MetaGer # Und auch den Multicurl-Handle: curl_multi_close($mh); + $string = ["Curl-Timings:"]; + foreach($engines as $engine) + { + $string[] = $engine->getCurlInfo(); + } + Log::debug($string); $this->engines = $engines; } @@ -149,6 +188,7 @@ class MetaGer } $request->replace($input); } + $this->url = $request->url(); # Zunächst überprüfen wir die eingegebenen Einstellungen: # FOKUS $this->fokus = trans('fokiNames.' @@ -322,4 +362,38 @@ class MetaGer $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $match[1] . "\""; } } + + public function getFokus () + { + return $this->fokus; + } + + public function getIp () + { + return $this->ip; + } + + public function getEingabe () + { + return $this->eingabe; + } + + public function getUrl () + { + return $this->url; + } + public function getTime () + { + return $this->time; + } + + public function getLanguage () + { + return $this->language; + } + + public function getCategory () + { + return $this->category; + } } \ No newline at end of file diff --git a/app/Models/Searchengine.php b/app/Models/Searchengine.php index 7c8c20895..92e8ea6c4 100644 --- a/app/Models/Searchengine.php +++ b/app/Models/Searchengine.php @@ -1,13 +1,15 @@ <?php namespace App\Models; +use App\MetaGer; abstract class Searchengine { protected $ch; # Curl Handle zum erhalten der Ergebnisse + public $results = []; - function __construct(\SimpleXMLElement $engine, $mh, $query, $time) + function __construct(\SimpleXMLElement $engine, $mh, MetaGer $metager) { foreach($engine->attributes() as $key => $value){ $this->$key = $value->__toString(); @@ -20,17 +22,33 @@ abstract class Searchengine { $this->useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1"; } - - $this->ch = curl_init($this->generateGetString($query)); + $this->ip = $metager->getIp(); + $this->ch = curl_init($this->generateGetString($metager->getEingabe(), $metager->getUrl(), $metager->getLanguage(), $metager->getCategory()) ); curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT , $time); + curl_setopt($this->ch, CURLOPT_USERAGENT, $this->useragent); // set browser/user agent + curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, 1); // automatically follow Location: headers (ie redirects) + curl_setopt($this->ch, CURLOPT_AUTOREFERER, 1); // auto set the referer in the event of a redirect + curl_setopt($this->ch, CURLOPT_MAXREDIRS, 5); // make sure we dont get stuck in a loop + curl_setopt($this->ch, CURLOPT_CONNECTTIMEOUT , $metager->getTime()); + curl_setopt($this->ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4 ); + curl_setopt($this->ch, CURLOPT_TIMEOUT, 1); // 10s timeout time for cURL connection + if($this->port ==="443") + { + curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, true); // allow https verification if true + curl_setopt($this->ch, CURLOPT_SSL_VERIFYHOST, 2); // check common name and verify with host name + curl_setopt($this->ch, CURLOPT_SSLVERSION,3); // verify ssl version 2 or 3 + } $this->addCurlHandle($mh); } public abstract function loadResults(); + public function getCurlInfo() + { + return curl_getinfo($this->ch); + } + public function addCurlHandle ($mh) { curl_multi_add_handle($mh, $this->ch); @@ -41,7 +59,7 @@ abstract class Searchengine curl_multi_remove_handle($mh, $this->ch); } - private function generateGetString($query) + private function generateGetString($query, $url, $language, $category) { $getString = ""; # Protokoll: @@ -71,27 +89,27 @@ abstract class Searchengine if( strpos($getString, "<<IP>>") ) { - $getString = str_replace("<<IP>>", $this->urlEncode(IP), $getString); + $getString = str_replace("<<IP>>", $this->urlEncode($this->ip), $getString); } if( strpos($getString, "<<LANGUAGE>>") ) { - $getString = str_replace("<<LANGUAGE>>", $this->urlEncode(LANGUAGE), $getString); + $getString = str_replace("<<LANGUAGE>>", $this->urlEncode($language), $getString); } if( strpos($getString, "<<CATEGORY>>") ) { - $getString = str_replace("<<CATEGORY>>", $this->urlEncode(CATEGORY), $getString); + $getString = str_replace("<<CATEGORY>>", $this->urlEncode($category), $getString); } if( strpos($getString, "<<AFFILDATA>>") ) { - $getString = str_replace("<<AFFILDATA>>", $this->getOvertureAffilData(), $getString); + $getString = str_replace("<<AFFILDATA>>", $this->getOvertureAffilData($url), $getString); } return $getString; } - private function urlEncode($string) + protected function urlEncode($string) { if(isset($this->inputEncoding)) { @@ -102,16 +120,16 @@ abstract class Searchengine } } - private function getOvertureAffilData() + private function getOvertureAffilData($url) { - $affil_data = 'ip=' . IP; + $affil_data = 'ip=' . $this->ip; $affil_data .= '&ua=' . $this->useragent; if ( isset($_SERVER['HTTP_X_FORWARDED_FOR']) ) { $affil_data .= '&xfip=' . $_SERVER['HTTP_X_FORWARDED_FOR']; } $affilDataValue = $this->urlEncode($affil_data); # Wir benötigen die ServeUrl: - $serveUrl = $this->urlEncode(Request::url());# + $serveUrl = $this->urlEncode($url); return "&affilData=" . $affilDataValue . "&serveUrl=" . $serveUrl; } diff --git a/app/Models/parserSkripte/Fastbot.php b/app/Models/parserSkripte/Fastbot.php index 118034749..01d1af472 100644 --- a/app/Models/parserSkripte/Fastbot.php +++ b/app/Models/parserSkripte/Fastbot.php @@ -7,9 +7,14 @@ class Fastbot extends Searchengine { public $results = []; - function __construct (\SimpleXMLElement $engine, $mh, $query, $time) + function __construct (\SimpleXMLElement $engine, $mh, \App\MetaGer $metager) { - parent::__construct($engine, $mh, $query, $time); + parent::__construct($engine, $mh, $metager); + if ( strpos($this->urlEncode($metager->getEingabe()), "%") !== FALSE ) + { + $this->removeCurlHandle($mh); + return FALSE; + } } public function loadResults () diff --git a/app/Models/parserSkripte/Onenewspagegermany.php b/app/Models/parserSkripte/Onenewspagegermany.php index 5e5ee0da2..7bc8eea60 100644 --- a/app/Models/parserSkripte/Onenewspagegermany.php +++ b/app/Models/parserSkripte/Onenewspagegermany.php @@ -8,9 +8,9 @@ class Onenewspagegermany extends Searchengine { public $results = []; - function __construct (\SimpleXMLElement $engine, $mh, $query, $time) + function __construct (\SimpleXMLElement $engine, $mh, $query, $time, $ip, $url) { - parent::__construct($engine, $mh, $query, $time); + parent::__construct($engine, $mh, $query, $time, $ip, $url); } public function loadResults () diff --git a/app/Models/parserSkripte/OvertureAds.php b/app/Models/parserSkripte/OvertureAds.php new file mode 100644 index 000000000..adc3936da --- /dev/null +++ b/app/Models/parserSkripte/OvertureAds.php @@ -0,0 +1,21 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class OvertureAds extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine, $mh, \App\MetaGer $metager) + { + parent::__construct($engine, $mh, $metager); + } + + public function loadResults () + { + $result = utf8_encode(curl_multi_getcontent($this->ch)); + #die($result); + } + +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Qualigo.php b/app/Models/parserSkripte/Qualigo.php new file mode 100644 index 000000000..cbdf8d203 --- /dev/null +++ b/app/Models/parserSkripte/Qualigo.php @@ -0,0 +1,20 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class Qualigo extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine, $mh, \App\MetaGer $metager) + { + parent::__construct($engine, $mh, $metager); + } + + public function loadResults () + { + $result = utf8_encode(curl_multi_getcontent($this->ch)); + } + +} \ No newline at end of file diff --git a/app/Models/parserSkripte/Similar_product_ads.php b/app/Models/parserSkripte/Similar_product_ads.php new file mode 100644 index 000000000..2c2e3f2b5 --- /dev/null +++ b/app/Models/parserSkripte/Similar_product_ads.php @@ -0,0 +1,26 @@ +<?php + +namespace App\Models\parserSkripte; + +use App\Models\Searchengine; + +class Similar_product_ads extends Searchengine +{ + + function __construct (\SimpleXMLElement $engine, $mh, \App\MetaGer $metager) + { + parent::__construct($engine, $mh, $metager); + $tmp = $metager->getEingabe(); + $tmp = preg_replace("/\W/si", "", $tmp); + if(strlen($tmp) < 3) + { + $this->removeCurlHandle($mh); + } + } + + public function loadResults () + { + $result = utf8_encode(curl_multi_getcontent($this->ch)); + } + +} \ No newline at end of file diff --git a/config/.gitignore b/config/.gitignore index b81c7954b..08908afc5 100644 --- a/config/.gitignore +++ b/config/.gitignore @@ -1 +1,2 @@ -*.xml \ No newline at end of file +*.xml +*.txt \ No newline at end of file -- GitLab