Skip to content
Snippets Groups Projects
MetaGer.php 23.2 KiB
Newer Older
  • Learn to ignore specific revisions
  • <?php
    namespace App;
    
    use Illuminate\Http\Request;
    use Jenssegers\Agent\Agent;
    use App;
    
    use App\lib\TextLanguageDetect\TextLanguageDetect;
    use App\lib\TextLanguageDetect\LanguageDetect\TextLanguageDetectException;
    use Illuminate\Pagination\LengthAwarePaginator;
    use Illuminate\Support\Collection;
    #use \Illuminate\Pagination\Paginator;
    
    
    class MetaGer
    {
    	# Einstellungen für die Suche
    	protected $fokus;
    	protected $eingabe;
    	protected $q;
    	protected $category;
    	protected $time;
    	protected $page;
    	protected $lang;
    	protected $cache = "";
    	protected $site;
    	protected $hostBlacklist = [];
    	protected $domainBlacklist = [];
    	protected $stopWords = [];
    	protected $engines = [];
    	protected $results = [];
    
        protected $ads = [];
    
        protected $addedHosts = [];
    
    	# Daten über die Abfrage
    	protected $ip;
    	protected $language;
    	protected $agent;
    	# Konfigurationseinstellungen:
    	protected $sumaFile;
    	protected $mobile;
    	protected $resultCount;
    	protected $sprueche;
    
        protected $domainsBlacklisted = [];
        protected $urlsBlacklisted = [];
        protected $url;
    
        protected $languageDetect;
    
            $this->time = microtime();   
    
            define('CRLF', "\r\n");
            define('BUFFER_LENGTH', 8192);
    
            if( file_exists(config_path() . "/blacklistDomains.txt") && file_exists(config_path() . "/blacklistUrl.txt") )
            {
                # Blacklists einlesen:
                $tmp = file_get_contents(config_path() . "/blacklistDomains.txt");
                $this->domainsBlacklisted = explode("\n", $tmp);
                $tmp = file_get_contents(config_path() . "/blacklistUrl.txt");
                $this->urlsBlacklisted = explode("\n", $tmp);
            }else
            {
                Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
            }
    
    
            $this->languageDetect = new TextLanguageDetect();
            $this->languageDetect->setNameMode("2");
    
        /**
         * Asks every search engine held in $this->engines to rank its results.
         *
         * Each engine receives this MetaGer instance, so it can read the current
         * search settings while ranking.
         */
        public function rankAll ()
        {
            foreach( $this->engines as $searchEngine )
            {
                # The engine ranks its own result list; nothing is returned here.
                $searchEngine->rank($this);
            }
        }
    
    
    	public function createView()
    	{
    		$viewResults = [];
    
            # Wir extrahieren alle notwendigen Variablen und geben Sie an unseren View:
            foreach($this->results as $result)
            {
                $viewResults[] = get_object_vars($result);
            }
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
            if( $this->fokus === "bilder" )
            {
                switch ($this->out) 
                {
                    case 'results':
    
                        return view('metager3bilderresults')
                            ->with('results', $viewResults)
                            ->with('eingabe', $this->eingabe)
                            ->with('mobile', $this->mobile)
                            ->with('warnings', $this->warnings)
                            ->with('errors', $this->errors)
                            ->with('metager', $this);
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
                    default:
                        return view('metager3bilder')
                            ->with('results', $viewResults)
                            ->with('eingabe', $this->eingabe)
                            ->with('mobile', $this->mobile)
                            ->with('warnings', $this->warnings)
                            ->with('errors', $this->errors)
                            ->with('metager', $this);
                }
            }
    
    
            switch ($this->out) {
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
                case 'results':
                    return view('metager3results')
    
                        ->with('results', $viewResults)
                        ->with('eingabe', $this->eingabe)
                        ->with('mobile', $this->mobile)
                        ->with('warnings', $this->warnings)
                        ->with('errors', $this->errors)
                        ->with('metager', $this);
                    break;
                default:
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
                    return view('metager3')
                        ->with('results', $viewResults)
                        ->with('eingabe', $this->eingabe)
                        ->with('mobile', $this->mobile)
                        ->with('warnings', $this->warnings)
                        ->with('errors', $this->errors)
                        ->with('metager', $this);
    
        /**
         * Runs the validity check of every result in $this->results.
         *
         * NOTE(review): the filtered list is built but never written back to
         * $this->results (the assignment is disabled), so the only observable effect
         * is whatever isValid() itself does. Confirm whether that is intended.
         */
        public function removeInvalids ()
        {
            $validResults = [];
            foreach( $this->results as $candidate )
            {
                if( !$candidate->isValid($this) )
                {
                    continue;
                }
                $validResults[] = $candidate;
            }
            # Deliberately left disabled, exactly as before:
            #$this->results = $validResults;
        }
    
    
    	public function combineResults ()
    	{
    		foreach($this->engines as $engine)
    		{
    
                foreach($engine->results as $result)
                {
                    if($result->valid)
                        $this->results[] = $result;
                }
                foreach($engine->ads as $ad)
                {
                    $this->ads[] = $ad;
                }
    
            uasort($this->results, function($a, $b){
                if($a->getRank() == $b->getRank())
                    return 0;
                return ($a->getRank() < $b->getRank()) ? 1 : -1;
            });
            # Validate Results
            $newResults = [];
            foreach($this->results as $result)
            {
                if($result->isValid($this))
                    $newResults[] = $result;
            }
            $this->results = $newResults;
    
            $counter = 0;
            $firstRank = 0;
            foreach($this->results as $result)
            {
                if($counter === 0)
                    $firstRank = $result->rank;
                $counter++;
                $result->number = $counter;
                $confidence = 0;
                if($firstRank > 0)
                    $confidence = $result->rank/$firstRank;
                else
                    $confidence = 0;
                if($confidence > 0.65)
                    $result->color = "#FF4000";
                elseif($confidence > 0.4)
                    $result->color = "#FF0080";
                elseif($confidence > 0.2)
                    $result->color = "#C000C0";
                else
                    $result->color = "#000000";
            }
    
            //Get current page form url e.g. &page=6
            $currentPage = LengthAwarePaginator::resolveCurrentPage();
            $offset= $currentPage-1;
    
            //Create a new Laravel collection from the array data
            $collection = new Collection($this->results);
    
            //Define how many items we want to be visible in each page
            $perPage = $this->resultCount;
    
            //Slice the collection to get the items to display in current page
            $currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
    
            //Create our paginator and pass it to the view
            $paginatedSearchResults= new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
            $paginatedSearchResults->setPath('/meta/meta.ger3');
            foreach($this->request->all() as $key => $value)
            {
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
                if( $key === "out" )
    
                $paginatedSearchResults->addQuery($key, $value);
            }
    
            $this->results = $paginatedSearchResults;
    
    	}
    
    	public function createSearchEngines (Request $request)
    	{
    
    
            #die(SocketRocket::get("tls", "dominik-pfennig.de", "", 443));
    
    
    		# Überprüfe, welche Sumas eingeschaltet sind
            $xml = simplexml_load_file($this->sumaFile);
            $enabledSearchengines = [];
            $overtureEnabled = FALSE;
    
            if($this->fokus === "angepasst")
            {
                $sumas = $xml->xpath("suma");
    
                /**$maxSumas = 30;
                $count = 0;
                foreach($sumas as $suma)
                {
                    if($maxSumas === $count)
                        break;
                    $enabledSearchengines[] = $suma;
                    $count++;
                }**/
    
                foreach($sumas as $suma)
                {
                    if($request->has($suma["service"]) 
    
                    	#|| ( $this->fokus !== "bilder" 
                    	#	&& ($suma["name"]->__toString() === "qualigo" 
                    	#		|| $suma["name"]->__toString() === "similar_product_ads" 
                    	#		|| ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" )
                    	#		)
                    	#	)
    
                        #|| 1 === 1  #Todo: entfernen
    
    
                    	if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1"))
                        {
                            if($suma["name"]->__toString() === "overture")
                            {
                                $overtureEnabled = TRUE;
                            }
    
                            if( $suma["name"]->__toString() !== "qualigo" && $suma["name"]->__toString() !== "similar_product_ads" && $suma["name"]->__toString() !== "overtureAds" )
                                $countSumas += 1;
    
                    }
                }
            }else{
                $sumas = $xml->xpath("suma");
                foreach($sumas as $suma){
                    $types = explode(",",$suma["type"]);
                    if(in_array($this->fokus, $types) 
    
                    	|| ( $this->fokus !== "bilder" 
                    		&& ($suma["name"]->__toString() === "qualigo" 
                    			|| $suma["name"]->__toString() === "similar_product_ads" 
                    			|| ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" )
                    			)
                    		)
    
                        if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1"))
                        {
                            if($suma["name"]->__toString() === "overture")
                            {
                                $overtureEnabled = TRUE;
                            }
    
                            if( $suma["name"]->__toString() !== "qualigo" && $suma["name"]->__toString() !== "similar_product_ads" && $suma["name"]->__toString() !== "overtureAds" )
                                $countSumas += 1;
    
            {
                $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.";
            }
    
    
    		$engines = [];
    		foreach($enabledSearchengines as $engine){
    
                if(strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1"))
                {
                    continue;
                }
                # Wenn diese Suchmaschine gar nicht eingeschaltet sein soll
    
    
                $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
    
    
                $time = microtime();
    
                $tmp = new $path($engine, $this);
    
    
                if($tmp->enabled && isset($this->debug))
                {
                    $this->warnings[] = $tmp->service . "   Connection_Time: " . $tmp->connection_time . "    Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime()-$time)/1000);
                }
    
    
                    $this->sockets[$tmp->name] = $tmp->fp;
    
            # Nun passiert ein elementarer Schritt.
            # Wir warten auf die Antwort der Suchmaschinen, da wir vorher nicht weiter machen können.
            # aber natürlich nicht ewig.
            # Die Verbindung steht zu diesem Zeitpunkt und auch unsere Request wurde schon gesendet.
            # Wir geben der Suchmaschine nun bis zu 500ms Zeit zu antworten.
    
            # Jetzt lesen wir alles aus, was da ist und verwerfen den Rest:
    
            $enginesToLoad = count($engines);
            $loadedEngines = 0;
            $time = 0;
            while( true )
            {
                # Abbruchbedingung
                if($time < 500)
                {
                    if($loadedEngines >= $enginesToLoad)
                        break;
                }elseif( $time >= 500 && $time < $this->time)
                {
                    if( ($loadedEngines / ($enginesToLoad * 1.0)) >= 0.8 )
                        break;
                }else
                {
                    break;
                }
                foreach($engines as $engine)
                {
                    if(!$engine->loaded)
                    {
                        $success = $engine->retrieveResults();
                        if($engine->loaded)
                            $loadedEngines += 1;
                    }
                }
                usleep(50000);
                $time += 50;
            }
            foreach( $engines as $engine )
    
                if( !$engine->loaded )
                    $engine->shutdown();
    
            $this->engines = $engines;
    	}
    
    	public function parseFormData (Request $request)
    	{
    		if($request->input('encoding', '') !== "utf8")
    		{
    			# In früheren Versionen, als es den Encoding Parameter noch nicht gab, wurden die Daten in ISO-8859-1 übertragen
    			$input = $request->all();
    			foreach($input as $key => $value)
    			{
    				$input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1");
    			}
    			$request->replace($input);
    		}
    
    		# Zunächst überprüfen wir die eingegebenen Einstellungen:
            # FOKUS
            $this->fokus = trans('fokiNames.'
            	. $request->input('focus', 'web'));
            if(strpos($this->fokus,"."))
            {
                $this->fokus = trans('fokiNames.web');
            }
    
            # SUMA-FILE
            if(App::isLocale("en")){
                $this->sumaFile = config_path() . "/sumasEn.xml";
            }else{
                $this->sumaFile = config_path() . "/sumas.xml";
            }
            if(!file_exists($this->sumaFile))
            {
                die("Suma-File konnte nicht gefunden werden");
            }
    
            # Sucheingabe:
            $this->eingabe = trim($request->input('eingabe', ''));
            if(strlen($this->eingabe) === 0)
            {
                $this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.';
            }
            $this->q = $this->eingabe;
    
            # IP:
            if( isset($_SERVER['HTTP_FROM']) )
            {
                $this->ip = $_SERVER['HTTP_FROM'];
            }else
            {
                $this->ip = "127.0.0.1";
            }
            # Language:
            if( isset($_SERVER['HTTP_LANGUAGE']) )
            {
                $this->language = $_SERVER['HTTP_LANGUAGE'];
            }else
            {
                $this->language = "";
            }
            # Category
            $this->category = $request->input('category', '');
            # Request Times:
    
            $this->time = $request->input('time', 1000);
           
    
            # Page
            $this->page = $request->input('page', 1);
            # Lang
            $this->lang = $request->input('lang', 'all');
    
            if ( $this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all" )
    
            {
            	$this->lang = "all";
            }
            $this->agent = new Agent();
            $this->mobile = $this->agent->isMobile();
            #Sprüche
    
            $this->sprueche = $request->input('sprueche', 'off');
            if($this->sprueche === "off" )
                $this->sprueche = true;
            else
                $this->sprueche = false;
    
            # Ergebnisse pro Seite:
            $this->resultCount = $request->input('resultCount', '20');
    
            # Manchmal müssen wir Parameter anpassen um den Sucheinstellungen gerecht zu werden:
            if( $request->has('dart') )
            {
    
            	$this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt.";
            }
    
            if( $this->time <= 500 || $this->time > 20000 )
    
            }
            if( $request->has('minism') && ( $request->has('fportal') || $request->has('harvest') ) )
            {
            	$input = $request->all();
            	$newInput = [];
            	foreach($input as $key => $value)
            	{
            		if( $key !== "fportal" && $key !== "harvest" )
            		{
            			$newInput[$key] = $value;
            		}
            	}
            	$request->replace($newInput);
            }
            if( $request->has('ebay') )
            {
    
            	$this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt.";
            }
            if( App::isLocale("en") )
            {
            	$this->sprueche = "off";
            }
            if($this->resultCount <= 0 || $this->resultCount > 200 )
            {
            	$this->resultCount = 1000;
            }
            if( $request->has('onenewspageAll') || $request->has('onenewspageGermanyAll') )
            {
            	$this->time = 5000;
            	$this->cache = "cache";
            }
    
            if( $request->has('tab'))
            {
                if($request->input('tab') === "1")
                {
                    $this->tab = "_blank";
                }else
                {
                    $this->tab = "_self";
                }
            }else
            {
                $this->tab = "_blank";
            }
            $this->out = $request->input('out', "html");
            if($this->out !== "html" && $this->out !== "json" && $this->out !== "results" && $this->out !== "results-with-style")
                $this->out = "html";
            $this->request = $request;
    
    	}
    
    	/**
    	 * Extracts the special search operators from the query in $this->q.
    	 *
    	 * Recognised operators, processed in this order:
    	 *  - "site:<host>"      stored in $this->site
    	 *  - "-host:<host>"     collected in $this->hostBlacklist
    	 *  - "-domain:<domain>" collected in $this->domainBlacklist
    	 *  - "-<word>"          collected in $this->stopWords
    	 *
    	 * Every recognised operator is cut out of $this->q and a user-facing notice
    	 * is appended to $this->warnings. A quoted phrase only produces a notice and
    	 * stays part of the query.
    	 *
    	 * @param Request $request current request; not read by this method
    	 */
    	public function checkSpecialSearches (Request $request)
    	{
    		# Site search: keep the text before and after the operator as the new query.
    		if( preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $siteParts) === 1 )
    		{
    			$this->site = $siteParts[2];
    			$this->q = $siteParts[1] . $siteParts[3];
    			$this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt.";
    		}

    		# "-host:*" excludes single hosts. The pattern is re-applied until no operator is left.
    		while( preg_match("/(.*)(^|\s)-host:(\S+)(.*)/si", $this->q, $hostParts) === 1 )
    		{
    			$this->hostBlacklist[] = $hostParts[3];
    			$this->q = $hostParts[1] . $hostParts[4];
    		}
    		if( count($this->hostBlacklist) > 0 )
    		{
    			# rtrim() keeps the historic behaviour of also stripping trailing commas/spaces of the last entry.
    			$hostList = rtrim(implode(", ", $this->hostBlacklist), ", ");
    			$this->warnings[] = "Ergebnisse von folgenden Hosts werden nicht angezeigt: \"" . $hostList . "\"";
    		}

    		# "-domain:*" excludes whole domains; handled the same way as "-host:*".
    		while( preg_match("/(.*)(^|\s)-domain:(\S+)(.*)/si", $this->q, $domainParts) === 1 )
    		{
    			$this->domainBlacklist[] = $domainParts[3];
    			$this->q = $domainParts[1] . $domainParts[4];
    		}
    		if( count($this->domainBlacklist) > 0 )
    		{
    			$domainList = rtrim(implode(", ", $this->domainBlacklist), ", ");
    			$this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . $domainList . "\"";
    		}

    		# Every remaining word prefixed with "-" is an exclusion (stop) word.
    		while( preg_match("/(.*)(^|\s)-(\S+)(.*)/si", $this->q, $wordParts) === 1 )
    		{
    			$this->stopWords[] = $wordParts[3];
    			$this->q = $wordParts[1] . $wordParts[4];
    		}
    		if( count($this->stopWords) > 0 )
    		{
    			$stopWordList = rtrim(implode(", ", $this->stopWords), ", ");
    			$this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . $stopWordList . "\"";
    		}

    		# Phrase search: only inform the user, the query itself is left untouched.
    		if( preg_match("/\"(.+)\"/si", $this->q, $phraseParts) === 1 )
    		{
    			$this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $phraseParts[1] . "\"";
    		}
    	}
    
    
        /**
         * Returns the search focus.
         *
         * parseFormData() resolves it through trans('fokiNames.' . <focus input>) and
         * falls back to the "web" entry when the looked-up value contains a ".".
         *
         * @return string
         */
        public function getFokus ()
        {
            return $this->fokus;
        }
    
        /**
         * Returns the client address recorded for this query.
         *
         * parseFormData() takes it from $_SERVER['HTTP_FROM'] and uses "127.0.0.1"
         * when that entry is not set.
         * NOTE(review): presumably a front-end proxy supplies the header; confirm.
         *
         * @return string
         */
        public function getIp ()
        {
            return $this->ip;
        }
    
        /**
         * Returns the search input as entered by the user.
         *
         * This is the trimmed 'eingabe' request parameter; unlike getQ() the special
         * operators have not been removed from it.
         *
         * @return string
         */
        public function getEingabe ()
        {
            return $this->eingabe;
        }
    
    
        /**
         * Returns the query string that is handed to the search engines.
         *
         * checkSpecialSearches() removes a "site:<host>" operator from the query and
         * keeps the host in $this->site; it is appended again here so that engines
         * receive the complete query.
         *
         * @return string the query, followed by " site:<host>" when a site search is active
         */
        public function getQ ()
        {
            # $site has no default and stays null until a "site:" operator was parsed.
            # Cast it so that strlen() never receives null (deprecated since PHP 8.1).
            if( strlen((string) $this->site) > 0 )
            {
                return $this->q . " site:" . $this->site;
            }

            return $this->q;
        }
    
    
        /**
         * Returns the value of the $url property.
         *
         * NOTE(review): no assignment to $this->url is visible in this part of the
         * class; confirm where it gets populated before relying on it.
         *
         * @return mixed
         */
        public function getUrl ()
        {
            return $this->url;
        }
        /**
         * Returns the time setting of this search.
         *
         * parseFormData() reads it from the 'time' request parameter (default 1000)
         * and may override it for certain engines.
         * NOTE(review): createSearchEngines() compares it with a counter that grows
         * by 50 per 50000 microsecond sleep, so the unit is presumably milliseconds.
         *
         * @return mixed
         */
        public function getTime ()
        {
            return $this->time;
        }
    
        /**
         * Returns the language reported for the client.
         *
         * parseFormData() takes it from $_SERVER['HTTP_LANGUAGE'] and stores an empty
         * string when that entry is not set.
         *
         * @return string
         */
        public function getLanguage ()
        {
            return $this->language;
        }
    
    
        /**
         * Returns the language filter chosen for the results.
         *
         * parseFormData() accepts "de", "en" and "all" from the 'lang' request
         * parameter; every other value is replaced by "all".
         *
         * @return string
         */
        public function getLang ()
        {
            return $this->lang;
        }
    
        /**
         * Returns the "Sprueche" (quotes) setting.
         *
         * parseFormData() stores true when the 'sprueche' request parameter is "off"
         * (its default) and false otherwise; for the "en" locale it stores the string
         * "off" instead.
         * NOTE(review): the boolean looks inverted and the type is mixed; confirm
         * what callers expect.
         *
         * @return bool|string
         */
        public function getSprueche ()
        {
            return $this->sprueche;
        }
    
    
    
        /**
         * Returns the path of the search engine definition file.
         *
         * parseFormData() points it at "sumasEn.xml" for the "en" locale and at
         * "sumas.xml" otherwise, both inside config_path().
         *
         * @return string
         */
        public function getSumaFile ()
        {
            return $this->sumaFile;
        }
    
    
        /**
         * Returns the hosts the user excluded with "-host:<host>" in the query.
         *
         * The list is filled by checkSpecialSearches().
         *
         * @return array
         */
        public function getUserHostBlacklist ()
        {
            return $this->hostBlacklist;
        }
    
        /**
         * Returns the domains the user excluded with "-domain:<domain>" in the query.
         *
         * The list is filled by checkSpecialSearches().
         *
         * @return array
         */
        public function getUserDomainBlacklist ()
        {
            return $this->domainBlacklist;
        }
    
        /**
         * Returns the globally configured domain blacklist.
         *
         * The entries are the lines of config "blacklistDomains.txt", split on "\n";
         * the list stays empty when the blacklist files could not be found.
         *
         * @return array
         */
        public function getDomainBlacklist ()
        {
            return $this->domainsBlacklisted;
        }
    
        /**
         * Returns the globally configured URL blacklist.
         *
         * The entries are the lines of config "blacklistUrl.txt", split on "\n";
         * the list stays empty when the blacklist files could not be found.
         *
         * @return array
         */
        public function getUrlBlacklist ()
        {
            return $this->urlsBlacklisted;
        }
        /**
         * Returns the shared language detector.
         *
         * It is the TextLanguageDetect instance this class creates and configures
         * with setNameMode("2").
         *
         * @return TextLanguageDetect
         */
        public function getLanguageDetect ()
        {
            return $this->languageDetect;
        }
        /**
         * Returns the words the user excluded by prefixing them with "-".
         *
         * The list is filled by checkSpecialSearches().
         *
         * @return array
         */
        public function getStopWords ()
        {
            return $this->stopWords;
        }
    
        {
            if(isset($this->addedHosts[$host]))
            {
                return $this->addedHosts[$host];
            }else
            {
                return 0;
            }
        }
    
        {
            $hash = md5($host);
            if(isset($this->addedHosts[$hash]))
            {
                $this->addedHosts[$hash] += 1;
            }else
            {
                $this->addedHosts[$hash] = 1;
            }
        }
        /**
         * Returns the host of an active site search.
         *
         * checkSpecialSearches() sets it from a "site:<host>" operator; the property
         * has no default, so it is null when no such operator was given.
         *
         * @return string|null
         */
        public function getSite()
        {
            return $this->site;
        }
    
        {
            $hash = md5($link);
            if(isset($this->addedLinks[$hash]))
            {
                return false;
            }else
            {
                $this->addedLinks[$hash] = 1;
    
                return true;
            }
        }
    
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
            $requestData = $this->request->except('page');
            $requestData['focus'] = $fokus;
            $requestData['out'] = "results";
            $link = action('MetaGerSearch@search', $requestData);
    
            return $link;
        }
    
        /**
         * Builds the URL of the quicktips endpoint.
         *
         * @return string URL generated for the MetaGerSearch@quicktips controller action
         */
        public function generateQuicktipLink()
        {
            return action('MetaGerSearch@quicktips');
        }
    
    
        {
            $host = urlencode($host);
    
            $requestData = $this->request->except(['page','out']);
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
            $requestData['eingabe'] .= " site:$host";
            $requestData['focus'] = "web";
            $link = action('MetaGerSearch@search', $requestData);
    
        {
            $host = urlencode($host);
    
            $requestData = $this->request->except(['page','out']);
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
            $requestData['eingabe'] .= " -host:$host";
            $link = action('MetaGerSearch@search', $requestData);
    
        {
            $domain = urlencode($domain);
    
            $requestData = $this->request->except(['page','out']);
    
    Dominik Hebeler's avatar
    Dominik Hebeler committed
            $requestData['eingabe'] .= " -domain:$domain";
            $link = action('MetaGerSearch@search', $requestData);
    
            return $link;
        }
    
        /**
         * Returns the link target used for result links.
         *
         * parseFormData() stores "_blank" when the 'tab' request parameter is missing
         * or equals "1", and "_self" for any other value.
         *
         * @return string
         */
        public function getTab ()
        {
            return $this->tab;
        }
        /**
         * Returns the current results.
         *
         * The property starts as an empty array; combineResults() replaces it with a
         * LengthAwarePaginator holding the results of the current page.
         *
         * @return array|LengthAwarePaginator
         */
        public function getResults ()
        {
            return $this->results;
        }
        /**
         * Removes the first advertisement from $this->ads and returns its properties.
         *
         * @return array|null the ad's accessible properties as an array, or null when no ad is left
         */
        public function popAd()
        {
            if( count($this->ads) <= 0 )
            {
                return null;
            }

            # array_shift() takes the ad off the front, so every ad is handed out only once.
            $nextAd = array_shift($this->ads);

            return get_object_vars($nextAd);
        }
    
        /**
         * Builds the picture proxy URL for an image.
         *
         * @param string $link address of the original image; passed on as the "url" parameter
         * @return string URL generated for the Pictureproxy@get controller action
         */
        public function getImageProxyLink($link)
        {
            return action('Pictureproxy@get', ["url" => $link]);
        }