MetaGer.php 41.9 KB
Newer Older
1 2 3 4
<?php
namespace App;

use App;
5
use Cache;
6 7
use Illuminate\Http\Request;
use Jenssegers\Agent\Agent;
Dominik Hebeler's avatar
Bugfix  
Dominik Hebeler committed
8
use LaravelLocalization;
9
use Log;
10
use Predis\Connection\ConnectionException;
11
use Redis;
12 13 14

class MetaGer
{
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    # Einstellungen für die Suche
    protected $fokus;
    protected $eingabe;
    protected $q;
    protected $category;
    protected $time;
    protected $page;
    protected $lang;
    protected $cache = "";
    protected $site;
    protected $hostBlacklist   = [];
    protected $domainBlacklist = [];
    protected $stopWords       = [];
    protected $phrases         = [];
    protected $engines         = [];
    protected $results         = [];
    protected $ads             = [];
Dominik Hebeler's avatar
Dominik Hebeler committed
32
    protected $products        = [];
33 34 35
    protected $warnings        = [];
    protected $errors          = [];
    protected $addedHosts      = [];
36
    protected $startCount      = 0;
37
    protected $canCache        = false;
38 39 40 41 42 43 44 45 46
    # Daten über die Abfrage
    protected $ip;
    protected $language;
    protected $agent;
    # Konfigurationseinstellungen:
    protected $sumaFile;
    protected $mobile;
    protected $resultCount;
    protected $sprueche;
47
    protected $domainsBlacklisted = [];
48
    protected $urlsBlacklisted    = [];
49 50 51
    protected $url;
    protected $languageDetect;

52 53
    /**
     * Sets up a MetaGer instance.
     *
     * Starts the request timer, reads the domain/URL blacklists from the
     * config directory, loads every parser script and probes whether the
     * cache backend is reachable.
     */
    public function __construct()
    {
        # Start the timer
        $this->starttime = microtime(true);

        # Try to read the blacklists (one entry per line)
        $domainBlacklistFile = config_path() . "/blacklistDomains.txt";
        $urlBlacklistFile    = config_path() . "/blacklistUrl.txt";
        if (file_exists($domainBlacklistFile) && file_exists($urlBlacklistFile)) {
            $tmp                      = file_get_contents($domainBlacklistFile);
            $this->domainsBlacklisted = explode("\n", $tmp);
            $tmp                      = file_get_contents($urlBlacklistFile);
            $this->urlsBlacklisted    = explode("\n", $tmp);
        } else {
            Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
        }

        # Load the parser scripts.
        # require_once (instead of require) keeps a second MetaGer instance in
        # the same process from re-declaring the parser classes.
        $dir = app_path() . "/Models/parserSkripte/";
        foreach (scandir($dir) as $filename) {
            $path = $dir . $filename;
            if (is_file($path)) {
                require_once $path;
            }
        }

        # Probe the cache: a connection error means we cannot cache
        try {
            Cache::has('test');
            $this->canCache = true;
        } catch (ConnectionException $e) {
            $this->canCache = false;
        }
    }
84

85
    # Erstellt aus den gesammelten Ergebnissen den View
86 87 88
    /**
     * Builds the view for the collected results.
     *
     * Writes the search log first, then picks the template from the current
     * focus ("bilder" or anything else) and the requested output format
     * ($this->out) and binds the view data to it.
     *
     * @return mixed the view returned by the view() helper
     */
    public function createView()
    {
        # Extract the accessible properties of every result for the view
        $viewResults = [];
        foreach ($this->results as $singleResult) {
            $viewResults[] = get_object_vars($singleResult);
        }

        # The log entry for this search has to be written as well
        $this->createLogs();

        # Decide which template is rendered and which optional data it receives
        $template      = 'metager3';
        $passResults   = false;
        $suspendHeader = false;
        if ($this->fokus === "bilder") {
            # Image searches always receive the result list
            $passResults = true;
            $template    = ($this->out == 'results') ? 'metager3bilderresults' : 'metager3bilder';
        } else {
            switch ($this->out) {
                case 'results':
                    $template    = 'metager3results';
                    $passResults = true;
                    break;
                case 'results-with-style':
                    $passResults   = true;
                    $suspendHeader = true;
                    break;
            }
        }

        $view = view($template);
        if ($passResults) {
            $view = $view->with('results', $viewResults);
        }
        $view = $view
            ->with('eingabe', $this->eingabe)
            ->with('mobile', $this->mobile)
            ->with('warnings', $this->warnings)
            ->with('errors', $this->errors)
            ->with('metager', $this);
        if ($suspendHeader) {
            $view = $view->with('suspendheader', "yes");
        }

        return $view->with('browser', (new Agent())->browser());
    }

Phil Höfer's avatar
Phil Höfer committed
154
    /**
     * Turns the raw engine output into the final, numbered result list.
     *
     * Steps: merge the results of all engines, sort them by rank (highest
     * first), drop invalid results, apply the boost and Adgoal partner-link
     * rewrites, number and colour the results, strip ads where required and
     * finally store the data for the "next page" link in the cache.
     */
    public function prepareResults()
    {
        $engines = $this->engines;

        # Merge: combineResults() appends to $this->results / ads / products
        # and returns nothing, so there is no return value to keep.
        $this->combineResults($engines);

        # Sort by rank, highest rank first
        uasort($this->results, function ($a, $b) {
            if ($a->getRank() == $b->getRank()) {
                return 0;
            }

            return ($a->getRank() < $b->getRank()) ? 1 : -1;
        });

        # Validate the results (this also re-indexes the list from 0)
        $newResults = [];
        foreach ($this->results as $result) {
            if ($result->isValid($this)) {
                $newResults[] = $result;
            }
        }
        $this->results = $newResults;

        # Boost implementation
        $this->results = $this->parseBoost($this->results);

        # Adgoal implementation
        $this->results = $this->parseAdgoal($this->results);

        $counter   = 0;
        $firstRank = 0;

        # Offset of the first result number on this page
        if (isset($this->startForwards)) {
            $this->startCount = $this->startForwards;
        } elseif (isset($this->startBackwards)) {
            $this->startCount = $this->startBackwards - count($this->results) - 1;
        } else {
            $this->startCount = 0;
        }

        foreach ($this->results as $result) {
            if ($counter === 0) {
                $firstRank = $result->rank;
            }

            $counter++;
            $result->number = $counter + $this->startCount;

            # Confidence is the rank relative to the best result on the page
            $confidence = ($firstRank > 0) ? $result->rank / $firstRank : 0;

            if ($confidence > 0.65) {
                $result->color = "#FF4000";
            } elseif ($confidence > 0.4) {
                $result->color = "#FF0080";
            } elseif ($confidence > 0.2) {
                $result->color = "#C000C0";
            } else {
                $result->color = "#000000";
            }
        }

        if (LaravelLocalization::getCurrentLocale() === "en") {
            $this->ads = [];
        }

        $this->validated = false;
        if (isset($this->password)) {
            # We offer a paid API access for which the advertisements are hidden.
            # Currently this is only the University of Mainz, so only that key is checked.
            $secret = getenv('mainz');

            # Without a configured secret the expected value would collapse to
            # md5(eingabe), which anybody can compute - so never validate then.
            if (is_string($secret) && $secret !== "" && is_string($this->password)) {
                $expected = md5($this->eingabe . $secret);
                # Timing-safe comparison of the two hashes
                if (hash_equals($expected, $this->password)) {
                    $this->ads       = [];
                    $this->products  = [];
                    $this->validated = true;
                }
            }
        }

        if (count($this->results) <= 0) {
            $this->errors[] = trans('metaGer.results.failed');
        }

        # Store what is needed to build the next page under a key derived from
        # that data; otherwise there is no next page.
        if ($this->canCache() && isset($this->next) && count($this->next) > 0 && count($this->results) > 0) {
            $page       = $this->page + 1;
            $this->next = [
                'page'          => $page,
                'startForwards' => $this->results[count($this->results) - 1]->number,
                'engines'       => $this->next,
            ];
            Cache::put(md5(serialize($this->next)), serialize($this->next), 60);
        } else {
            $this->next = [];
        }
    }
262

Phil Höfer's avatar
Phil Höfer committed
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
    /**
     * Collects the output of all engines on this object.
     *
     * Appends each engine's paging info (next/last), its valid results, its
     * ads and its products to the corresponding lists. Returns nothing.
     *
     * @param array $engines the engines whose output is merged
     */
    public function combineResults($engines)
    {
        foreach ($engines as $searchEngine) {
            # Paging information, if the engine provides any
            if (isset($searchEngine->next)) {
                $this->next[] = $searchEngine->next;
            }
            if (isset($searchEngine->last)) {
                $this->last[] = $searchEngine->last;
            }

            # Only results flagged as valid are taken over
            foreach ($searchEngine->results as $candidate) {
                if (!$candidate->valid) {
                    continue;
                }
                $this->results[] = $candidate;
            }

            foreach ($searchEngine->ads as $advertisement) {
                $this->ads[] = $advertisement;
            }

            foreach ($searchEngine->products as $productEntry) {
                $this->products[] = $productEntry;
            }
        }
    }

287 288
    /**
     * Adds the partner tag to every result that points to amazon.de.
     *
     * A result is recognised by its display link (anzeigeLink); the tag is
     * appended to the real link and the result is marked as partner shop.
     *
     * @param array $results result objects with anzeigeLink and link
     * @return array the same results, modified in place
     */
    public function parseBoost($results)
    {
        foreach ($results as $result) {
            # The host has to be exactly (www.)amazon.de: the dot after "www"
            # is escaped and the host must end after ".de", so that e.g.
            # "amazon.de.example.com" is not tagged.
            if (!preg_match('/^(https?:\/\/)?(www\.)?amazon\.de(?=[\/?#:]|$)/', $result->anzeigeLink)) {
                continue;
            }

            # The separator depends on the link that is actually extended,
            # not on the display link.
            $separator = (strpos($result->link, '?') === false) ? '?' : '&';
            $result->link .= $separator . 'tag=boostmg01-21';
            $result->partnershop = true;
        }
        return $results;
    }
302

303 304
    /**
     * Rewrites the links of results whose host is a partner of the
     * smartredirect affiliate API.
     *
     * Sends the hosts of all results to the API; for every host the API
     * reports back, the matching results get a logo and a ClickGate link and
     * are marked as partner shop. On any error the results are returned as
     * they are at that point.
     *
     * @param array $results result objects with anzeigeLink, link and image
     * @return array the same results, modified in place
     */
    public function parseAdgoal($results)
    {
        $publicKey  = getenv('adgoal_public');
        $privateKey = getenv('adgoal_private');
        # Both keys are needed: the public one for the lookup, the private one
        # for signing the ClickGate links. Without them nothing is rewritten.
        if ($publicKey === false || $privateKey === false) {
            return $results;
        }
        # Nothing to look up - skip the HTTP request entirely
        if (empty($results)) {
            return $results;
        }

        try {
            # Collect the host of every result and remember it on the result
            $hosts = [];
            foreach ($results as $result) {
                $link = $result->anzeigeLink;
                if (strpos($link, "http") !== 0) {
                    $link = "http://" . $link;
                }
                $host        = parse_url($link, PHP_URL_HOST);
                $result->tld = $host;
                $hosts[]     = $host;
            }
            $tldList = rtrim(implode(",", $hosts), ",");

            # Hash value
            $hash = md5("meta" . $publicKey . $tldList . "GER");

            # Query
            $query = urlencode($this->q);

            $link   = "https://api.smartredirect.de/api_v2/CheckForAffiliateUniversalsearchMetager.php?p=" . $publicKey . "&k=" . $hash . "&tld=" . $tldList . "&q=" . $query;
            $answer = json_decode(file_get_contents($link));

            # An unparsable or unexpected answer means there is nothing to rewrite
            if (!is_array($answer) && !is_object($answer)) {
                return $results;
            }

            # Now only the links of the advertisers have to be changed:
            foreach ($answer as $el) {
                # Each element is expected to be [host, logo hash]
                if (!is_array($el) || !isset($el[0], $el[1])) {
                    continue;
                }
                $hoster   = $el[0];
                $logoHash = $el[1];

                foreach ($results as $result) {
                    if ($hoster !== $result->tld) {
                        continue;
                    }

                    # This is an advertiser. Add the logo:
                    if ($result->image !== "") {
                        $result->logo = "https://img.smartredirect.de/logos_v2/60x30/" . $logoHash . ".gif";
                    } else {
                        $result->image = "https://img.smartredirect.de/logos_v2/120x60/" . $logoHash . ".gif";
                    }

                    # Add the link:
                    $targetUrl = $result->anzeigeLink;
                    if (strpos($targetUrl, "http") !== 0) {
                        $targetUrl = "http://" . $targetUrl;
                    }

                    $gateHash            = md5($targetUrl . $privateKey);
                    $newLink             = "https://api.smartredirect.de/api_v2/ClickGate.php?p=" . $publicKey . "&k=" . $gateHash . "&url=" . urlencode($targetUrl) . "&q=" . $query;
                    $result->link        = $newLink;
                    $result->partnershop = true;
                }
            }
        } catch (\ErrorException $e) {
            return $results;
        }

        return $results;
    }
366

Karl's avatar
Karl committed
367 368 369 370
    /*
     * Creation of the search engines up to the arrival of the results, including the helper methods
     */

371 372
    /**
     * Creates the search engines for this request, starts all searches and
     * waits for / collects their results.
     *
     * Does nothing when the request carries no search term ("eingabe").
     *
     * @param Request $request the current HTTP request
     */
    public function createSearchEngines(Request $request)
    {
        # If there is no search term
        if (!$request->has("eingabe")) {
            return;
        }

        $xml                  = simplexml_load_file($this->sumaFile);
        $sumas                = $xml->xpath("suma");
        $enabledSearchengines = [];
        $overtureEnabled      = false;
        $sumaCount            = 0;

        /* Builds the list of enabled sumas.
         * The only difference for a customised focus is that the decision is
         * made by the types passed in the request instead of the types of the suma.
         * Apart from that the same checks are done:
         * - is it one of the special engines that always have to be on,
         * - remember that Overture is on if it is Overture,
         * - increase the suma counter,
         * - add it to the list.
         */
        foreach ($sumas as $suma) {
            # The "not disabled" check has to apply to BOTH ways of getting
            # enabled; without the outer parentheses && binds tighter than ||
            # and a selected but disabled suma would still be used.
            if (($this->sumaIsSelected($suma, $request)
                || ($this->isBildersuche()
                    && $this->sumaIsAdsuche($suma, $overtureEnabled)))
                && (!$this->sumaIsDisabled($suma))) {
                if ($this->sumaIsOverture($suma)) {
                    $overtureEnabled = true;
                }
                if ($this->sumaIsNotAdsuche($suma)) {
                    $sumaCount += 1;
                }
                $enabledSearchengines[] = $suma;
            }
        }

        # Special rule for all engines that belong to the mini searchers:
        # they can all be queried together through a single link.
        $subcollections = [];

        $tmp = [];
        foreach ($enabledSearchengines as $engine) {
            if (isset($engine['minismCollection'])) {
                $subcollections[] = $engine['minismCollection']->__toString();
            } else {
                $tmp[] = $engine;
            }
        }
        $enabledSearchengines = $tmp;
        if (sizeof($subcollections) > 0) {
            $enabledSearchengines[] = $this->loadMiniSucher($xml, $subcollections);
        }

        if ($sumaCount <= 0) {
            $this->errors[] = trans('metaGer.settings.noneSelected');
        }

        # If a site search is requested, check whether any of the engines supports it at all
        $siteSearchFailed = $this->checkCanNotSitesearch($enabledSearchengines);

        if ($this->requestIsCached($request)) {
            $engines = $this->getCachedEngines($request);
        } else {
            $engines = $this->actuallyCreateSearchEngines($enabledSearchengines, $siteSearchFailed);
        }

        # Start all searches
        foreach ($engines as $engine) {
            $engine->startSearch($this);
        }

        $this->adjustFocus($sumas, $enabledSearchengines);

        /* Wait for the answers of the search engines.
         * The engines answered from the cache are counted here:
         * $enginesToLoad lists the engines we are waiting for and, after
         * waiting, which of them did not answer in time.
         */
        $enginesToLoad = [];
        $canBreak      = false;
        foreach ($engines as $engine) {
            if ($engine->cached) {
                if ($overtureEnabled && ($engine->name === "overture" || $engine->name === "overtureAds")) {
                    $canBreak = true;
                }
            } else {
                $enginesToLoad[$engine->name] = false;
            }
        }

        $this->waitForResults($enginesToLoad, $overtureEnabled, $canBreak);

        $this->retrieveResults($engines);
    }

475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
    # Special searches and search engines (sumas)

    /**
     * Tells whether a suma is selected for the current search.
     *
     * With the focus "angepasst" a suma counts as selected when the request
     * contains a parameter named like the suma; otherwise when the current
     * focus is one of the comma separated types of the suma.
     *
     * @param mixed $suma    suma entry, read via its "name" / "type" keys
     * @param mixed $request the current request
     * @return bool
     */
    public function sumaIsSelected($suma, $request)
    {
        if ($this->fokus === "angepasst") {
            return (bool) $request->has($suma["name"]);
        }

        $supportedTypes = explode(",", $suma["type"]);

        return in_array($this->fokus, $supportedTypes);
    }

    /**
     * Instantiates the parser object for every enabled search engine.
     *
     * Engines without site search support are skipped when a site search is
     * active and possible; engines without a parser file or whose parser
     * cannot be constructed are logged and skipped.
     *
     * @param array $enabledSearchengines suma entries to create engines for
     * @param mixed $siteSearchFailed     truthy when no engine supports the requested site search
     * @return array the created engines that report themselves as enabled
     */
    public function actuallyCreateSearchEngines($enabledSearchengines, $siteSearchFailed)
    {
        $engines = [];
        foreach ($enabledSearchengines as $engine) {

            # Skip engines that cannot do the requested site search
            if (!$siteSearchFailed
                && strlen($this->site) > 0
                && (!isset($engine['hasSiteSearch'])
                    || $engine['hasSiteSearch']->__toString() === "0")) {
                continue;
            }

            # Class name of the parser
            $package = ucfirst($engine["package"]->__toString());
            $path    = "App\Models\parserSkripte\\" . $package;

            # Check that the parser exists
            if (!file_exists(app_path() . "/Models/parserSkripte/" . $package . ".php")) {
                Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert");
                $this->errors[] = trans('metaGer.engines.noParser', ['engine' => $engine["name"]]);
                continue;
            }

            # Try to create the search engine.
            # microtime(true) returns a float; the string form cannot be subtracted.
            $time = microtime(true);
            try {
                $tmp = new $path($engine, $this);
            } catch (\ErrorException $e) {
                # var_dump() would print to the output and return null, so log the message instead
                Log::error("Konnte " . $engine["name"] . " nicht abfragen. " . $e->getMessage());
                continue;
            }

            # Output in debug mode; the total is given in milliseconds
            if ($tmp->enabled && isset($this->debug)) {
                $this->warnings[] = $tmp->service . "   Connection_Time: " . $tmp->connection_time . "    Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime(true) - $time) * 1000);
            }

            # Only engines that are switched on are added to the list
            if ($tmp->isEnabled()) {
                $engines[] = $tmp;
            }
        }
        return $engines;
    }

    /**
     * Returns true when the current focus is NOT "bilder".
     *
     * NOTE(review): the name suggests the opposite ("is image search").
     * Callers rely on the current return value, so confirm them before renaming or inverting.
     *
     * @return bool
     */
    public function isBildersuche()
    {
        return !($this->fokus === "bilder");
    }

    /**
     * Tells whether a suma is one of the advertisement engines.
     *
     * "overtureAds" only counts while Overture itself is not enabled.
     *
     * @param mixed $suma            suma entry, read via its "name" key
     * @param bool  $overtureEnabled whether Overture has already been enabled
     * @return bool
     */
    public function sumaIsAdsuche($suma, $overtureEnabled)
    {
        $sumaName = $suma["name"]->__toString();

        if (in_array($sumaName, ["qualigo", "similar_product_ads", "rlvproduct"], true)) {
            return true;
        }

        return !$overtureEnabled && $sumaName === "overtureAds";
    }

    /**
     * Tells whether a suma is switched off, i.e. has a "disabled" entry with the value "1".
     *
     * @param mixed $suma suma entry, read via its "disabled" key
     * @return bool
     */
    public function sumaIsDisabled($suma)
    {
        if (!isset($suma['disabled'])) {
            return false;
        }

        return $suma['disabled']->__toString() === "1";
    }

    /**
     * Tells whether a suma is "overture" or "overtureAds".
     *
     * @param mixed $suma suma entry, read via its "name" key
     * @return bool
     */
    public function sumaIsOverture($suma)
    {
        $sumaName = $suma["name"]->__toString();

        return in_array($sumaName, ["overture", "overtureAds"], true);
    }

    /**
     * Tells whether a suma is none of "qualigo", "similar_product_ads" and "overtureAds".
     *
     * NOTE(review): unlike sumaIsAdsuche() this list does not contain
     * "rlvproduct" - confirm whether that difference is intended.
     *
     * @param mixed $suma suma entry, read via its "name" key
     * @return bool
     */
    public function sumaIsNotAdsuche($suma)
    {
        $sumaName = $suma["name"]->__toString();

        return !in_array($sumaName, ["qualigo", "similar_product_ads", "overtureAds"], true);
    }

    /**
     * Tells whether the request refers to a cached follow-up page.
     *
     * True when the request has a "next" parameter, the cache holds an entry
     * under that key and the stored page number is greater than 1.
     *
     * @param mixed $request the current request
     * @return bool
     */
    public function requestIsCached($request)
    {
        if (!$request->has('next')) {
            return false;
        }

        if (!Cache::has($request->input('next'))) {
            return false;
        }

        $cachedPage = unserialize(Cache::get($request->input('next')));

        return $cachedPage['page'] > 1;
    }

    /**
     * Restores the paging state stored in the cache under the request's
     * "next" key and returns the engines stored with it.
     *
     * Sets the current page and, when present, the forward / backward start offsets.
     *
     * @param mixed $request the current request
     * @return mixed the cached "engines" entry
     */
    public function getCachedEngines($request)
    {
        $cachedPage = unserialize(Cache::get($request->input('next')));

        $this->page    = $cachedPage['page'];
        $cachedEngines = $cachedPage['engines'];

        # Take over whichever start offsets were stored
        foreach (['startForwards', 'startBackwards'] as $offsetName) {
            if (isset($cachedPage[$offsetName])) {
                $this->$offsetName = $cachedPage[$offsetName];
            }
        }

        return $cachedEngines;
    }

    /**
     * Prepares the "minism" engine that queries all mini searchers at once.
     *
     * Fills the placeholders <<SUBCOLLECTIONS>> and <<COUNT>> in the form
     * data of the suma named "minism".
     *
     * @param mixed $xml            the loaded suma file
     * @param array $subcollections names of the selected sub collections
     * @return mixed the "minism" suma entry with the filled in form data
     */
    public function loadMiniSucher($xml, $subcollections)
    {
        $minisucherEngine = $xml->xpath('suma[@name="minism"]')[0];

        # Count the collections BEFORE they are joined into one string;
        # counting the joined string would not give the number of collections.
        $collectionCount = count($subcollections);
        $collectionQuery = urlencode("(" . implode(" OR ", $subcollections) . ")");

        $formData = (string) $minisucherEngine["formData"];
        $formData = str_replace("<<SUBCOLLECTIONS>>", $collectionQuery, $formData);
        $formData = str_replace("<<COUNT>>", (string) ($collectionCount * 10), $formData);

        $minisucherEngine["formData"] = $formData;
        return $minisucherEngine;
    }

605
    # Passt den Suchfokus an, falls für einen Fokus genau alle vorhandenen Sumas eingeschaltet sind
606 607
    /**
     * Adjusts the search focus if exactly the engines of one focus are enabled.
     *
     * Builds the list of engines per focus from the suma file, compares each
     * list with the enabled engines (ad engines excluded) and sets
     * $this->fokus to a focus whose list is identical. If several match, the last one wins.
     *
     * @param mixed $sumas                all suma entries of the suma file
     * @param array $enabledSearchengines the suma entries enabled for this search
     */
    public function adjustFocus($sumas, $enabledSearchengines)
    {
        # Find the sumas contained in every focus: [focus] => [suma names]
        $foki = [];
        foreach ($sumas as $suma) {
            # The attribute is an XML element object, so it has to be converted
            # to a string before it can be compared with "".
            $notDisabled    = !isset($suma['disabled']) || $suma['disabled']->__toString() === "";
            $userSelectable = !isset($suma['userSelectable']) || $suma['userSelectable']->__toString() === "1";
            if (!$notDisabled || !$userSelectable) {
                continue;
            }

            # Mini searchers are all represented by the single "minism" engine
            $engineName = isset($suma['minismCollection']) ? "minism" : $suma['name']->__toString();

            # Sumas without any focus are listed under "andere"
            $focuses = isset($suma['type']) ? explode(",", $suma['type']->__toString()) : ["andere"];
            foreach ($focuses as $foc) {
                $foki[$foc][] = $engineName;
            }
        }

        # Find the names of the currently enabled sumas (without ad engines)
        $realEngNames = [];
        foreach ($enabledSearchengines as $realEng) {
            $nam = $realEng["name"]->__toString();
            if ($nam !== "qualigo" && $nam !== "overtureAds" && $nam !== "rlvproduct") {
                $realEngNames[] = $nam;
            }
        }

        # Compare the enabled engines with the list of every focus. If they are
        # identical the user apparently selected the engines of a complete
        # focus, so the focus is adjusted accordingly.
        foreach ($foki as $fok => $fokiEngNames) {
            $isFokus = true;

            # Every engine expected in the focus is enabled.
            # For image searches it does not matter whether all engines of the
            # focus are enabled, they are displayed as image search anyway.
            foreach ($fokiEngNames as $fen) {
                if (!in_array($fen, $realEngNames, true) && $fok !== "bilder") {
                    $isFokus = false;
                }
            }

            # Every enabled engine belongs to this focus
            foreach ($realEngNames as $ren) {
                if (!in_array($ren, $fokiEngNames, true)) {
                    $isFokus = false;
                }
            }

            # If the lists are identical, switch the focus
            if ($isFokus) {
                $this->fokus = $fok;
            }
        }
    }
668

669 670 671 672 673 674 675
    /**
     * Checks whether a requested site search is impossible.
     *
     * When a site filter is set, counts the enabled engines that support
     * site search and adds an error (none) or a warning (at least one).
     *
     * @param array $enabledSearchengines the suma entries enabled for this search
     * @return bool true when a site search was requested but no engine
     *              supports it; false otherwise (also when no site is set)
     */
    public function checkCanNotSitesearch($enabledSearchengines)
    {
        # No site filter requested - nothing can fail.
        # Returned explicitly so that the method always yields a bool.
        if (!(strlen($this->site) > 0)) {
            return false;
        }

        $enginesWithSite = 0;
        foreach ($enabledSearchengines as $engine) {
            if (isset($engine['hasSiteSearch']) && $engine['hasSiteSearch']->__toString() === "1") {
                $enginesWithSite++;
            }
        }

        if ($enginesWithSite === 0) {
            $this->errors[] = trans('metaGer.sitesearch.failed', ['site' => $this->site, 'searchLink' => $this->generateSearchLink("web", false)]);
            return true;
        }

        $this->warnings[] = trans('metaGer.sitesearch.success', ['site' => $this->site]);
        return false;
    }

    /**
     * Polls Redis until enough engines have answered or the time is used up.
     *
     * During the first 500 ms all awaited engines have to answer, afterwards
     * (until $this->time ms) 80% are enough; in both phases $canBreak has to
     * be set as well. Finally every engine found in the Redis hash is marked
     * as answered and the list is stored in $this->enginesToLoad.
     *
     * @param array $enginesToLoad   engine name => false for every awaited engine
     * @param bool  $overtureEnabled whether Overture is part of this search
     * @param bool  $canBreak        whether waiting may stop before the timeout
     */
    public function waitForResults($enginesToLoad, $overtureEnabled, $canBreak)
    {
        $pollStart = microtime(true);

        # How many search engines are we waiting for?
        $expectedCount = count($enginesToLoad);

        for (;;) {
            $elapsedMs     = (microtime(true) - $pollStart) * 1000;
            $answeredCount = intval(Redis::hlen('search.' . $this->getHashCode()));

            # Once Overture has answered, waiting may be stopped early
            if ($overtureEnabled
                && (Redis::hexists('search.' . $this->getHashCode(), 'overture')
                    || Redis::hexists('search.' . $this->getHashCode(), 'overtureAds'))) {
                $canBreak = true;
            }

            # Termination condition
            if ($elapsedMs < 500) {
                $enoughAnswers = ($expectedCount === 0 || $answeredCount >= $expectedCount);
            } elseif ($elapsedMs < $this->time) {
                $enoughAnswers = ($expectedCount === 0 || ($answeredCount / ($expectedCount * 1.0)) >= 0.8);
            } else {
                # Time is up
                break;
            }

            if ($enoughAnswers && $canBreak) {
                break;
            }

            usleep(50000);
        }

        # We waited as long as possible. Now register the engines that answered.
        $answeredEngines = Redis::hgetall('search.' . $this->getHashCode());
        foreach ($answeredEngines as $engineName => $unusedValue) {
            $enginesToLoad[$engineName] = true;
        }
        $this->enginesToLoad = $enginesToLoad;
    }
727

728 729 730
    /**
     * Fetches the results of all engines and shuts down those that did not finish.
     *
     * First asks every engine that is not loaded yet to retrieve its results
     * (errors are logged), then calls shutdown() on every engine that is still
     * not loaded and finally stores the engines on this object.
     *
     * @param array $engines the engines that were started for this search
     */
    public function retrieveResults($engines)
    {
        # Fetch the results of the engines
        foreach ($engines as $pendingEngine) {
            if ($pendingEngine->loaded) {
                continue;
            }
            try {
                $pendingEngine->retrieveResults($this);
            } catch (\ErrorException $exception) {
                Log::error($exception);
            }
        }

        # Discard engines that are not finished
        foreach ($engines as $unfinishedEngine) {
            if ($unfinishedEngine->loaded) {
                continue;
            }
            $unfinishedEngine->shutdown();
        }

        $this->engines = $engines;
    }

751 752 753
/*
 * End of search engine creation and result retrieval
 */
Karl's avatar
Karl committed
754

755 756
    /**
     * Reads the search settings from the request and stores them on this instance.
     *
     * Besides copying parameters (focus, query, category, language, theme, output
     * format, ...) onto properties, this method normalises a few of them:
     * out-of-range values fall back to defaults and some parameter combinations
     * override others (see the inline comments).
     *
     * Side effects on $request: when the "encoding" parameter is not "utf8" the
     * whole input is re-encoded from ISO-8859-1 to UTF-8, and when "minism" is
     * combined with "fportal"/"harvest" those two parameters are removed.
     *
     * Terminates the script via die() when the suma file does not exist.
     *
     * @param Request $request The incoming search request.
     * @return void
     */
    public function parseFormData(Request $request)
    {
        # Make sure the request data is UTF-8 encoded
        if ($request->input('encoding', '') !== "utf8") {
            # Earlier versions had no encoding parameter and submitted the data as ISO-8859-1
            $input = $request->all();
            foreach ($input as $key => $value) {
                if (is_array($value)) {
                    # Array parameters (e.g. "foo[]=bar") must be converted leaf by leaf;
                    # casting them to string would yield the literal "Array" and a notice.
                    array_walk_recursive($value, function (&$leaf) {
                        if (is_string($leaf)) {
                            $leaf = mb_convert_encoding($leaf, "UTF-8", "ISO-8859-1");
                        }
                    });
                    $input[$key] = $value;
                } else {
                    $input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1");
                }
            }
            $request->replace($input);
        }
        $this->url = $request->url();

        # Now check the submitted settings:
        # Focus
        $this->fokus = $request->input('focus', 'web');
        # Suma file (the same file is used for every locale)
        $this->sumaFile = config_path() . "/sumas.xml";
        if (!file_exists($this->sumaFile)) {
            die(trans('metaGer.formdata.cantLoad'));
        }
        # Search input
        $this->eingabe = trim($request->input('eingabe', ''));
        if (strlen($this->eingabe) === 0) {
            $this->warnings[] = trans('metaGer.formdata.noSearch');
        }
        $this->q = $this->eingabe;
        # IP
        $this->ip = $request->ip();
        # Language
        $this->language = isset($_SERVER['HTTP_LANGUAGE']) ? $_SERVER['HTTP_LANGUAGE'] : "";
        # Category
        $this->category = $request->input('category', '');
        # Request time
        $this->time = $request->input('time', 1000);
        # Page
        $this->page = 1;
        # Lang: only "de", "en" and "all" are accepted
        $this->lang = $request->input('lang', 'all');
        if (!in_array($this->lang, ["de", "en", "all"], true)) {
            $this->lang = "all";
        }
        $this->agent  = new Agent();
        $this->mobile = $this->agent->isMobile();
        # Quotes: stored as a boolean, true when the parameter is "off" (also the default)
        $this->sprueche = ($request->input('sprueche', 'off') === "off");
        # Theme: strip everything except alphanumeric characters and whitespace
        $this->theme = preg_replace("/[^[:alnum:][:space:]]/u", '', $request->input('theme', 'none'));
        # Results per page
        $this->resultCount = $request->input('resultCount', '20');

        # Sometimes parameters have to be adjusted to fit the search settings:
        if ($request->has('dart')) {
            $this->time       = 10000;
            $this->warnings[] = trans('metaGer.formdata.dartEurope');
        }
        if ($this->time <= 500 || $this->time > 20000) {
            $this->time = 1000;
        }
        if ($request->has('minism') && ($request->has('fportal') || $request->has('harvest'))) {
            # "minism" takes precedence: drop "fportal" and "harvest" from the request
            $request->replace(array_diff_key($request->all(), ['fportal' => true, 'harvest' => true]));
        }
        if (App::isLocale("en")) {
            # Keep the property boolean. The previous value, the string "off",
            # is truthy and therefore did not switch anything off.
            $this->sprueche = false;
        }
        # NOTE(review): the fallback 1000 lies outside the accepted 1..200 range - confirm this is intended.
        if ($this->resultCount <= 0 || $this->resultCount > 200) {
            $this->resultCount = 1000;
        }
        if ($request->has('onenewspageAll') || $request->has('onenewspageGermanyAll')) {
            $this->time  = 5000;
            $this->cache = "cache";
        }
        # Link target: "_self" only when "tab" is present and not "off"
        if ($request->has('tab') && $request->input('tab') !== "off") {
            $this->tab = "_self";
        } else {
            $this->tab = "_blank";
        }
        if ($request->has('password')) {
            $this->password = $request->input('password');
        }
        # Quicktips are enabled unless the "quicktips" parameter is present
        $this->quicktips = !$request->has('quicktips');
        # Output format, falling back to html for unknown values
        $this->out = $request->input('out', "html");
        if (!in_array($this->out, ["html", "json", "results", "results-with-style"], true)) {
            $this->out = "html";
        }
        $this->request = $request;
    }

    /**
     * Runs the special-syntax checks (site search, host blacklist, domain
     * blacklist, stop words, phrases) on the current query.
     *
     * Each check is handed the current value of $this->q rather than a snapshot
     * taken before the first check. The site, host and domain checks write the
     * query with their tokens removed back to $this->q. Passing the old snapshot
     * to a later check would let that check overwrite $this->q with a string that
     * still contains tokens an earlier check had already removed.
     * searchCheckStopwords and searchCheckPhrase are not visible here - presumably
     * they follow the same pattern; verify.
     *
     * @param Request $request The incoming search request; its optional "site"
     *                         parameter is forwarded to the site search check.
     * @return void
     */
    public function checkSpecialSearches(Request $request)
    {
        $site = $request->has('site') ? $request->input('site') : "";

        $this->searchCheckSitesearch($this->q, $site);
        $this->searchCheckHostBlacklist($this->q);
        $this->searchCheckDomainBlacklist($this->q);
        $this->searchCheckStopwords($this->q);
        $this->searchCheckPhrase($this->q);
    }

885
    /**
     * Detects a site search restriction.
     *
     * A "site:<value>" token inside the query is cut out of the query (the result
     * is stored in $this->q) and its value stored in $this->site. A non-empty
     * $site argument is applied afterwards and therefore takes precedence over
     * the token.
     *
     * @param string $query The search query to inspect.
     * @param string $site  Explicitly requested site, "" when none was given.
     * @return void
     */
    public function searchCheckSitesearch($query, $site)
    {
        $hasSiteToken = preg_match("/(.*)\bsite:(\S+)(.*)/si", $query, $parts);
        if ($hasSiteToken) {
            # $parts: [1] text before the token, [2] the site value, [3] text after it
            list(, $before, $tokenSite, $after) = $parts;

            $this->q    = $before . $after;
            $this->site = $tokenSite;
        }

        if ($site === "") {
            return;
        }
        $this->site = $site;
    }
895

896
    /**
     * Extracts "-host:<name>" tokens from the query.
     *
     * Every token found is removed from the query (the result is stored in
     * $this->q) and its host name appended to $this->hostBlacklist. If the
     * blacklist is non-empty afterwards, a warning listing all blacklisted hosts
     * is added to $this->warnings.
     *
     * @param string $query The search query to inspect.
     * @return void
     */
    public function searchCheckHostBlacklist($query)
    {
        # The leading (.*) is greedy, so tokens are consumed from right to left,
        # one per iteration, until none is left.
        while (preg_match("/(.*)(^|\s)-host:(\S+)(.*)/si", $query, $match)) {
            $this->hostBlacklist[] = $match[3];
            $query                 = $match[1] . $match[4];
            $this->q               = $query;
        }
        if (count($this->hostBlacklist) > 0) {
            # implode() instead of manual concatenation followed by rtrim(", "):
            # rtrim treats ", " as a character mask and would also strip a
            # trailing comma that belongs to the last host token.
            $hostString       = implode(", ", $this->hostBlacklist);
            $this->warnings[] = trans('metaGer.formdata.hostBlacklist', ['host' => $hostString]);
        }
    }
912

913
    public function searchCheckDomainBlacklist($query)
914
    {
915
        while (preg_match("/(.*)(^|\s)-domain:(\S+)(.*)/si", $query, $match)) {
916
            $this->domainBlacklist[] = $match[3];
917 918
            $query                   = $match[1] . $match[4];
            $this->q                 = $query;
919 920 921 922 923 924 925
        }
        if (sizeof($this->domainBlacklist) > 0) {
            $domainString = "";
            foreach ($this->domainBlacklist as $domain) {
                $domainString .= $domain . ", ";
            }
            $domainString     = rtrim($domainString, ", ");
926
            $this->warnings[] = trans('metaGer.formdata.domainBlacklist', ['domain' => $domainString]);
927
        }