Searchengine.php 12.7 KB
Newer Older
1
2
<?php

3
namespace App\Models;
4

5
use App\Jobs\Searcher;
6
use App\MetaGer;
7
8
use Cache;
use Illuminate\Foundation\Bus\DispatchesJobs;
9
use Log;
10
use Illuminate\Support\Facades\Redis;
11

12
abstract class Searchengine
13
{
14
    use DispatchesJobs;
15

Karl's avatar
Karl committed
16
17
18
    public $ch; # cURL handle used to obtain the results
    public $getString = ""; # The string for the GET request
    public $engine; # The original engine XML
Dominik Hebeler's avatar
Dominik Hebeler committed
19
20
21
22
23
24
    public $enabled  = true; # true unless the search engine is explicitly disabled
    public $results  = []; # The loaded results
    public $ads      = []; # The loaded advertisements
    public $products = []; # The loaded products
    public $loaded   = false; # true as soon as the results have been loaded
    public $cached   = false; # set to true by startSearch() when it takes its cache branch
Karl's avatar
Karl committed
25
26
27
28
29
30
31
32
33
34
35
36

    public $ip; # The IP taken from the MetaGer instance
    public $gefVon; # The HTML code linking to the search provider
    public $uses; # The number of times this search engine has been used
    public $homepage; # The homepage of this search engine
    public $name; # The name of this search engine
    public $disabled; # Whether this search engine is switched off (the constructor parses it with strtotime())
    public $useragent; # The HTTP user agent
    public $startTime; # The time this search engine object was created
    public $hash; # The hash value of this search engine

    public $fp; # Needed for artifacts
Karl's avatar
Karl committed
37
38
    public $socketNumber    = null; # Needed for artifacts
    public $counter         = 0; # Possibly needed for artifacts
Karl's avatar
Karl committed
39
40
    public $write_time      = 0; # Possibly needed for artifacts
    public $connection_time = 0; # Possibly needed for artifacts
41
42
43

    /**
     * Builds the engine from its XML configuration node and prepares the request.
     *
     * Every XML attribute is copied onto the object as a string property, defaults for
     * homepage and cache duration are filled in, the temporary-disable timeout is
     * evaluated and finally the GET string and the hashes are computed.
     *
     * If the engine is still disabled, construction stops early with $enabled === false
     * and none of the request related properties are set.
     *
     * @param \SimpleXMLElement $engine  XML node describing this search engine
     * @param MetaGer           $metager MetaGer instance supplying query, IP, language etc.
     */
    public function __construct(\SimpleXMLElement $engine, MetaGer $metager)
    {
        # Try to load as many attributes as possible from the engine XML
        foreach ($engine->attributes() as $attribute => $content) {
            $this->$attribute = (string) $content;
        }

        # Default homepage is metager.de
        if (!isset($this->homepage)) {
            $this->homepage = "https://metager.de";
        }

        # Store the XML of the engine
        $this->engine = $engine->asXML();

        # Default cache duration is 60
        if (!isset($this->cacheDuration)) {
            $this->cacheDuration = 60;
        }

        $this->enabled = true;

        # A search engine can be disabled temporarily and automatically after connection problems:
        if (isset($this->disabled)) {
            if (strtotime($this->disabled) <= time()) {
                # The timeout of the search engine has expired, so switch it back on.
                $this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet.");
            } else {
                # The timeout is still running: leave the engine switched off.
                $this->enabled = false;
                return;
            }
        }

        $this->useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1";
        $this->ip        = $metager->getIp();
        $this->gefVon    = sprintf(
            '<a href="%s" target="_blank" rel="noopener">%s</a>',
            $this->homepage,
            $this->displayName
        );
        $this->startTime = microtime();

        # Build the search string; engines with site search get a " site:" suffix
        # whenever the user restricted the search to a site.
        $q = $metager->getQ();
        if (isset($this->hasSiteSearch) && $this->hasSiteSearch === "1" && strlen($metager->getSite()) !== 0) {
            $q .= " site:" . $metager->getSite();
        }

        $this->getString  = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
        $this->hash       = md5($this->host . $this->getString . $this->port . $this->name);
        $this->resultHash = $metager->getHashCode();
        $this->canCache   = $metager->canCache();

        if (!isset($this->additionalHeaders)) {
            $this->additionalHeaders = "";
        }
    }

    /**
     * Parses the raw answer of the search engine.
     *
     * retrieveResults() calls this with the body it read from the cache or from Redis.
     *
     * @param mixed $result Raw result body as fetched for this engine
     */
    abstract public function loadResults($result);

Karl's avatar
Karl committed
99
    /**
     * Hook called by retrieveResults() right after loadResults(), with the same raw body.
     *
     * The base implementation intentionally does nothing.
     * NOTE(review): presumably subclasses override this to prepare a follow-up page - confirm.
     *
     * @param MetaGer $metager Current MetaGer instance
     * @param mixed   $result  Raw result body that was just loaded
     */
    public function getNext(MetaGer $metager, $result)
    {
        # Nothing to do by default.
    }

Karl's avatar
Karl committed
105
    /**
     * Starts the search for this engine.
     *
     * Checks whether the search is already cached; otherwise the request is handed to the
     * Searcher workers through Redis: a "waiting" marker is stored under the result hash,
     * the mission "<ResultHash>;<URL>" is appended to the engine's queue and, if no
     * Searcher has reported statistics for this engine, a new Searcher job is dispatched.
     *
     * @param \App\MetaGer $metager Current MetaGer instance (passed on to retrieveResults())
     */
    public function startSearch(\App\MetaGer $metager)
    {
        // NOTE(review): the trailing "0 == 1" makes this branch unreachable, so cached
        // results are currently never used here. Left untouched.
        if ($this->canCache && Cache::has($this->hash) && 0 == 1) {
            $this->cached = true;
            $this->retrieveResults($metager);
            return;
        }

        // We will push the confirmation of the submission to the Result Hash
        Redis::hset('search.' . $this->resultHash, $this->name, "waiting");

        // We need to submit an action that one of our workers can understand.
        // The missions are submitted to a redis queue in the following string format
        // <ResultHash>;<URL to fetch>
        // With <ResultHash> being the Hash Value where the fetcher will store the result
        // and <URL to fetch> being the full URL to the searchengine.
        $scheme  = ($this->port === "443") ? "https://" : "http://";
        $url     = $scheme . $this->host . $this->getString;
        $mission = $this->resultHash . ";" . $url;

        // Submit this mission to the corresponding Redis Queue.
        // Since each Searcher is dedicated to one specific search engine
        // each Searcher has its own queue lying under the redis key <name>.queue
        Redis::rpush($this->name . ".queue", $mission);

        /**
         * We have Searcher processes running for MetaGer.
         * Each Searcher is dedicated to one specific Searchengine and fetches its results.
         * We can have multiple Searchers for each engine, if needed.
         * At this point we need to decide whether we need to start a new Searcher process or
         * if we have enough of them running.
         * The information for that is provided through the redis system. Each running searcher
         * gives information how long it has waited to be given the last fetcher job.
         * The longer this time value is, the less frequently the search engine is used and the
         * fewer searchers of that type we need.
         * But if it's too low, i.e. 100ms, then the searcher is near its full workload and
         * needs assistance.
         **/
        $searcherData = Redis::hgetall($this->name . ".stats");

        // Each running searcher has one entry in the statistics.
        // So if it's empty, then we currently have no searcher running and
        // of course need to spawn a new one.
        $needSearcher = count($searcherData) === 0;

        // TODO: if at least one searcher is running, evaluate $searcherData to decide whether
        // it needs help (the plan was a hardcoded minimum of 100ms between search jobs).
        // Until that is implemented no additional searcher is spawned. The debugging
        // die(var_dump(...)) that used to sit here aborted every request in this case.

        if ($needSearcher) {
            $this->dispatch(new Searcher($this->name));
        }
    }

Karl's avatar
Karl committed
168
    /**
     * Calls the ranking function of every loaded result.
     *
     * @param mixed $eingabe Value handed unchanged to each result's rank() method
     */
    public function rank($eingabe)
    {
        foreach ($this->results as $entry) {
            $entry->rank($eingabe);
        }
    }

Karl's avatar
Karl committed
176
    /**
     * Updates one statistic of this engine in Redis and mirrors it on the object.
     *
     * The value stored in the Redis hash named after the engine is scaled by $this->uses,
     * the new (non-negative) value is added and the sum is divided by $this->uses again.
     * NOTE(review): divides by $this->uses - confirm it can never be 0 or unset here.
     *
     * @param string    $key Name of the statistic (Redis hash field and property name)
     * @param int|float $val New measurement; negative values count as 0
     */
    private function setStatistic($key, $val)
    {
        $uses     = $this->uses;
        $stored   = floatval(Redis::hget($this->name, $key));
        $weighted = $stored * $uses;
        $updated  = ($weighted + max($val, 0)) / $uses;

        Redis::hset($this->name, $key, $updated);
        $this->$key = $updated;
    }

Karl's avatar
Karl committed
186
    /**
     * Re-enables this search engine.
     *
     * Removes the "disabled" attribute, if set, from this engine's <suma> entry in the
     * given sumas XML file, writes the file back and marks the engine as enabled.
     * If the file cannot be parsed or contains no entry for this engine, the file is left
     * untouched and an error is logged; the object itself is still marked as enabled.
     *
     * @param string $sumaFile Path to the sumas XML file
     * @param string $message  Message written to the info log
     */
    public function enable($sumaFile, $message)
    {
        Log::info($message);

        $xml = simplexml_load_file($sumaFile);
        if ($xml === false) {
            # simplexml_load_file() returns false on failure; calling xpath() on it would be fatal.
            Log::error("Could not load suma file " . $sumaFile . " to re-enable " . $this->name);
        } else {
            $matches = $xml->xpath("//sumas/suma[@name='" . $this->name . "']");
            if (empty($matches)) {
                Log::error("No suma entry named " . $this->name . " found in " . $sumaFile);
            } else {
                # The element is a handle onto the document, so this removes the attribute in $xml.
                $suma = $matches[0];
                unset($suma['disabled']);
                $xml->saveXML($sumaFile);
            }
        }

        $this->enabled = true;
    }

    /**
     * Closes the file pointer stored in $fp, if there is an open one.
     *
     * Does nothing when $fp was never set or has already been closed, so the method is
     * safe to call more than once.
     */
    public function closeFp()
    {
        # is_resource() is false for null and for already closed handles.
        if (is_resource($this->fp)) {
            fclose($this->fp);
        }
    }

Karl's avatar
Karl committed
201
    /**
     * Opens a persistent socket for this engine.
     *
     * The socket number is read from the Redis hash "search.<hash>" (field: engine name)
     * and appended to the target so that pfsockopen() keeps one connection per number.
     *
     * @return resource|false|null The socket, false if the connection failed, or null
     *                             when Redis holds no socket number for this engine
     */
    public function getSocket()
    {
        $number = Redis::hget('search.' . $this->hash, $this->name);
        if ($number === null) {
            # A leftover die("test") used to abort the whole request here.
            return null;
        }

        # pfsockopen() fills error code and message by reference; they are not evaluated here.
        return pfsockopen($this->getHost() . ":" . $this->port . "/$number", $this->port, $errno, $errstr, 1);
    }

Karl's avatar
Karl committed
213
    /**
     * Fetches the results of this engine from Redis and loads them.
     *
     * Once a non-empty body is found it is passed to loadResults() and getNext(), the
     * engine is flagged as loaded and true is returned. Later calls return true at once.
     *
     * NOTE(review): both cache conditions end in "0 === 1" and therefore never match, so
     * the body always comes from Redis at the moment.
     * NOTE(review): the body is read from "search.<resultHash>" but the final hdel targets
     * "search.<hash>" - confirm this is intended.
     *
     * @param MetaGer $metager Current MetaGer instance, forwarded to getNext()
     * @return bool true if results are loaded, false if nothing is available yet
     */
    public function retrieveResults(MetaGer $metager)
    {
        if ($this->loaded) {
            return true;
        }

        $resultKey = 'search.' . $this->resultHash;
        $body      = "";

        if ($this->canCache && $this->cacheDuration > 0 && Cache::has($this->hash) && 0 === 1) {
            $body = Cache::get($this->hash);
        } elseif (Redis::hexists($resultKey, $this->name)) {
            $body = Redis::hget($resultKey, $this->name);
            if ($this->canCache && $this->cacheDuration > 0 && 0 === 1) {
                Cache::put($this->hash, $body, $this->cacheDuration);
            }
        }

        # Nothing fetched yet
        if ($body === "") {
            return false;
        }

        $this->loadResults($body);
        $this->getNext($metager, $body);
        $this->loaded = true;
        Redis::hdel('search.' . $this->hash, $this->name);

        return true;
    }

    /**
     * Deletes the Redis key "<host>.<socketNumber>" belonging to this engine.
     */
    public function shutdown()
    {
        $redisKey = "{$this->host}.{$this->socketNumber}";
        Redis::del($redisKey);
    }

Karl's avatar
Karl committed
246
    /**
     * Builds the host link used for the request.
     *
     * @return string "tls://<host>" if the port is the string "443", otherwise "tcp://<host>"
     */
    protected function getHost()
    {
        $transport = ($this->port === "443") ? "tls://" : "tcp://";

        return $transport . $this->host;
    }

Karl's avatar
Karl committed
259
    /**
     * Builds the string used for the GET request.
     *
     * Starts with the engine's script path ("/" if none is configured), appends the
     * configured form data as query string and replaces the placeholders <<USERAGENT>>,
     * <<QUERY>>, <<IP>>, <<LANGUAGE>>, <<CATEGORY>> and <<AFFILDATA>>.
     *
     * @param string $query    Search query
     * @param string $url      Serve URL, only used for <<AFFILDATA>>
     * @param string $language Language inserted for <<LANGUAGE>>
     * @param string $category Category inserted for <<CATEGORY>>
     * @return string Path and query string of the request
     */
    private function generateGetString($query, $url, $language, $category)
    {
        # Script: isset() avoids a notice when the engine XML has no such attribute
        $hasScript = isset($this->skript) && strlen($this->skript) > 0;
        $getString = $hasScript ? $this->skript : "/";

        # Form data
        if (isset($this->formData) && strlen($this->formData) > 0) {
            $getString .= "?" . $this->formData;
        }

        # A few placeholders in the GET string still have to be replaced.
        # The values are wrapped in closures so that they are only computed
        # when the placeholder actually occurs.
        $placeholders = [
            "<<USERAGENT>>" => function () {
                return $this->urlEncode($this->useragent);
            },
            "<<QUERY>>"     => function () use ($query) {
                return $this->urlEncode($query);
            },
            "<<IP>>"        => function () {
                return $this->urlEncode($this->ip);
            },
            "<<LANGUAGE>>"  => function () use ($language) {
                return $this->urlEncode($language);
            },
            "<<CATEGORY>>"  => function () use ($category) {
                return $this->urlEncode($category);
            },
            "<<AFFILDATA>>" => function () use ($url) {
                return $this->getOvertureAffilData($url);
            },
        ];

        foreach ($placeholders as $placeholder => $resolve) {
            # strpos() yields 0 for a match at the very beginning, so compare against
            # false explicitly instead of relying on truthiness.
            if (strpos($getString, $placeholder) !== false) {
                $getString = str_replace($placeholder, $resolve(), $getString);
            }
        }

        return $getString;
    }

Karl's avatar
Karl committed
309
    /**
     * URL-encodes a string, converting it to this engine's inputEncoding first if one is set.
     *
     * @param string $string Text to encode
     * @return string URL-encoded text
     */
    protected function urlEncode($string)
    {
        if (!isset($this->inputEncoding)) {
            return urlencode($string);
        }

        $converted = mb_convert_encoding($string, $this->inputEncoding);

        return urlencode($converted);
    }

Karl's avatar
Karl committed
319
    /**
     * Returns the special data needed for Yahoo.
     *
     * Joins IP, user agent and, if the request carries it, the X-Forwarded-For header
     * into one "key=value&..." string, URL-encodes that string as a whole and appends
     * the URL-encoded serve URL.
     *
     * @param string $url The serve URL
     * @return string "&affilData=<encoded data>&serveUrl=<encoded url>"
     */
    private function getOvertureAffilData($url)
    {
        $pairs = [
            'ip=' . $this->ip,
            'ua=' . $this->useragent,
        ];
        if (isset($_SERVER['HTTP_X_FORWARDED_FOR'])) {
            $pairs[] = 'xfip=' . $_SERVER['HTTP_X_FORWARDED_FOR'];
        }

        $encodedAffilData = $this->urlEncode(implode('&', $pairs));
        # The serve URL is required as well:
        $encodedServeUrl = $this->urlEncode($url);

        return "&affilData=" . $encodedAffilData . "&serveUrl=" . $encodedServeUrl;
    }

    /**
     * Tells whether this engine is enabled.
     *
     * @return bool Value of $enabled; the constructor sets it to false while the
     *              engine's disable timeout has not expired yet
     */
    public function isEnabled()
    {
        return $this->enabled;
    }
Karl's avatar
Karl committed
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359

    # Artifact methods

    /**
     * Returns curl_getinfo() for this engine's cURL handle ($ch).
     */
    public function getCurlInfo()
    {
        return curl_getinfo($this->ch);
    }

    /**
     * Returns curl_errno() for this engine's cURL handle ($ch), i.e. the error number
     * and not an error message.
     */
    public function getCurlErrors()
    {
        return curl_errno($this->ch);
    }

    /**
     * Adds this engine's cURL handle ($ch) to the given cURL multi handle.
     *
     * @param mixed $mh cURL multi handle passed to curl_multi_add_handle()
     */
    public function addCurlHandle($mh)
    {
        curl_multi_add_handle($mh, $this->ch);
    }

    /**
     * Removes this engine's cURL handle ($ch) from the given cURL multi handle.
     *
     * @param mixed $mh cURL multi handle passed to curl_multi_remove_handle()
     */
    public function removeCurlHandle($mh)
    {
        curl_multi_remove_handle($mh, $this->ch);
    }
360
}