Searchengine.php 11.6 KB
Newer Older
1
2
<?php

3
namespace App\Models;
4

5
use App\Jobs\Searcher;
6
use App\MetaGer;
7
8
use Cache;
use Illuminate\Foundation\Bus\DispatchesJobs;
9
use Log;
10
use Illuminate\Support\Facades\Redis;
11

12
abstract class Searchengine
13
{
14
    use DispatchesJobs;
15

Karl's avatar
Karl committed
16
17
18
    public $ch; # cURL handle used to fetch the results
    public $getString = ""; # The string for the GET request
    public $engine; # The original engine XML
Dominik Hebeler's avatar
Dominik Hebeler committed
19
20
21
22
23
24
    public $enabled  = true; # true unless the search engine is explicitly disabled
    public $results  = []; # The loaded results
    public $ads      = []; # The loaded advertisements
    public $products = []; # The loaded products
    public $loaded   = false; # true as soon as the results have been loaded
    public $cached   = false;
Karl's avatar
Karl committed
25
26
27
28
29
30
31
32
33
34
35
36

    public $ip; # The IP obtained from the MetaGer instance
    public $gefVon; # The HTML code for the link to the search provider
    public $uses; # The number of uses of this search engine
    public $homepage; # The homepage of this search engine
    public $name; # The name of this search engine
    public $disabled; # Whether this search engine is switched off (the constructor parses it with strtotime())
    public $useragent; # The HTTP user agent
    public $startTime; # The creation time of this search engine
    public $hash; # The hash value of this search engine

    public $fp; # Required for artifacts
Karl's avatar
Karl committed
37
38
    public $socketNumber    = null; # Required for artifacts
    public $counter         = 0; # Possibly required for artifacts
Karl's avatar
Karl committed
39
40
    public $write_time      = 0; # Possibly required for artifacts
    public $connection_time = 0; # Possibly required for artifacts
41
42
43

    /**
     * Initialises the engine from its XML definition and the current MetaGer request.
     *
     * Every attribute of the XML element is copied onto this object as a string
     * property. If the engine carries a `disabled` timestamp that has already passed,
     * enable() is called; if the timestamp still lies in the future, the engine is
     * marked as not enabled and construction stops early, so the request-related
     * properties (useragent, ip, getString, hash, ...) are not set in that case.
     *
     * @param \SimpleXMLElement $engine  XML element describing this engine
     * @param MetaGer           $metager provides query, site, language, category, IP, ...
     */
    public function __construct(\SimpleXMLElement $engine, MetaGer $metager)
    {
        # Copy as many attributes as possible from the engine XML onto this object
        foreach ($engine->attributes() as $attribute => $xmlValue) {
            $this->$attribute = (string) $xmlValue;
        }

        # Defaults for attributes the XML did not provide
        if (!isset($this->homepage)) {
            $this->homepage = "https://metager.de";
        }
        if (!isset($this->cacheDuration)) {
            $this->cacheDuration = 60;
        }

        # Keep the raw XML of the engine
        $this->engine  = $engine->asXML();
        $this->enabled = true;

        # An engine can be disabled temporarily (until the `disabled` timestamp)
        if (isset($this->disabled) && strtotime($this->disabled) <= time()) {
            # The timeout has expired: switch the engine back on
            $reenabledMessage = "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet.";
            $this->enable($metager->getSumaFile(), $reenabledMessage);
        } elseif (isset($this->disabled) && strtotime($this->disabled) > time()) {
            # Still within the timeout: leave the engine off and skip the rest
            $this->enabled = false;
            return;
        }

        $this->useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1";
        $this->ip        = $metager->getIp();
        $this->gefVon    = '<a href="' . $this->homepage . '" target="_blank" rel="noopener">' . $this->displayName . '</a>';
        $this->startTime = microtime();

        # Build the search string; engines with site search get " site:<site>" appended
        $q = $metager->getQ();
        $supportsSiteSearch = isset($this->hasSiteSearch) && $this->hasSiteSearch === "1";
        if ($supportsSiteSearch && strlen($metager->getSite()) !== 0) {
            $q .= " site:" . $metager->getSite();
        }

        $this->getString  = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
        $this->hash       = md5($this->host . $this->getString . $this->port . $this->name);
        $this->resultHash = $metager->getHashCode();
        $this->canCache   = $metager->canCache();

        if (!isset($this->additionalHeaders)) {
            $this->additionalHeaders = "";
        }
    }

    /**
     * Parses a fetched response into this engine's result lists.
     * Implemented by each concrete engine; retrieveResults() calls it with the
     * non-empty body it obtained from the cache or from Redis.
     *
     * @param mixed $result the response body to parse
     */
    abstract public function loadResults($result);

Karl's avatar
Karl committed
99
    # ???
100
101
    /**
     * Hook invoked by retrieveResults() right after loadResults(), with the same body.
     * It does nothing in this class.
     * NOTE(review): presumably overridden by engines that can offer a next page — confirm.
     *
     * @param MetaGer $metager the current MetaGer instance
     * @param mixed   $result  the response body that was just loaded
     */
    public function getNext(MetaGer $metager, $result)
    {
        # No-op in the base class.
    }

Karl's avatar
Karl committed
105
    # Checks whether the search is already cached; otherwise it is dispatched as a job
106
107
    /**
     * Starts the search for this engine.
     *
     * The cache branch is currently switched off by the trailing `0 == 1`, so in
     * practice a fetch mission is always queued in Redis and a Searcher job is
     * dispatched if no Redis key named after this engine exists.
     *
     * @param \App\MetaGer $metager the current MetaGer instance
     */
    public function startSearch(\App\MetaGer $metager)
    {
        # NOTE(review): `0 == 1` makes this always false — the cache path is disabled on purpose or by leftover debugging; confirm.
        $serveFromCache = $this->canCache && Cache::has($this->hash) && 0 == 1;
        if ($serveFromCache) {
            $this->cached = true;
            $this->retrieveResults($metager);
            return;
        }

        # Confirm the submission under the result hash
        Redis::hset('search.' . $this->resultHash, $this->name, "waiting");

        # A mission has the format "<ResultHash>;<URL to fetch>":
        #  - <ResultHash> is the hash under which the fetcher stores the result
        #  - <URL to fetch> is the full URL to the search engine
        $scheme  = ($this->port === "443") ? "https://" : "http://";
        $mission = $this->resultHash . ";" . $scheme . $this->host . $this->getString;

        # Each Searcher serves exactly one engine and reads from the queue "<name>.queue"
        Redis::rpush($this->name . ".queue", $mission);

        # Start a Searcher if none is registered for this engine right now
        if (Redis::get($this->name) === null) {
            Log::info("Starting Searcher");
            # The requests are processed by the Laravel queue. As long as QUEUE_DRIVER
            # is "sync" in .env they run one after another; use another driver for
            # parallel processing (see https://laravel.com/docs/5.2/queues).
            $this->dispatch(new Searcher($this->name));
        }
    }

Karl's avatar
Karl committed
146
    # Ruft die Ranking-Funktion aller Ergebnisse auf.
147
    /**
     * Calls rank() on every loaded result of this engine.
     *
     * @param mixed $eingabe value passed through unchanged to each result's rank()
     */
    public function rank($eingabe)
    {
        foreach ($this->results as $entry) {
            $entry->rank($eingabe);
        }
    }

Karl's avatar
Karl committed
154
    # Magic ???
155
156
157
158
159
160
161
162
163
    /**
     * Folds a new sample into a per-engine statistic stored in the Redis hash
     * named after this engine, and mirrors the new value onto this object.
     *
     * The stored value is scaled by the engine's use count, the (non-negative)
     * sample is added, and the sum is divided by the use count again.
     * When the use count is missing, zero, negative or not numeric, the update
     * is skipped instead of dividing by zero.
     *
     * @param string    $key name of the statistic (Redis hash field and property name)
     * @param int|float $val new sample; negative values count as 0
     */
    private function setStatistic($key, $val)
    {
        $uses = floatval($this->uses);
        if ($uses <= 0) {
            # Without a positive use count there is nothing to average over
            return;
        }

        $oldVal = floatval(Redis::hget($this->name, $key)) * $uses;
        $newVal = ($oldVal + max($val, 0)) / $uses;
        Redis::hset($this->name, $key, $newVal);
        $this->$key = $newVal;
    }

Karl's avatar
Karl committed
164
    # Removes the "disabled" attribute of this search engine from the suma file, if it is set
165
166
167
168
169
170
    /**
     * Switches this engine back on and removes its `disabled` attribute from the
     * suma XML file.
     *
     * The engine is marked as enabled on this object even if the file cannot be
     * read or written; such failures are logged instead of aborting the request.
     *
     * @param string $sumaFile path of the suma XML file
     * @param string $message  message written to the info log
     */
    public function enable($sumaFile, $message)
    {
        Log::info($message);

        $xml = simplexml_load_file($sumaFile);
        if ($xml === false) {
            # simplexml_load_file() returns false on failure; do not call methods on it
            Log::error("Could not load suma file " . $sumaFile . " to re-enable " . $this->name);
        } else {
            $matches = $xml->xpath("//sumas/suma[@name='" . $this->name . "']");
            if (!empty($matches)) {
                # The elements returned by xpath() belong to $xml, so this edits the document
                unset($matches[0]['disabled']);
                if ($xml->saveXML($sumaFile) === false) {
                    Log::error("Could not write suma file " . $sumaFile . " while re-enabling " . $this->name);
                }
            }
        }

        $this->enabled = true;
    }

    /**
     * Closes the file pointer held in $fp, if there is an open one.
     * Calling it without an open handle (never set, or already closed) is a no-op.
     */
    public function closeFp()
    {
        # is_resource() is false for null and for handles that were already closed
        if (is_resource($this->fp)) {
            fclose($this->fp);
        }
    }

Karl's avatar
Karl committed
179
    # Öffnet einen neuen Socket für diese Engine
180
181
182
183
184
185
186
187
188
189
190
    /**
     * Opens a persistent socket for this engine.
     *
     * The socket number is read from the Redis hash 'search.<hash>' under this
     * engine's name and appended to the host string passed to pfsockopen().
     * NOTE(review): presumably the suffix yields a distinct persistent connection per number — confirm.
     *
     * @return resource|false|null null when Redis holds no number for this engine,
     *                             otherwise the result of pfsockopen()
     */
    public function getSocket()
    {
        $number = Redis::hget('search.' . $this->hash, $this->name);
        if ($number === null) {
            return null;
        }

        # pfsockopen() fills the error code first, then the error message
        return pfsockopen($this->getHost() . ":" . $this->port . "/$number", $this->port, $errno, $errstr, 1);
    }

Karl's avatar
Karl committed
191
    # Fragt die Ergebnisse von Redis ab und lädt Sie
192
    /**
     * Fetches this engine's response body and loads it into the result lists.
     *
     * The body is looked up in the Redis hash 'search.<resultHash>' under this
     * engine's name. Both cache paths (read and write) are currently switched off
     * by a trailing `0 === 1`.
     *
     * @param MetaGer $metager the current MetaGer instance, handed on to getNext()
     * @return bool true if results are loaded (now or already before), false if
     *              no non-empty body was available
     */
    public function retrieveResults(MetaGer $metager)
    {
        if ($this->loaded) {
            return true;
        }

        $resultKey    = 'search.' . $this->resultHash;
        $cacheAllowed = $this->canCache && $this->cacheDuration > 0;

        $body = "";
        # NOTE(review): `0 === 1` keeps the cache read permanently off — confirm this is intended.
        if ($cacheAllowed && Cache::has($this->hash) && 0 === 1) {
            $body = Cache::get($this->hash);
        } elseif (Redis::hexists($resultKey, $this->name)) {
            $body = Redis::hget($resultKey, $this->name);
            # NOTE(review): the cache write is switched off the same way.
            if ($cacheAllowed && 0 === 1) {
                Cache::put($this->hash, $body, $this->cacheDuration);
            }
        }

        if ($body === "") {
            return false;
        }

        $this->loadResults($body);
        $this->getNext($metager, $body);
        $this->loaded = true;
        # NOTE(review): this deletes from 'search.<hash>', while the body was read from 'search.<resultHash>' — confirm which key is meant.
        Redis::hdel('search.' . $this->hash, $this->name);
        return true;
    }

    /**
     * Deletes the Redis key "<host>.<socketNumber>" belonging to this engine.
     */
    public function shutdown()
    {
        $socketKey = $this->host . "." . $this->socketNumber;
        Redis::del($socketKey);
    }

Karl's avatar
Karl committed
224
    # Erstellt den für die Get-Anfrage genutzten Host-Link
225
226
227
228
229
230
231
232
233
234
235
236
    /**
     * Returns the host prefixed with the socket transport: "tls://" when the
     * port is the string "443", "tcp://" otherwise.
     *
     * @return string
     */
    protected function getHost()
    {
        $transport = ($this->port === "443") ? "tls://" : "tcp://";

        return $transport . $this->host;
    }

Karl's avatar
Karl committed
237
    # Erstellt den für die Get-Anfrage genutzten String
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
    /**
     * Builds the path-and-query string of the GET request for this engine.
     *
     * Starts from the engine's `skript` ("/" when missing or empty), appends
     * "?<formData>" when form data is configured, and then substitutes the
     * placeholders <<USERAGENT>>, <<QUERY>>, <<IP>>, <<LANGUAGE>>, <<CATEGORY>>
     * (URL-encoded via urlEncode()) and <<AFFILDATA>>.
     *
     * @param string $query    the search query
     * @param string $url      the serve URL, used only for <<AFFILDATA>>
     * @param string $language value for <<LANGUAGE>>
     * @param string $category value for <<CATEGORY>>
     * @return string
     */
    private function generateGetString($query, $url, $language, $category)
    {
        # Both attributes are optional in the engine XML, so do not read them unguarded
        $skript   = isset($this->skript) ? (string) $this->skript : "";
        $formData = isset($this->formData) ? (string) $this->formData : "";

        $getString = ($skript !== "") ? $skript : "/";
        if ($formData !== "") {
            $getString .= "?" . $formData;
        }

        # Placeholders whose values are simply URL-encoded, in substitution order
        $placeholders = [
            "<<USERAGENT>>" => $this->useragent,
            "<<QUERY>>"     => $query,
            "<<IP>>"        => $this->ip,
            "<<LANGUAGE>>"  => $language,
            "<<CATEGORY>>"  => $category,
        ];
        foreach ($placeholders as $placeholder => $value) {
            # strpos() returns 0 for a match at the very start, so compare against false.
            # The check also keeps urlEncode() from running for absent placeholders.
            if (strpos($getString, $placeholder) !== false) {
                $getString = str_replace($placeholder, $this->urlEncode($value), $getString);
            }
        }

        # Affiliate data is inserted as-is; getOvertureAffilData() encodes its own parts
        if (strpos($getString, "<<AFFILDATA>>") !== false) {
            $getString = str_replace("<<AFFILDATA>>", $this->getOvertureAffilData($url), $getString);
        }

        return $getString;
    }

Karl's avatar
Karl committed
287
    # Wandelt einen String nach aktuell gesetztem inputEncoding dieser Searchengine in URL-Format um
288
289
290
291
292
293
294
295
296
    /**
     * URL-encodes a string. If this engine has an `inputEncoding` set, the string
     * is first converted to that encoding with mb_convert_encoding().
     *
     * @param string $string the text to encode
     * @return string
     */
    protected function urlEncode($string)
    {
        if (!isset($this->inputEncoding)) {
            return urlencode($string);
        }

        $converted = mb_convert_encoding($string, $this->inputEncoding);

        return urlencode($converted);
    }

Karl's avatar
Karl committed
297
    # Liefert Sonderdaten für Yahoo
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
    /**
     * Builds the "&affilData=...&serveUrl=..." fragment that replaces <<AFFILDATA>>.
     *
     * The affiliate data consists of the IP, the user agent and, when the request
     * carries one, the X-Forwarded-For header; the joined string is URL-encoded
     * as a whole.
     *
     * @param string $url the serve URL
     * @return string
     */
    private function getOvertureAffilData($url)
    {
        $parts = [
            'ip=' . $this->ip,
            'ua=' . $this->useragent,
        ];
        if (isset($_SERVER['HTTP_X_FORWARDED_FOR'])) {
            $parts[] = 'xfip=' . $_SERVER['HTTP_X_FORWARDED_FOR'];
        }

        $encodedAffilData = $this->urlEncode(implode('&', $parts));
        $encodedServeUrl  = $this->urlEncode($url);

        return "&affilData=" . $encodedAffilData . "&serveUrl=" . $encodedServeUrl;
    }

    /**
     * Returns the current value of the `enabled` flag.
     *
     * @return bool
     */
    public function isEnabled()
    {
        $state = $this->enabled;

        return $state;
    }
Karl's avatar
Karl committed
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

    # Artefaktmethoden

    /**
     * Returns curl_getinfo() for this engine's cURL handle.
     *
     * @return mixed
     */
    public function getCurlInfo()
    {
        $info = curl_getinfo($this->ch);

        return $info;
    }

    /**
     * Returns the cURL error number (curl_errno()) of this engine's handle.
     *
     * @return int
     */
    public function getCurlErrors()
    {
        $errorNumber = curl_errno($this->ch);

        return $errorNumber;
    }

    /**
     * Adds this engine's cURL handle to the given multi handle.
     *
     * @param mixed $mh the cURL multi handle
     */
    public function addCurlHandle($mh)
    {
        $handle = $this->ch;
        curl_multi_add_handle($mh, $handle);
    }

    /**
     * Removes this engine's cURL handle from the given multi handle.
     *
     * @param mixed $mh the cURL multi handle
     */
    public function removeCurlHandle($mh)
    {
        $handle = $this->ch;
        curl_multi_remove_handle($mh, $handle);
    }
338
}