Searchengine.php 11.2 KB
Newer Older
1 2
<?php

3
namespace App\Models;
4

5
use App\Jobs\Searcher;
6
use App\MetaGer;
7 8
use Cache;
use Illuminate\Foundation\Bus\DispatchesJobs;
9
use Illuminate\Support\Facades\Redis;
10

11
abstract class Searchengine
12
{
13
    use DispatchesJobs;
14

Karl's avatar
Karl committed
15 16
    public $getString = ""; # The string used for the GET request
    public $engine; # The original engine definition (handed to the constructor as \stdClass)
    public $totalResults = 0; # How many results the search engine has found
    public $results = []; # The loaded results
    public $ads = []; # The loaded advertisements
    public $products = []; # The loaded products
    public $loaded = false; # True as soon as the results have been loaded
    public $cached = false; # Set to true by startSearch() when the request was found in the cache

    public $ip; # The IP taken from the MetaGer instance
    public $uses; # Number of uses of this search engine
    public $homepage; # Homepage of this search engine
    public $name; # Name of this search engine
    public $disabled; # Whether this search engine is disabled
    public $useragent; # The HTTP user agent
    public $startTime; # Creation time of this search engine (string as returned by microtime())
    public $hash; # Hash value of this search engine request (see updateHash())

    # The following three properties are assigned in the constructor / setResultHash().
    # They are declared explicitly because creating them dynamically is deprecated
    # since PHP 8.2. They stay public and untyped so that external access keeps
    # working exactly as it did with the implicit (dynamic) properties.
    public $cacheDuration; # Cache duration for responses of this engine (defaults to 60 in the constructor)
    public $resultHash; # Hash under which the fetcher stores the result of this search
    public $canCache; # Whether the current search may be served from / written to the cache

    private $username; # Username for HTTP auth (if configured)
    private $password; # Password for HTTP auth (if configured)

    private $headers; # Headers to add

    public $fp; # Needed for artefacts
    public $socketNumber = null; # Needed for artefacts
    public $counter = 0; # Possibly needed for artefacts
    public $write_time = 0; # Possibly needed for artefacts
    public $connection_time = 0; # Possibly needed for artefacts
43

44
    /**
     * Initialises the engine from its configuration and the current MetaGer request.
     *
     * Reads cache duration, HTTP auth credentials and custom request headers from
     * the engine definition, applies the active query and parameter filters,
     * builds the GET string and derives the cache hash from it.
     *
     * @param string    $name    Name of this search engine; also used as the key into the filter configuration.
     * @param \stdClass $engine  Engine definition. Note: enabled parameter filters are written
     *                           into its "get-parameter" object, i.e. the passed object is modified.
     * @param MetaGer   $metager The current MetaGer instance (query, filters, user agent, IP, ...).
     */
    public function __construct($name, \stdClass $engine, MetaGer $metager)
    {
        $this->engine = $engine;
        $this->name = $name;

        # Default cache duration is 60; the engine may override it, -1 keeps the default
        $this->cacheDuration = 60;
        if (isset($engine->{"cache-duration"}) && $engine->{"cache-duration"} !== -1) {
            $this->cacheDuration = $engine->{"cache-duration"};
        }

        $this->useragent = $metager->getUserAgent();
        $this->ip = $metager->getIp();
        # microtime() without argument yields a "msec sec" string, not a float
        $this->startTime = microtime();

        # Check for HTTP auth: credentials are only used when both parts are present
        if (!empty($this->engine->{"http-auth-credentials"}->username) && !empty($this->engine->{"http-auth-credentials"}->password)) {
            $this->username = $this->engine->{"http-auth-credentials"}->username;
            $this->password = $this->engine->{"http-auth-credentials"}->password;
        }

        # Custom request headers are optional; guard the read so that engines without
        # a "request-header" entry do not raise an undefined-property warning.
        $this->headers = isset($this->engine->{"request-header"}) ? $this->engine->{"request-header"} : null;

        # Generate the search string: every active query filter is prepended to the query
        $q = $metager->getQ();
        $filters = $metager->getSumaFile()->filter;
        foreach ($metager->getQueryFilter() as $queryFilter => $filter) {
            $filterOptions = $filters->{"query-filter"}->$queryFilter;
            $filterOptionsEngine = $filterOptions->sumas->{$this->name};
            $query = $filterOptionsEngine->prefix . $filter . $filterOptionsEngine->suffix;
            $q = $query . " " . $q;
        }

        # Parse enabled parameter filters
        foreach ($metager->getParameterFilter() as $filterName => $filter) {
            $inputParameter = $filter->value;

            # Skip filters without a value or without a mapping for this engine
            if (empty($inputParameter) || empty($filter->sumas->{$name}->values->{$inputParameter})) {
                continue;
            }
            $engineParameterKey = $filter->sumas->{$name}->{"get-parameter"};
            $engineParameterValue = $filter->sumas->{$name}->values->{$inputParameter};
            $this->engine->{"get-parameter"}->{$engineParameterKey} = $engineParameterValue;
        }

        $this->getString = $this->generateGetString($q);
        $this->updateHash();
        $this->resultHash = $metager->getSearchUid();
        $this->canCache = $metager->canCache();
    }

    /**
     * Loads the results of this search engine from a fetched response.
     * Must be implemented by every concrete search engine.
     *
     * @param mixed $result retrieveResults() passes the response body it read
     *                      from the cache or from Redis.
     */
    abstract public function loadResults($result);

96
    /**
     * Default implementation of getNext so that it can always be called.
     * It intentionally does nothing; retrieveResults() invokes it right after
     * loadResults() with the same response body.
     *
     * @param MetaGer $metager The current MetaGer instance.
     * @param mixed   $result  The response body that was just loaded.
     */
    public function getNext(MetaGer $metager, $result)
    {
    }
99

Karl's avatar
Karl committed
100
    /**
     * Checks whether the search is already cached; otherwise submits it to the
     * Redis queue of this engine and, if required, dispatches a new Searcher job.
     *
     * @param \App\MetaGer $metager The current MetaGer instance.
     */
    public function startSearch(\App\MetaGer $metager)
    {
        // Same cache condition as retrieveResults(): a stale cache entry must not
        // suppress the fetch when caching is disabled for this engine (duration <= 0),
        // because retrieveResults() would then ignore that entry.
        if ($this->canCache && $this->cacheDuration > 0 && Cache::has($this->hash)) {
            $this->cached = true;
            $this->retrieveResults($metager);
            return;
        }

        $redis = Redis::connection(env('REDIS_RESULT_CONNECTION'));
        $resultKey = $metager->getRedisEngineResult() . $this->name;

        // We will push the confirmation of the submission to the Result Hash
        $redis->hset($resultKey, "status", "waiting");
        $redis->expire($resultKey, env('REDIS_RESULT_CACHE_DURATION'));

        // We need to submit an action that one of our workers can understand.
        // The missions are submitted to a redis queue in the following string format
        // <ResultHash>;<URL to fetch>;<time>
        // With <ResultHash> being the Hash Value where the fetcher will store the result,
        // <URL to fetch> being the full, base64 encoded URL to the searchengine
        // and <time> being the value of $metager->getTime().
        $url = base64_encode($this->buildRequestUrl());
        $mission = $this->resultHash . ";" . $url . ";" . $metager->getTime();

        // Submit this mission to the corresponding Redis Queue
        // Since each Searcher is dedicated to one specific search engine
        // each Searcher has its own queue lying under the redis key <name>.queue
        Redis::rpush($this->name . ".queue", $mission);

        // The request is not cached and will be submitted to the searchengine
        // We need to check if the number of requests to this engine are limited
        if (!empty($this->engine->{"monthly-requests"})) {
            Redis::incr("monthlyRequests:" . $this->name);
        }

        // Only one request at a time may spawn a Searcher: the key <name> acts as a lock.
        // NOTE(review): get + set is not atomic and the lock is not released here —
        // presumably the Searcher job clears it; confirm.
        if ($this->needsAdditionalSearcher() && Redis::get($this->name) !== "locked") {
            Redis::set($this->name, "locked");
            $this->dispatch(new Searcher($this->name, $this->username, $this->password, $this->headers));
        }
    }

    /**
     * Builds the full URL (scheme, host, optional port, GET string) of the request.
     *
     * The port is normalised to an integer first so that a port configured as
     * the string "443" still selects https instead of producing "http://host:443".
     *
     * @return string
     */
    private function buildRequestUrl()
    {
        $port = (int) $this->engine->port;

        $url = ($port === 443 ? "https://" : "http://") . $this->engine->host;
        if ($port !== 80 && $port !== 443) {
            $url .= ":" . $port;
        }

        return $url . $this->getString;
    }

    /**
     * Decides whether another Searcher process should be started for this engine.
     *
     * We have Searcher processes running for MetaGer.
     * Each Searcher is dedicated to one specific Searchengine and fetches its results.
     * We can have multiple Searchers for each engine, if needed.
     * The information for the decision is provided through the redis hash <name>.stats.
     * Each running searcher has one entry in it, telling how long it has waited to be
     * given the last fetcher job. The longer this time value is, the less frequently the
     * search engine is used and the fewer searchers of that type we need.
     * But if it is too low, then the searcher is near to its full workload and needs assistance.
     *
     * @return bool True when no searcher is running or the average waiting time is below 0.1.
     */
    private function needsAdditionalSearcher()
    {
        $searcherData = Redis::hgetall($this->name . ".stats");

        // No entry means no searcher is running, so we of course need to spawn one.
        if (empty($searcherData)) {
            return true;
        }

        // At least one searcher is running. Average the waiting times (second
        // ";"-separated field of every entry); a malformed entry counts as 0.
        $totalWait = 0.0;
        foreach ($searcherData as $entry) {
            $fields = explode(";", $entry);
            $totalWait += isset($fields[1]) ? floatval($fields[1]) : 0.0;
        }
        $meanWait = $totalWait / count($searcherData);

        // Hardcoded minimum of 100ms between every search job
        // (assuming the stats are reported in seconds — TODO confirm).
        return $meanWait < .1;
    }

Karl's avatar
Karl committed
188
    /**
     * Calls the ranking function of every loaded result.
     *
     * @param mixed $eingabe Handed unchanged to the rank() method of each result.
     */
    public function rank($eingabe)
    {
        foreach ($this->results as $loadedResult) {
            $loadedResult->rank($eingabe);
        }
    }

Dominik Hebeler's avatar
Dominik Hebeler committed
196 197
    /**
     * Overrides the result hash that was taken from the MetaGer instance in the constructor.
     * startSearch() puts this value at the front of the mission string.
     *
     * @param mixed $hash The new result hash.
     */
    public function setResultHash($hash)
    {
        $this->resultHash = $hash;
    }

Karl Hasselbring's avatar
Karl Hasselbring committed
201 202 203 204 205
    /**
     * Recomputes the hash of this request from host, GET string, port and engine name.
     * The hash is used as the cache key in startSearch() and retrieveResults().
     */
    public function updateHash()
    {
        $fingerprint = $this->engine->host . $this->getString . $this->engine->port . $this->name;
        $this->hash = md5($fingerprint);
    }

Karl's avatar
Karl committed
206
    /**
     * Fetches the response of this engine from the cache or from Redis and loads it.
     *
     * @param MetaGer $metager The current MetaGer instance.
     *
     * @return bool True when the results are loaded (now or by an earlier call),
     *              false when no usable response is available yet.
     */
    public function retrieveResults(MetaGer $metager)
    {
        if ($this->loaded) {
            return true;
        }

        $redis = Redis::connection(env('REDIS_RESULT_CONNECTION'));
        $cacheable = $this->canCache && $this->cacheDuration > 0;

        # Read the cache with a single get() instead of has() followed by get():
        # an entry expiring between the two calls would otherwise hand null to loadResults().
        $body = $cacheable ? Cache::get($this->hash) : null;

        if ($body === null) {
            $resultKey = $metager->getRedisEngineResult() . $this->name;
            if ($redis->hexists($resultKey, "response")) {
                $body = $redis->hget($resultKey, "response");
                # Only cache real responses; the field may have vanished since hexists()
                if ($cacheable && is_string($body)) {
                    Cache::put($this->hash, $body, $this->cacheDuration);
                }
            }
        }

        # Nothing usable yet: no response at all, an empty one, or a status marker
        if (!is_string($body) || $body === "" || $body === "connected" || $body === "waiting") {
            return false;
        }

        $this->loadResults($body);
        $this->getNext($metager, $body);
        $this->loaded = true;

        return true;
    }

Karl's avatar
Karl committed
234
    /**
     * Creates the string used for the GET request: path, static get-parameters,
     * the query parameter and finally the dynamic parameters.
     *
     * @param string $query The search query (already extended by the query filters).
     *
     * @return string Path and query string, e.g. "/search?a=b&q=term".
     */
    protected function generateGetString($query)
    {
        # Script path, defaults to "/"
        $path = !empty($this->engine->path) ? $this->engine->path : "/";

        $pairs = [];
        foreach ($this->engine->{"get-parameter"} as $key => $value) {
            $pairs[] = $this->urlEncode($key) . "=" . $this->urlEncode($value);
        }

        # The query goes into the same list so that engines without static
        # parameters get "?q=..." instead of "?&q=..."
        $pairs[] = $this->engine->{"query-parameter"} . "=" . $this->urlEncode($query);

        return $path . "?" . implode("&", $pairs) . $this->getDynamicParamsString();
    }

Karl's avatar
Karl committed
261
    /**
     * Converts a string into URL format, honouring the inputEncoding currently
     * set on this search engine (if any).
     *
     * @param string $string The raw value.
     *
     * @return string The URL-encoded value.
     */
    protected function urlEncode($string)
    {
        if (!isset($this->inputEncoding)) {
            return urlencode($string);
        }

        $converted = mb_convert_encoding($string, $this->inputEncoding);

        return urlencode($converted);
    }
Karl Hasselbring's avatar
Karl Hasselbring committed
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286

    /**
     * Renders the parameters returned by getDynamicParams() as a query-string
     * fragment in which every pair is prefixed with "&".
     *
     * Keys and values go through urlEncode() so that they honour the engine's
     * inputEncoding exactly like the static parameters and the query do.
     * Without an inputEncoding the output is identical to plain urlencode().
     *
     * @return string "&key=value&..." or an empty string when there are no dynamic parameters.
     */
    private function getDynamicParamsString()
    {
        $paramString = "";

        foreach ($this->getDynamicParams() as $key => $value) {
            $paramString .= sprintf("&%s=%s", $this->urlEncode($key), $this->urlEncode($value));
        }

        return $paramString;
    }

    /**
     * Returns additional GET parameters as a key => value map.
     * getDynamicParamsString() appends them to the GET string.
     * This base implementation supplies none.
     *
     * @return array
     */
    protected function getDynamicParams()
    {
        return [];
    }
287
}