RequestFetcher.php 7.42 KB
Newer Older
1
2
3
4
5
6
<?php

namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
Dominik Hebeler's avatar
Dominik Hebeler committed
7
use Log;
8

Dominik Hebeler's avatar
Dominik Hebeler committed
9
class RequestFetcher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /**
     * Main loop flag; set to false by sig_handler() to stop the worker.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * The curl multi handle that new transfers are added to.
     *
     * @var mixed
     */
    protected $multicurl = null;

    /**
     * A retired multi handle that is still finishing its transfers,
     * or null when there is none.
     *
     * @var mixed
     */
    protected $oldMultiCurl = null;

    /**
     * Number of added transfers after which the multi handle is replaced.
     *
     * @var int
     */
    protected $maxFetchedDocuments = 10000;

    /**
     * Transfers added to the current multi handle so far.
     *
     * @var int
     */
    protected $fetchedDocuments = 0;

    /**
     * Proxy settings read from the environment in the constructor.
     * The proxy is only used when all four values are non-empty.
     */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;

    /**
     * Create a new command instance.
     *
     * NOTE(review): env() is called directly here; with a cached Laravel
     * config only real system environment variables are visible through
     * env() - confirm this is intended.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
        $this->multicurl = curl_multi_init();
        $this->proxyhost = env("PROXY_HOST", "");
        $this->proxyport = env("PROXY_PORT", "");
        $this->proxyuser = env("PROXY_USER", "");
        $this->proxypassword = env("PROXY_PASSWORD", "");
    }

    /**
     * Execute the console command.
     *
     * Pops JSON encoded jobs from the Redis list \App\MetaGer::FETCHQUEUE_KEY,
     * starts one curl transfer per job and stores every finished response in
     * Redis (see readMultiCurl()). Runs until a SIGINT/SIGTERM/SIGHUP is
     * received or a handle cannot be added to the multi handle.
     *
     * @return mixed
     */
    public function handle()
    {
        $pidFile = "/tmp/fetcher";

        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        // Redis might not be available now
        for ($count = 0; $count < 10; $count++) {
            try {
                Redis::connection();
                break;
            } catch (\Predis\Connection\ConnectionException $e) {
                if ($count >= 9) {
                    // If its not available after 10 seconds we will exit
                    return;
                }
                sleep(1);
            }
        }

        touch($pidFile);

        if (!file_exists($pidFile)) {
            return;
        }

        try {
            $blocking = false;
            while ($this->shouldRun) {
                // Deliver pending signals; without this the registered
                // handlers are only invoked if ticks/async signals are
                // enabled somewhere else.
                pcntl_signal_dispatch();
                if (!$this->shouldRun) {
                    // Stop before popping another job that would be lost.
                    break;
                }

                $active = 0;
                curl_multi_exec($this->multicurl, $active);

                // Poll the queue while transfers are running, block (for up
                // to one second) once there is nothing else to do.
                $currentJob = null;
                if (!$blocking) {
                    $currentJob = Redis::lpop(\App\MetaGer::FETCHQUEUE_KEY);
                } else {
                    $currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
                    if (!empty($currentJob)) {
                        // blpop answers with [key, value]
                        $currentJob = $currentJob[1];
                    }
                }

                if (!empty($currentJob)) {
                    $job = json_decode($currentJob, true);
                    if (!is_array($job) || empty($job["url"]) || empty($job["resulthash"])) {
                        // Invalid JSON or missing fields: there is nothing
                        // to fetch and no key to store a result under.
                        Log::error("Discarding malformed fetch job");
                    } else {
                        $ch = $this->getCurlHandle($job);
                        if (curl_multi_add_handle($this->multicurl, $ch) !== 0) {
                            $this->shouldRun = false;
                            Log::error("Couldn't add Handle to multicurl");
                            break;
                        }
                        $this->fetchedDocuments++;
                        if ($this->fetchedDocuments > $this->maxFetchedDocuments) {
                            Log::info("Reinitializing Multicurl after " . $this->fetchedDocuments . " requests.");
                            if ($this->oldMultiCurl !== null) {
                                // A previously retired handle is still
                                // around; close it instead of leaking it.
                                curl_multi_close($this->oldMultiCurl);
                            }
                            $this->oldMultiCurl = $this->multicurl;
                            $this->multicurl = curl_multi_init();
                            $this->fetchedDocuments = 0;
                        }
                        $blocking = false;
                        $active = true;
                    }
                }

                $answerRead = $this->readMultiCurl($this->multicurl);
                if ($this->oldMultiCurl !== null) {
                    // The retired handle has to be driven as well, otherwise
                    // its remaining transfers would never complete.
                    $oldActive = 0;
                    curl_multi_exec($this->oldMultiCurl, $oldActive);
                    if ($this->readMultiCurl($this->oldMultiCurl)) {
                        $answerRead = true;
                    }
                    if ($oldActive === 0) {
                        // Nothing is running anymore and all messages have
                        // been read above.
                        Log::debug("Removing finished multicurl handle");
                        curl_multi_close($this->oldMultiCurl);
                        $this->oldMultiCurl = null;
                    } else {
                        // Keep polling instead of blocking on the queue.
                        $active = true;
                    }
                }

                if (!$active && !$answerRead) {
                    $blocking = true;
                } else {
                    usleep(50 * 1000);
                }
            }
        } finally {
            if (file_exists($pidFile)) {
                unlink($pidFile);
            }
            curl_multi_close($this->multicurl);
            if ($this->oldMultiCurl !== null) {
                curl_multi_close($this->oldMultiCurl);
                $this->oldMultiCurl = null;
            }
        }
    }

    /**
     * Reads all finished transfers from the given multi handle and stores
     * each response in Redis.
     *
     * The Redis key is the part of the handle's CURLINFO_PRIVATE data in
     * front of the first ";" (see getCurlHandle()). The value is the response
     * body for HTTP 200 and an empty string otherwise. The key always expires
     * after 60 seconds; the cache duration that is also part of the private
     * data is not used here.
     *
     * @param mixed $mc curl multi handle to read from
     * @return bool true if at least one finished transfer was read
     */
    private function readMultiCurl($mc)
    {
        $answerRead = false;
        while (($info = curl_multi_info_read($mc)) !== false) {
            $handle = $info["handle"];
            try {
                $answerRead = true;
                $private = explode(";", (string) curl_getinfo($handle, CURLINFO_PRIVATE));
                $resulthash = $private[0];
                $responseCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
                $body = "";

                $error = curl_error($handle);
                if (!empty($error)) {
                    Log::error($error);
                }

                if ($responseCode !== 200) {
                    Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($handle, CURLINFO_EFFECTIVE_URL) . "\"\n");
                } else {
                    $body = \curl_multi_getcontent($handle);
                }

                Redis::pipeline(function ($pipe) use ($resulthash, $body) {
                    $pipe->set($resulthash, $body);
                    $pipe->expire($resulthash, 60);
                });
            } finally {
                // Detach the handle from the multi handle before closing it.
                \curl_multi_remove_handle($mc, $handle);
                curl_close($handle);
            }
        }
        return $answerRead;
    }

    /**
     * Builds the curl handle for a single fetch job.
     *
     * Keys read from $job: "url", "resulthash", "cacheDuration" and the
     * optional "username"/"password" and "headers" (name => value).
     * "resulthash" and "cacheDuration" are attached to the handle as
     * CURLOPT_PRIVATE in the form "<resulthash>;<cacheDuration>".
     *
     * @param array $job decoded fetch job
     * @return mixed the configured curl handle
     */
    private function getCurlHandle($job)
    {
        $cacheDuration = isset($job["cacheDuration"]) ? $job["cacheDuration"] : 0;

        $ch = curl_init();

        curl_setopt_array($ch, [
            CURLOPT_URL => $job["url"],
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $cacheDuration,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 50000,
            CURLOPT_LOW_SPEED_TIME => 2,
            CURLOPT_TIMEOUT => 3,
        ]);

        // The proxy is only used when it is configured completely
        if (!empty($this->proxyhost) && !empty($this->proxyport) && !empty($this->proxyuser) && !empty($this->proxypassword)) {
            curl_setopt($ch, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($ch, CURLOPT_PROXYPORT, (int) $this->proxyport);
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        }

        if (!empty($job["username"]) && !empty($job["password"])) {
            curl_setopt($ch, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"]) && is_array($job["headers"])) {
            // curl expects a list of "<key>:<value>" strings
            $headers = [];
            foreach ($job["headers"] as $key => $value) {
                $headers[] = $key . ":" . $value;
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        return $ch;
    }

    /**
     * Signal handler registered in handle() for SIGINT, SIGTERM and SIGHUP.
     * Makes the main loop stop at its next iteration.
     *
     * @param int $sig number of the received signal
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }

}