<?php

namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
use Log;

class RequestFetcher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /**
     * Main-loop flag. Cleared by sig_handler() or when a handle cannot be queued.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * The curl multi handle that new requests are attached to.
     */
    protected $multicurl = null;

    /**
     * The previous multi handle after a rotation; kept until its remaining
     * transfers have finished, then closed and reset to null.
     */
    protected $oldMultiCurl = null;

    /**
     * Number of requests after which the multi handle is replaced by a fresh one.
     *
     * @var int
     */
    protected $maxFetchedDocuments = 10000;

    /**
     * Requests queued on the current multi handle since the last rotation.
     *
     * @var int
     */
    protected $fetchedDocuments = 0;

    /**
     * Proxy settings read from the environment in the constructor.
     * $proxyport is declared here because the constructor assigns it; leaving
     * it undeclared creates a dynamic property (deprecated since PHP 8.2).
     */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;

    /**
     * Create a new command instance.
     *
     * Initializes the curl multi handle and reads the PROXY_* environment values.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();

        $this->multicurl = curl_multi_init();
        $this->proxyhost = env("PROXY_HOST", "");
        $this->proxyport = env("PROXY_PORT", "");
        $this->proxyuser = env("PROXY_USER", "");
        $this->proxypassword = env("PROXY_PASSWORD", "");
    }

    /**
     * Execute the console command.
     *
     * Waits for Redis, creates the marker file /tmp/fetcher and then loops:
     * jobs are taken from the \App\MetaGer::FETCHQUEUE_KEY list, turned into
     * curl handles, and finished transfers are written back to Redis by
     * readMultiCurl(). The loop ends when $shouldRun becomes false.
     *
     * @return mixed
     */
    public function handle()
    {
        $pidFile = "/tmp/fetcher";

        // Handlers registered via pcntl_signal() are only invoked when signals
        // are dispatched (ticks, pcntl_signal_dispatch() or async signals).
        // Enabling async signals is harmless if the framework already did so.
        if (function_exists('pcntl_async_signals')) {
            pcntl_async_signals(true);
        }
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        // Redis might not be available now
        for ($count = 0; $count < 10; $count++) {
            try {
                Redis::connection();
                break;
            } catch (\Predis\Connection\ConnectionException $e) {
                if ($count >= 9) {
                    // If its not available after 10 seconds we will exit
                    return;
                }
                sleep(1);
            }
        }

        touch($pidFile);

        if (!file_exists($pidFile)) {
            return;
        }

        try {
            $blocking = false;
            while ($this->shouldRun) {
                // Drive the transfers on the current multi handle.
                curl_multi_exec($this->multicurl, $active);

                $currentJobs = [];
                if (!$blocking) {
                    // Take everything that is queued right now.
                    // NOTE(review): LRANGE + DEL in a pipeline is not a
                    // transaction; a job pushed between the two commands
                    // could be dropped - confirm whether that matters.
                    $elements = Redis::pipeline(function ($redis) {
                        $redis->lrange(\App\MetaGer::FETCHQUEUE_KEY, 0, -1);
                        $redis->del(\App\MetaGer::FETCHQUEUE_KEY);
                    });
                    if (is_array($elements) && isset($elements[0]) && is_array($elements[0])) {
                        $currentJobs = $elements[0];
                    }
                } else {
                    // Nothing in flight: wait up to one second for the next job.
                    $currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
                    if (!empty($currentJob)) {
                        $currentJobs[] = $currentJob[1];
                    }
                }

                foreach ($currentJobs as $rawJob) {
                    $job = json_decode($rawJob, true);
                    if (!is_array($job) || !isset($job["url"], $job["resulthash"])) {
                        // A malformed entry would otherwise be fetched with an
                        // empty URL and stored under an empty Redis key.
                        Log::error("Skipping malformed fetch job");
                        continue;
                    }

                    $ch = $this->getCurlHandle($job);
                    if (curl_multi_add_handle($this->multicurl, $ch) !== CURLM_OK) {
                        $this->shouldRun = false;
                        Log::error("Couldn't add Handle to multicurl");
                        break;
                    }

                    $this->fetchedDocuments++;
                    // Rotate only when the previous old handle has been released,
                    // otherwise its pending transfers would be lost.
                    if ($this->fetchedDocuments > $this->maxFetchedDocuments && $this->oldMultiCurl === null) {
                        Log::info("Reinitializing Multicurl after " . $this->fetchedDocuments . " requests.");
                        $this->oldMultiCurl = $this->multicurl;
                        $this->multicurl = curl_multi_init();
                        $this->fetchedDocuments = 0;
                    }
                    $blocking = false;
                    $active = true;
                }

                $answerRead = $this->readMultiCurl($this->multicurl);

                $oldActive = 0;
                if ($this->oldMultiCurl !== null) {
                    // The rotated handle still owns transfers; they only make
                    // progress while curl_multi_exec() is called on it.
                    curl_multi_exec($this->oldMultiCurl, $oldActive);
                    if ($this->readMultiCurl($this->oldMultiCurl)) {
                        $answerRead = true;
                    }
                    if (!$oldActive) {
                        // No running transfers and all results were read above.
                        Log::debug("Removing finished multicurl handle");
                        curl_multi_close($this->oldMultiCurl);
                        $this->oldMultiCurl = null;
                    }
                }

                if (!$active && !$oldActive && !$answerRead) {
                    // Idle: switch to the blocking pop for the next iteration.
                    $blocking = true;
                } else {
                    usleep(50 * 1000);
                }
            }
        } finally {
            if (file_exists($pidFile)) {
                unlink($pidFile);
            }
            if ($this->oldMultiCurl !== null) {
                curl_multi_close($this->oldMultiCurl);
                $this->oldMultiCurl = null;
            }
            curl_multi_close($this->multicurl);
        }
    }

    /**
     * Collects all finished transfers of a multi handle and stores their results in Redis.
     *
     * For every finished transfer the result hash is taken from the handle's
     * CURLOPT_PRIVATE value ("<resulthash>;<cacheDuration>"). The response body
     * (or an empty string when the HTTP status is not 200) is written to that
     * key with a fixed expiry of 60 seconds. The cache duration part of the
     * private value is not used here. The easy handle is always detached from
     * the multi handle afterwards.
     *
     * @param mixed $mc curl multi handle to read from
     * @return bool true if at least one finished transfer was processed
     */
    private function readMultiCurl($mc)
    {
        $answerRead = false;
        while (($info = curl_multi_info_read($mc)) !== false) {
            try {
                $answerRead = true;
                $handle = $info["handle"];

                $private = explode(";", curl_getinfo($handle, CURLINFO_PRIVATE));
                $resulthash = $private[0];
                $responseCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
                $body = "";

                $error = curl_error($handle);
                if (!empty($error)) {
                    Log::error($error);
                }

                if ($responseCode !== 200) {
                    Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($handle, CURLINFO_EFFECTIVE_URL) . "\"\n");
                } else {
                    $body = \curl_multi_getcontent($handle);
                }

                Redis::pipeline(function ($pipe) use ($resulthash, $body) {
                    $pipe->set($resulthash, $body);
                    $pipe->expire($resulthash, 60);
                });
            } finally {
                \curl_multi_remove_handle($mc, $info["handle"]);
            }
        }
        return $answerRead;
    }

    /**
     * Builds a configured curl easy handle for one fetch job.
     *
     * Reads "url", "resulthash" and "cacheDuration" from the job, and
     * optionally "username"/"password" (HTTP auth) and "headers" (map of
     * header name to value). The proxy is only configured when host, port,
     * user and password are all non-empty.
     *
     * @param array $job decoded job description
     * @return mixed the curl easy handle
     */
    private function getCurlHandle($job)
    {
        $ch = curl_init();

        curl_setopt_array($ch, [
            CURLOPT_URL => $job["url"],
            // Carried along so readMultiCurl() can map the response to its Redis key.
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $job["cacheDuration"],
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 50000,
            CURLOPT_LOW_SPEED_TIME => 5,
            CURLOPT_TIMEOUT => 7,
        ]);

        if (!empty($this->proxyhost) && !empty($this->proxyport) && !empty($this->proxyuser) && !empty($this->proxypassword)) {
            curl_setopt($ch, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxyport);
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        }

        if (!empty($job["username"]) && !empty($job["password"])) {
            curl_setopt($ch, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"]) && is_array($job["headers"])) {
            // Each map entry becomes one "<key>:<value>" header line.
            $headers = [];
            foreach ($job["headers"] as $key => $value) {
                $headers[] = $key . ":" . $value;
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        return $ch;
    }

    /**
     * Signal handler for SIGINT, SIGTERM and SIGHUP: asks the main loop to stop.
     *
     * @param int $sig the received signal number (not inspected)
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }

}