RequestFetcher.php 7.37 KB
Newer Older
1
2
3
4
5
6
<?php

namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
Dominik Hebeler's avatar
Dominik Hebeler committed
7
use Log;
8

Dominik Hebeler's avatar
Dominik Hebeler committed
9
class RequestFetcher extends Command
10
11
12
13
14
15
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
Dominik Hebeler's avatar
Dominik Hebeler committed
16
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /**
     * Loop flag for handle(); set to false by sig_handler() and when a curl
     * handle cannot be attached to the multi handle.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * The curl multi handle new requests are attached to (created in the constructor).
     */
    protected $multicurl = null;

    /**
     * The previous multi handle, kept after handle() re-initializes $multicurl.
     */
    protected $oldMultiCurl = null;

    /**
     * Number of requests after which handle() re-initializes the multi handle.
     *
     * @var int
     */
    protected $maxFetchedDocuments = 10000;

    /**
     * Requests attached since the multi handle was last (re-)initialized.
     *
     * @var int
     */
    protected $fetchedDocuments = 0;

    /**
     * Proxy settings, filled from the PROXY_* environment variables in the
     * constructor. $proxyport is declared here explicitly: it is assigned in
     * the constructor, and assigning to an undeclared property is deprecated
     * as of PHP 8.2.
     */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;
31
32
33
34
35
36
37
38
39

    /**
     * Create a new command instance.
     *
     * Allocates the curl multi handle and copies the proxy settings from the
     * environment into the matching properties; each setting falls back to
     * an empty string when the variable is not set.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();

        $this->multicurl = curl_multi_init();

        // property name => environment variable it is read from
        $proxySettings = [
            "proxyhost" => "PROXY_HOST",
            "proxyport" => "PROXY_PORT",
            "proxyuser" => "PROXY_USER",
            "proxypassword" => "PROXY_PASSWORD",
        ];
        foreach ($proxySettings as $property => $variable) {
            $this->{$property} = env($variable, "");
        }
    }

    /**
     * Execute the console command.
     *
     * Runs the fetch loop until $shouldRun is cleared: pops JSON-encoded jobs
     * from the Redis list \App\MetaGer::FETCHQUEUE_KEY, attaches a curl handle
     * for each job to the current multi handle and lets readMultiCurl() store
     * finished responses. After $maxFetchedDocuments requests the multi handle
     * is replaced by a fresh one; the retired handle keeps being driven until
     * all of its transfers have finished and is then closed.
     *
     * @return mixed
     */
    public function handle()
    {
        $pidFile = "/tmp/fetcher";

        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        // Redis might not be available now
        for ($count = 0; $count < 10; $count++) {
            try {
                Redis::connection();
                break;
            } catch (\Predis\Connection\ConnectionException $e) {
                if ($count >= 9) {
                    // If its not available after 10 seconds we will exit
                    return;
                }
                sleep(1);
            }
        }

        touch($pidFile);

        if (!file_exists($pidFile)) {
            return;
        }

        try {
            $blocking = false;
            while ($this->shouldRun) {
                // Drive the transfers of the current multi handle; $active
                // receives the number of transfers still running.
                curl_multi_exec($this->multicurl, $active);

                // When idle we wait up to one second for a job (blpop),
                // otherwise we only poll the queue (lpop).
                $currentJob = null;
                if ($blocking) {
                    $currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
                    if (!empty($currentJob)) {
                        // blpop answers with [key, value]
                        $currentJob = $currentJob[1];
                    }
                } else {
                    $currentJob = Redis::lpop(\App\MetaGer::FETCHQUEUE_KEY);
                }

                if (!empty($currentJob)) {
                    $blocking = false;
                    $job = json_decode($currentJob, true);
                    if (!is_array($job)) {
                        // A queue entry that is not valid JSON must not take
                        // the whole worker down; drop it and carry on.
                        Log::error("Discarding malformed fetch job");
                    } else {
                        $ch = $this->getCurlHandle($job);
                        if (curl_multi_add_handle($this->multicurl, $ch) !== 0) {
                            $this->shouldRun = false;
                            Log::error("Couldn't add Handle to multicurl");
                            break;
                        }

                        $this->fetchedDocuments++;
                        // Only rotate when no retired handle is still being
                        // drained, otherwise that one would be overwritten
                        // and never closed.
                        if ($this->fetchedDocuments > $this->maxFetchedDocuments && $this->oldMultiCurl === null) {
                            Log::info("Reinitializing Multicurl after " . $this->fetchedDocuments . " requests.");
                            $this->oldMultiCurl = $this->multicurl;
                            $this->multicurl = curl_multi_init();
                            $this->fetchedDocuments = 0;
                        }
                        $active = true;
                    }
                }

                $answerRead = $this->readMultiCurl($this->multicurl);

                if ($this->oldMultiCurl !== null) {
                    // The retired handle still owns in-flight transfers; they
                    // only make progress when it is executed as well.
                    curl_multi_exec($this->oldMultiCurl, $oldActive);
                    if ($this->readMultiCurl($this->oldMultiCurl)) {
                        $answerRead = true;
                    }
                    if ($oldActive) {
                        // Keep polling instead of blocking on the queue.
                        $active = true;
                    } else {
                        // Nothing is running anymore and every finished
                        // transfer has been read above.
                        Log::debug("Removing finished multicurl handle");
                        curl_multi_close($this->oldMultiCurl);
                        $this->oldMultiCurl = null;
                    }
                }

                if (!$active && !$answerRead) {
                    $blocking = true;
                } else {
                    usleep(50 * 1000);
                }

                // Run handlers for signals that arrived meanwhile so that
                // sig_handler() can clear $shouldRun. Harmless if async
                // signal delivery is already enabled elsewhere.
                pcntl_signal_dispatch();
            }
        } finally {
            unlink($pidFile);
            curl_multi_close($this->multicurl);
            if ($this->oldMultiCurl !== null) {
                curl_multi_close($this->oldMultiCurl);
                $this->oldMultiCurl = null;
            }
        }
    }

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
    /**
     * Read every message currently queued on a curl multi handle and store
     * the corresponding response in Redis.
     *
     * The private data of each easy handle has the form
     * "<resulthash>;<cacheDuration>". The response body (an empty string for
     * any response code other than 200) is written to the key <resulthash>,
     * which is set to expire after 60 seconds. The easy handle is detached
     * from the multi handle afterwards, even if storing the result throws.
     *
     * @param mixed $mc curl multi handle to read from
     * @return bool true when at least one message was read
     */
    private function readMultiCurl($mc)
    {
        $processedAny = false;

        while (true) {
            $message = curl_multi_info_read($mc);
            if ($message === false) {
                break;
            }

            try {
                $processedAny = true;

                $private = explode(";", curl_getinfo($message["handle"], CURLINFO_PRIVATE));
                $resulthash = $private[0];
                // Parsed like before, but not used below: the expiry is fixed.
                $cacheDurationMinutes = intval($private[1]);
                $responseCode = curl_getinfo($message["handle"], CURLINFO_HTTP_CODE);

                $error = curl_error($message["handle"]);
                if (!empty($error)) {
                    Log::error($error);
                }

                if ($responseCode === 200) {
                    $body = \curl_multi_getcontent($message["handle"]);
                } else {
                    Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($message["handle"], CURLINFO_EFFECTIVE_URL) . "\n");
                    $body = "";
                }

                Redis::pipeline(function ($pipe) use ($resulthash, $body) {
                    $pipe->set($resulthash, $body);
                    $pipe->expire($resulthash, 60);
                });
            } finally {
                \curl_multi_remove_handle($mc, $message["handle"]);
            }
        }

        return $processedAny;
    }

Dominik Hebeler's avatar
Dominik Hebeler committed
174
    /**
     * Build a configured curl easy handle for one fetch job.
     *
     * Reads the keys "url", "resulthash" and "cacheDuration" from the job and,
     * when present and non-empty, "username"/"password" (HTTP auth) and
     * "headers" (map of header name => value). The proxy is only configured
     * when host, port, user and password are all non-empty.
     *
     * @param array $job decoded job as popped from the fetch queue
     * @return mixed the curl handle, ready to be added to a multi handle
     */
    private function getCurlHandle($job)
    {
        $handle = curl_init();

        $options = [
            CURLOPT_URL => $job["url"],
            // Carried along so the result can be matched to its job later.
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $job["cacheDuration"],
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 50000,
            CURLOPT_LOW_SPEED_TIME => 5,
            CURLOPT_TIMEOUT => 7,
        ];
        curl_setopt_array($handle, $options);

        $proxyConfigured = !(empty($this->proxyhost) || empty($this->proxyport)
            || empty($this->proxyuser) || empty($this->proxypassword));
        if ($proxyConfigured) {
            curl_setopt($handle, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($handle, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($handle, CURLOPT_PROXYPORT, $this->proxyport);
            curl_setopt($handle, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        }

        $hasCredentials = !empty($job["username"]) && !empty($job["password"]);
        if ($hasCredentials) {
            curl_setopt($handle, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"]) && sizeof($job["headers"]) > 0) {
            // Turn the name => value map into the "<name>:<value>" lines curl expects.
            $headerLines = array_map(
                function ($name, $content) {
                    return $name . ":" . $content;
                },
                array_keys($job["headers"]),
                $job["headers"]
            );
            curl_setopt($handle, CURLOPT_HTTPHEADER, $headerLines);
        }

        return $handle;
    }

Dominik Hebeler's avatar
Dominik Hebeler committed
215
    /**
     * Signal callback registered in handle() for SIGINT, SIGTERM and SIGHUP.
     *
     * Announces the shutdown on stdout and clears the loop flag so the fetch
     * loop in handle() stops.
     *
     * @param int $sig number of the received signal (not inspected)
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo "Terminating Process\n";
    }

}