Result.php 6.74 KB
Newer Older
1
2
<?php

3
namespace App\Models;
4

5
6


7
8
9
/**
 * A single search result delivered by one search engine.
 *
 * The constructor normalises the raw values (title, links, description),
 * derives several "stripped" representations of the displayed link that are
 * used for blacklisting and duplicate detection, and stores the boost
 * configured for the delivering engine. rank() computes the ranking score,
 * isValid() decides whether the result may be shown.
 */
class Result
{
	/** @var string Title, trimmed and with HTML tags removed. */
	public $titel;

	/** @var string Trimmed target link of the result. */
	public $link;

	/** @var string Trimmed link as it is displayed to the user. */
	public $anzeigeLink;

	/** @var string Description without tags or line breaks, limited to 250 bytes. */
	public $descr;

	/** @var string Trimmed copy of the $gefVon constructor argument. */
	public $gefVon;

	/** @var string $link rewritten to go through the proxy ("" if there is no link). */
	public $proxyLink;

	/** @var int|float Inverted source position, see the constructor (0 - 19). */
	public $sourceRank;

	/** @var int|float Multiplier taken from the provider's "engineBoost" attribute, 1 if absent. */
	public $engineBoost;

	/** @var bool Set to true by the constructor; not read inside this class. */
	public $valid;

	/** @var string|null|false Host component of $link as returned by parse_url(). */
	public $host;

	/** @var string Host of $anzeigeLink without a leading "www.". */
	public $strippedHost;

	/** @var string Last two dot separated labels of $strippedHost. */
	public $strippedDomain;

	/** @var string $strippedHost followed by the path of $anzeigeLink. */
	public $strippedLink;

	/** @var int|float Score calculated by rank(); 0 until rank() was called. */
	public $rank;

	/** @var mixed Copy of the $partnershop constructor argument. */
	public $partnershop;

	/** @var mixed Copy of the $image constructor argument. */
	public $image;

	/**
	 * @param \SimpleXMLElement $provider    Configuration node of the delivering engine;
	 *                                       only its "engineBoost" attribute is read here.
	 * @param string            $titel       Raw title.
	 * @param string            $link        Raw target link.
	 * @param string            $anzeigeLink Raw display link.
	 * @param string            $descr       Raw description.
	 * @param string            $gefVon      Stored trimmed, otherwise unused in this class.
	 * @param int               $sourceRank  Position reported by the engine; values outside
	 *                                       1 - 20 are treated as 20.
	 * @param mixed             $partnershop Stored unchanged.
	 * @param mixed             $image       Stored unchanged.
	 */
	public function __construct ( \SimpleXMLElement $provider, $titel, $link, $anzeigeLink , $descr, $gefVon, $sourceRank, $partnershop = false, $image = "" )
	{
		$this->titel = strip_tags(trim($titel));
		$this->link = trim($link);
		$this->anzeigeLink = trim($anzeigeLink);

		$descr = strip_tags(trim($descr));
		$descr = preg_replace("/\n+/si", " ", $descr);
		if( strlen($descr) > 250 )
		{
			# Keep only the first wrapped line. "cut" is enabled so that a text
			# without any space is still broken; otherwise no line break would be
			# inserted and the description would be truncated to an empty string.
			$wrapped = wordwrap($descr, 250, "\n", true);
			$breakPos = strpos($wrapped, "\n");
			$descr = ($breakPos === false) ? $wrapped : substr($wrapped, 0, $breakPos);
		}
		$this->descr = $descr;

		$this->gefVon = trim($gefVon);
		$this->proxyLink = $this->generateProxyLink($this->link);

		# Invert the position: position 1 becomes 19, position 20 (or anything
		# out of range) becomes 0.
		if( $sourceRank <= 0 || $sourceRank > 20 )
		{
			$sourceRank = 20;
		}
		$this->sourceRank = 20 - $sourceRank;

		if( isset($provider["engineBoost"]) )
		{
			$this->engineBoost = floatval($provider["engineBoost"]->__toString());
		}else
		{
			$this->engineBoost = 1;
		}

		$this->valid = true;
		$this->host = parse_url($this->link, PHP_URL_HOST);
		# Order matters: the domain and the stripped link are derived from the stripped host.
		$this->strippedHost = $this->getStrippedHost($this->anzeigeLink);
		$this->strippedDomain = $this->getStrippedDomain($this->strippedHost);
		$this->strippedLink = $this->getStrippedLink($this->anzeigeLink);
		$this->rank = 0;
		$this->partnershop = $partnershop;
		$this->image = $image;
	}

	/**
	 * Calculates the ranking score and stores it in $this->rank.
	 *
	 * The score is the sum of a source position part, a URL boost and a boost
	 * for query words found in title and description; the sum is multiplied by
	 * the engine boost if that is greater than 0.
	 *
	 * @param \App\MetaGer $metager Supplies the search query via getQ().
	 * @return void
	 */
	public function rank (\App\MetaGer $metager)
	{
		$rank = $this->sourceRank * 0.02;

		# URL boost
		$link = $this->anzeigeLink;
		if( strpos($link, "http") !== 0 )
		{
			$link = "http://" . $link;
		}
		$link = parse_url($link, PHP_URL_HOST) . parse_url($link, PHP_URL_PATH);
		$query = $metager->getQ();

		# Remove whitespace, scheme, "www." and separators from link and query.
		$noise = [
			"/\s+/si",
			"/http:/si",
			"/https:/si",
			"/www\./si",
			"/\//si",
			"/\./si",
			"/-/si"
			];
		$link = (string) preg_replace($noise, "", $link);
		$query = (string) preg_replace($noise, "", $query);

		# NOTE(review): the previous code tested each query character with
		# `strpos($tmpLink, $char) >= 0` against a string that was always empty.
		# That comparison is always true, so in effect every query character
		# (except "0", which is falsy) was counted and the boost only depended on
		# the lengths of query and link. The arithmetic is kept unchanged here so
		# that existing rankings do not shift; the heuristic should be revisited.
		$linkLength = strlen($link);
		if( strlen($this->descr) > 80 && $linkLength > 0 )
		{
			$count = 0;
			foreach( str_split($query) as $char )
			{
				if( !$char )
					continue;
				$count++;
				$rank += $count / ($linkLength * 60);
			}
		}

		# Boost for occurrences of the query words:
		$maxRank = 0.1;
		$tmpRank = 0;
		$query = $metager->getQ();
		# Words of up to three characters are ignored.
		$query = preg_replace("/\b\w{1,3}\b/si", "", $query);
		$query = preg_replace("/\s+/si", " ", $query);
		$words = explode(" ", trim($query));

		# Nothing is added unless both title and description are non-empty.
		if( strlen($this->titel) > 0 && strlen($this->descr) > 0 )
		{
			foreach( $words as $word )
			{
				if( strlen($word) === 0 )
					continue;

				# A whole word match (case-insensitive) weighs more than a plain
				# substring match (case-sensitive). The substring search uses the
				# raw word, only the regular expression gets the quoted form.
				$pattern = "/\b" . preg_quote($word, "/") . "\b/si";

				if( preg_match($pattern, $this->titel) )
				{
					$tmpRank += .7 * .6 * $maxRank;
				}elseif( strpos($this->titel, $word) !== false )
				{
					$tmpRank += .3 * .6 * $maxRank;
				}

				if( preg_match($pattern, $this->descr) )
				{
					$tmpRank += .7 * .4 * $maxRank;
				}elseif( strpos($this->descr, $word) !== false )
				{
					$tmpRank += .3 * .4 * $maxRank;
				}
			}
		}
		# explode() always returns at least one element, so this never divides by zero.
		$tmpRank /= sizeof($words) * 10;
		$rank += $tmpRank;

		if( $this->engineBoost > 0 )
		{
			$rank *= floatval($this->engineBoost);
		}

		$this->rank = $rank;
	}

	/**
	 * @return int|float Score calculated by the last rank() call, 0 before that.
	 */
	public function getRank ()
	{
		return $this->rank;
	}

	/**
	 * Decides whether this result may be shown.
	 *
	 * Applies, in this order: user blacklists, global blacklists, language
	 * filter, stop words, required phrases, the per-host limit and the
	 * duplicate filter. When the result passes, its link and host are
	 * registered with $metager, so calling this method has side effects.
	 *
	 * @param \App\MetaGer $metager Supplies filters and collects links / host counts.
	 * @return bool true if the result passed every filter.
	 */
	public function isValid (\App\MetaGer $metager)
	{
		# First the personal blacklists (defined through URL parameters):
		if( in_array($this->strippedHost, $metager->getUserHostBlacklist())
			|| in_array($this->strippedDomain, $metager->getUserDomainBlacklist()) )
		{
			return false;
		}

		# Now our own URL and domain blacklists:
		if( $this->strippedHost !== ""
			&& ( in_array($this->strippedHost, $metager->getDomainBlacklist())
				|| in_array($this->strippedLink, $metager->getUrlBlacklist()) ) )
		{
			return false;
		}

		$text = $this->titel . " " . $this->descr;

		# The optional language filter:
		if( $metager->getLang() !== "all" && $metager->getLang() !== $this->detectLanguage($text) )
		{
			return false;
		}

		# Stop words: drop the result if any of them occurs (case-insensitive).
		foreach( $metager->getStopWords() as $stopWord )
		{
			if( stripos($text, $stopWord) !== false )
			{
				return false;
			}
		}

		# Phrase search: every phrase has to occur (case-insensitive).
		foreach( $metager->getPhrases() as $phrase )
		{
			if( stripos($text, $phrase) === false )
			{
				return false;
			}
		}

		# Finally two more checks: the host filter, which makes sure that at most
		# three links per host are shown, and the duplicate filter.
		# The host filter is skipped for site searches and for the hosts below.
		# The entries carry no "www." because $strippedHost never has one.
		if( $metager->getSite() === "" )
		{
			$unlimitedHosts = [
				"ncbi.nlm.nih.gov",
				"twitter.com",
				"ladenpreis.net",
				"onenewspage.com"
				];
			$limited = true;
			foreach( $unlimitedHosts as $unlimitedHost )
			{
				if( strpos($this->strippedHost, $unlimitedHost) !== false )
				{
					$limited = false;
					break;
				}
			}
			if( $limited && $metager->getHostCount($this->strippedHost) >= 3 )
			{
				return false;
			}
		}

		# Independent of that, our duplicate filter:
		if( $metager->addLink($this->strippedLink) )
		{
			$metager->addHostCount($this->strippedHost);
			return true;
		}

		return false;
	}

	/**
	 * Feeds $text to the Models/lang.pl script and returns the last line it prints.
	 *
	 * The text comes from a remote search engine, so it is written to the
	 * script's standard input instead of being embedded in a shell command.
	 *
	 * @param string $text
	 * @return string Last output line of the script, "" if it could not be started
	 *                or printed nothing.
	 */
	private function detectLanguage ($text)
	{
		$path = app_path() . "/Models/lang.pl";
		$descriptors = [
			0 => ["pipe", "r"],
			1 => ["pipe", "w"]
			];

		$process = proc_open(escapeshellarg($path), $descriptors, $pipes);
		if( !is_resource($process) )
		{
			return "";
		}

		fwrite($pipes[0], $text . "\n");
		fclose($pipes[0]);
		$output = stream_get_contents($pipes[1]);
		fclose($pipes[1]);
		proc_close($process);

		$lines = preg_split("/\r?\n/", rtrim((string) $output));
		return (string) end($lines);
	}

	/**
	 * @param string $link Link with or without scheme.
	 * @return string Host of $link without a leading "www.".
	 */
	private function getStrippedHost ($link)
	{
		if( strpos($link, "http") !== 0 )
		{
			$link = "http://" . $link;
		}
		$host = parse_url($link, PHP_URL_HOST);
		return preg_replace("/^www\./si", "", $host);
	}

	/**
	 * Requires $this->strippedHost to be set already.
	 *
	 * @param string $link Link with or without scheme.
	 * @return string $this->strippedHost followed by the path of $link.
	 */
	private function getStrippedLink ($link)
	{
		if( strpos($link, "http") !== 0 )
		{
			$link = "http://" . $link;
		}
		return $this->strippedHost . parse_url($link, PHP_URL_PATH);
	}

	/**
	 * @param string $link A host name.
	 * @return string The last two dot separated labels of $link, or $link
	 *                unchanged if it contains no dot.
	 */
	private function getStrippedDomain ($link)
	{
		if( preg_match("/([^\.]*\.[^\.]*)$/si", $link, $match) )
		{
			return $match[1];
		}
		return $link;
	}

	/**
	 * Builds the proxy URL for $link: a trailing line break is removed and the
	 * "scheme://" prefix becomes "scheme/" before the proxy address is prepended.
	 *
	 * @param string $link
	 * @return string Proxy URL, or "" if $link is empty.
	 */
	private function generateProxyLink ($link)
	{
		if( !$link )
		{
			return "";
		}
		$target = preg_replace("/\r?\n$/s", "", $link);
		$target = preg_replace("#^([\w+.-]+)://#s", "$1/", $target);
		return "https://proxy.suma-ev.de/cgi-bin/nph-proxy.cgi/en/I0/" . $target;
	}
}