From 5faca281b1c591caa1402a399c64dc67ffc44cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Phil=20H=C3=B6fer?= Date: Fri, 14 Jan 2022 12:11:23 +0000 Subject: [PATCH 1/2] Implement porn filter for Scopia --- app/Models/parserSkripte/Scopia.php | 80 +++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/app/Models/parserSkripte/Scopia.php b/app/Models/parserSkripte/Scopia.php index 8403d07a..6c9960ab 100644 --- a/app/Models/parserSkripte/Scopia.php +++ b/app/Models/parserSkripte/Scopia.php @@ -36,16 +36,19 @@ class Scopia extends Searchengine $anzeigeLink = $link; $descr = $result->description->__toString(); $this->counter++; - $this->results[] = new \App\Models\Result( - $this->engine, - $title, - $link, - $anzeigeLink, - $descr, - $this->engine->{"display-name"}, - $this->engine->homepage, - $this->counter - ); + if(! $this->containsPornContent($title.$descr)) { //see note at filtering method + $this->results[] = new \App\Models\Result( + $this->engine, + $title, + $link, + $anzeigeLink, + $descr, + $this->engine->{"display-name"}, + $this->engine->homepage, + $this->counter + ); + } + } } catch (\Exception $e) { Log::error("A problem occurred parsing results from $this->name:"); @@ -54,6 +57,63 @@ class Scopia extends Searchengine } } + private function containsPornContent($text) { + // Returns true if pornographic content is detected + // We noticed scopia often serving pornographic results for non-pornographic queries. After much deliberation we decided to filter pornographic from scopia. Those will have to be supplied by other search engines. + + $words = [ + "fisting" => 60, + "live cam" => 60, + "fick" => 60, + "anal" => 60, + "dildo" => 60, + "masturbat" => 60, + "gangbang" => 60, + "fotze" => 60, + "porn" => 50, + "anus" => 50, + "penetration" => 50, + "cuckold" => 50, + "orgasmus" => 50, + "milf" => 50, + "dilf" => 50, + "voyeur" => 40, + "fuck" => 40, + "nude" => 40, + "muschi" => 40, + "sex" => 40, + "nackt" => 40, + "amateur" => 30, + "schlampe" => 30, + "eroti" => 30, + "dick" => 30, + "teen" => 30, + "hardcore" => 30, + "fetisch" => 30, + "pussy" => 30, + "pussies" => 30, + "cheat" => 20, + "gratis" => 20, + "geil" => 20, + "video" => 10, + "girl" => 10, + "boy" => 10, + "weib" => 10, + "titt" => 10, + "bikini" => 10, + "hot " => 10, + "pics" => 10, + "free" => 10, + ]; + $acc = 0; + foreach($words as $word => $score) { + if (stristr($text,$word)) { + $acc += $score; + } + } + return $acc >= 100; + } + public function getNext(\App\MetaGer $metager, $result) { $result = html_entity_decode($result); -- GitLab From 3a2b92c5758d4c57ef18f189355740129c064322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Phil=20H=C3=B6fer?= Date: Fri, 14 Jan 2022 12:23:59 +0000 Subject: [PATCH 2/2] adjust porn filter and fix typo --- app/Models/parserSkripte/Scopia.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/Models/parserSkripte/Scopia.php b/app/Models/parserSkripte/Scopia.php index 6c9960ab..937e0347 100644 --- a/app/Models/parserSkripte/Scopia.php +++ b/app/Models/parserSkripte/Scopia.php @@ -59,11 +59,12 @@ class Scopia extends Searchengine private function containsPornContent($text) { // Returns true if pornographic content is detected - // We noticed scopia often serving pornographic results for non-pornographic queries. After much deliberation we decided to filter pornographic from scopia. Those will have to be supplied by other search engines. + // We noticed scopia often serving pornographic results for non-pornographic queries. After much deliberation we decided to filter pornographic results from scopia. Those will have to be supplied by other search engines. $words = [ "fisting" => 60, "live cam" => 60, + "telefonsex" => 60, "fick" => 60, "anal" => 60, "dildo" => 60, @@ -84,6 +85,7 @@ class Scopia extends Searchengine "sex" => 40, "nackt" => 40, "amateur" => 30, + "webcam" => 30, "schlampe" => 30, "eroti" => 30, "dick" => 30, -- GitLab