Skip to content

Commit

Permalink
Merge pull request #3480 from rbayet/feat-tracker-ignore-invalidate-b…
Browse files Browse the repository at this point in the history
…ot-events

[Tracker] Allow ignoring bots generated tracker hits
  • Loading branch information
rbayet authored Jan 9, 2025
2 parents 01b203e + 299a8fb commit 29d827d
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 7 deletions.
78 changes: 78 additions & 0 deletions src/module-elasticsuite-tracker/Helper/BotDetector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<?php
/**
* DISCLAIMER
*
* Do not edit or add to this file if you wish to upgrade this module to newer versions in the future.
*
* @category Smile
* @package Smile\ElasticsuiteTracker
* @author Richard BAYET <[email protected]>
* @copyright 2025 Smile
* @license Open Software License ("OSL") v. 3.0
*/

namespace Smile\ElasticsuiteTracker\Helper;

use Magento\Framework\App\Helper\AbstractHelper;
use Magento\Framework\App\Helper\Context;
use Magento\Framework\App\Request\Http;

/**
 * Helper telling whether the current HTTP request was issued by a known bot.
 *
 * Detection relies only on the "User-Agent" request header: it is compared,
 * case-insensitively and as a substring match, against the list of bot user agent
 * fragments received by the constructor.
 *
 * @category Smile
 * @package  Smile\ElasticsuiteTracker
 */
class BotDetector extends AbstractHelper
{
    /**
     * @var Http
     */
    protected $request;

    /**
     * @var array
     */
    protected $botUserAgents = [];

    /**
     * Constructor.
     *
     * @param Context $context       Context.
     * @param Http    $request       HTTP request.
     * @param array   $botUserAgents Bot user agents list (fragments searched inside the User-Agent header).
     */
    public function __construct(Context $context, Http $request, $botUserAgents = [])
    {
        parent::__construct($context);
        $this->request       = $request;
        $this->botUserAgents = $botUserAgents;
    }

    /**
     * Get the current user agent from the request.
     *
     * @return string The "User-Agent" header value, or an empty string when the header is missing or empty.
     */
    public function getUserAgent()
    {
        return $this->request->getHeader('User-Agent') ?: '';
    }

    /**
     * Check if the current user agent belongs to a bot.
     *
     * The comparison is case-insensitive on both sides. Entries of the bot list that are
     * not strings or that are empty are ignored.
     *
     * @return bool True when the User-Agent contains one of the configured bot fragments.
     */
    public function isBot()
    {
        $userAgent = strtolower($this->getUserAgent());
        if ($userAgent === '') {
            // Nothing to match against: a request without User-Agent is not flagged.
            return false;
        }

        foreach ($this->botUserAgents as $bot) {
            // An empty needle would match every user agent on PHP 8 (and raise a warning on PHP 7).
            if (!is_string($bot) || $bot === '') {
                continue;
            }

            // Lowercase the needle too, so an entry such as "Googlebot" still matches.
            if (strpos($userAgent, strtolower($bot)) !== false) {
                return true;
            }
        }

        return false;
    }
}
16 changes: 16 additions & 0 deletions src/module-elasticsuite-tracker/Helper/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ class Data extends \Magento\Framework\App\Helper\AbstractHelper
*/
const CONFIG_IS_HEADLESS_MODE_XPATH = 'smile_elasticsuite_tracker/general/is_headless_mode';

/**
* Configuration path of the flag telling whether tracker hits coming from known bots are filtered/ignored.
* @var string
*/
const CONFIG_IS_FILTERING_BOT_HITS_XPATH = 'smile_elasticsuite_tracker/general/filter_bot_hits';

/**
* Anonymization status configuration path
* @var string
Expand Down Expand Up @@ -249,6 +255,16 @@ public function getEventsQueueCleanupDelay()
return (int) $this->scopeConfig->getValue(self::CONFIG_QUEUE_CLEANUP_DELAY_XPATH);
}

/**
 * Tells whether the "filter bot hits" configuration flag is set, i.e. whether tracker hits
 * generated by known bots should be preemptively ignored.
 *
 * @return bool
 */
public function isFilteringBotHits()
{
    $isFilteringEnabled = $this->scopeConfig->isSetFlag(self::CONFIG_IS_FILTERING_BOT_HITS_XPATH);

    return $isFilteringEnabled;
}

/**
* Return the current tracker visitor id
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,32 @@ class TrackingService implements \Smile\ElasticsuiteTracker\Api\CustomerTracking
*/
private $eventQueue;

/**
* Helper used to tell whether the current request was issued by a known bot.
*
* @var \Smile\ElasticsuiteTracker\Helper\BotDetector
*/
private $botDetector;

/**
* Constructor: stores each injected dependency on the instance.
*
* @param \Smile\ElasticsuiteTracker\Model\ResourceModel\CustomerLink $customerLinkResource Resource model.
* @param \Smile\ElasticsuiteTracker\Helper\Data $helper Tracking Helper.
* @param \Smile\ElasticsuiteTracker\Api\EventQueueInterface $eventQueue Event Queue.
* @param \Magento\Customer\Model\Session $customerSession Customer Session.
* @param \Smile\ElasticsuiteTracker\Helper\BotDetector $botDetector Bot detector, queried before an event is queued.
*/
public function __construct(
\Smile\ElasticsuiteTracker\Model\ResourceModel\CustomerLink $customerLinkResource,
\Smile\ElasticsuiteTracker\Helper\Data $helper,
\Smile\ElasticsuiteTracker\Api\EventQueueInterface $eventQueue,
\Magento\Customer\Model\Session $customerSession
\Magento\Customer\Model\Session $customerSession,
\Smile\ElasticsuiteTracker\Helper\BotDetector $botDetector
) {
$this->customerLinkResource = $customerLinkResource;
$this->helper = $helper;
$this->customerSession = $customerSession;
$this->eventQueue = $eventQueue;
$this->botDetector = $botDetector;
}

/**
Expand All @@ -76,6 +84,9 @@ public function hit($eventData): void
public function addEvent($eventData)
{
if ($this->helper->isEnabled()) {
if ($this->helper->isFilteringBotHits() && $this->botDetector->isBot()) {
return;
}
$this->addCustomerLink($eventData);
$this->eventQueue->addEvent($eventData);
}
Expand Down
5 changes: 5 additions & 0 deletions src/module-elasticsuite-tracker/etc/adminhtml/system.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
<field id="enabled">1</field>
</depends>
</field>
<!-- Yes/No switch, editable at default scope only (showInWebsite and showInStore are 0). Presumably stored under smile_elasticsuite_tracker/general/filter_bot_hits; the enclosing section/group is not visible here, to be confirmed. -->
<field id="filter_bot_hits" translate="label comment" type="select" sortOrder="40" showInDefault="1" showInWebsite="0" showInStore="0" canRestore="1">
<label>Filter bot hits</label>
<source_model>Magento\Config\Model\Config\Source\Yesno</source_model>
<comment><![CDATA[If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.]]></comment>
</field>
</group>
<group id="session" translate="label" type="text" sortOrder="20" showInDefault="1" showInWebsite="0" showInStore="0">
<label>Session Configuration</label>
Expand Down
1 change: 1 addition & 0 deletions src/module-elasticsuite-tracker/etc/config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
<retention_delay>12</retention_delay>
<use_api>0</use_api>
<is_headless_mode>0</is_headless_mode>
<filter_bot_hits>1</filter_bot_hits>
</general>
<session>
<visit_cookie_name>STUID</visit_cookie_name>
Expand Down
27 changes: 27 additions & 0 deletions src/module-elasticsuite-tracker/etc/di.xml
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,31 @@
</argument>
</arguments>
</type>

<!--
Known bot User-Agent fragments passed to the "botUserAgents" constructor argument of BotDetector.
BotDetector::isBot() lowercases the request User-Agent and searches each value below as a substring,
which is why every value is written in lowercase. Only the values are read by isBot(), not the item names.
-->
<type name="Smile\ElasticsuiteTracker\Helper\BotDetector">
<arguments>
<argument name="botUserAgents" xsi:type="array">
<item name="Googlebot" xsi:type="string">googlebot</item>
<item name="Adsbot-Google" xsi:type="string">adsbot-google</item>
<item name="Feedfetcher-Google" xsi:type="string">feedfetcher-google</item>
<item name="Mediapartners-Google" xsi:type="string">mediapartners-google</item>
<item name="MediapartnersGoogle" xsi:type="string">mediapartners (googlebot)</item>
<item name="APIs-Google" xsi:type="string">apis-google</item>
<item name="Google-InspectionTool" xsi:type="string">google-inspectiontool</item>
<item name="Storebot-Google" xsi:type="string">storebot-google</item>
<item name="GoogleOther" xsi:type="string">googleother</item>
<item name="bingbot" xsi:type="string">bingbot</item>
<item name="YahooSlurp" xsi:type="string">slurp</item>
<item name="LinkedIn" xsi:type="string">linkedinbot</item>
<item name="Yandex" xsi:type="string">yandex</item>
<item name="Baiduspider" xsi:type="string">baiduspider</item>
<item name="openai" xsi:type="string">openai</item>
<item name="duckduckbot" xsi:type="string">duckduckbot</item>
<item name="facebot" xsi:type="string">facebot</item>
<item name="twitterbot" xsi:type="string">twitterbot</item>
<item name="AlexaIaArchiver" xsi:type="string">ia_archiver</item>
</argument>
</arguments>
</type>

</config>
5 changes: 3 additions & 2 deletions src/module-elasticsuite-tracker/i18n/de_DE.csv
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@
"Delay","Verzögerung"
"In days. (Default is 7 days.)<br />Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In Tagen. (Standard ist 7 Tage.)<br />Ungültige Tracker-Ereignisse, die älter als diese Verzögerung sind, werden periodisch aus der Warteschlange für Ereignisse entfernt."
"You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","Sie können diesen Button anklicken, um alle ungültigen Tracker-Ereignisse unabhängig von der obigen Verzögerungseinstellung sofort aus der Indexierungsphase zu entfernen."
"",""
"",""
"Bot Handling","Handhabung von Robotern"
"Filter bot hits","Filtern Sie Robotertreffer"
"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Wenn diese Option aktiviert ist, wird verhindert, dass von bekannten KI- oder Suchmaschinen-Crawler-Bots generierte Tracker-Treffer aufgezeichnet werden, um zu verhindern, dass Ihre Verhaltensdaten durch manchmal völlig inkohärente Paginierungs- oder Ergebniszählungsdaten verunreinigt werden."
5 changes: 3 additions & 2 deletions src/module-elasticsuite-tracker/i18n/en_US.csv
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ Enabled,Enabled
"Delay","Delay"
"In days. (Default is 7 days.)<br />Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In days. (Default is 7 days.)<br />Invalid tracker events older than this delay are periodically removed from the events indexing queue."
"You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above."


"Bot Handling","Bot Handling"
"Filter bot hits","Filter bot hits"
"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","If enabled, tracker hits generated by known AI or search engine crawler bots will not be recorded, to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data."
3 changes: 3 additions & 0 deletions src/module-elasticsuite-tracker/i18n/fr_FR.csv
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ Tracking,Tracking
"Delay","Délai"
"In days. (Default is 7 days.)<br />Invalid tracker events older than this delay are periodically removed from the events indexing queue.","En jours. (Par défaut 7 jours.)<br />Les évènements invalides du tracker plus anciens que ce délai seront périodiquement supprimés de la file d'attente d'indexation des évènements."
"You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","Vous pouvez cliquer sur ce bouton pour supprimer immédiatement tous les évènements invalides du tracker de la file d'attente d'indexation. Le paramètre ""Délai"" ci-dessus est ignoré."
"Bot Handling","Gestion des Robots"
"Filter bot hits","Filtrer les hits de robots"
"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Si activé, les hits du tracker générés par des robots de moteurs de recherche ou d'IA ne seront pas enregistrés afin d'empêcher la pollution de vos données comportementales par des données de nombre de résultats ou de pagination parfois totalement incohérents."
5 changes: 3 additions & 2 deletions src/module-elasticsuite-tracker/i18n/nl_NL.csv
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@
"Delay","Vertraging"
"In days. (Default is 7 days.)<br />Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In dagen. (Standaard is 7 dagen.)<br />Ongeldige tracker events ouder dan deze vertraging worden periodiek verwijderd uit de wachtrij voor indexering van evenementen."
"You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","U kunt op deze knop klikken om onmiddellijk alle ongeldige trackergebeurtenissen uit de indexeringswachtrij te verwijderen, ongeacht de hierboven ingestelde vertraging."
"",""
"",""
"Bot Handling","Robotbediening"
"Filter bot hits","Filter robottreffers"
"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Indien ingeschakeld, voorkomt het dat door AI of zoekmachines gegenereerde tracker-hits worden geregistreerd om te voorkomen dat uw gedragsgegevens worden vervuild door soms totaal onsamenhangende paginering of gegevens over het aantal resultaten."

0 comments on commit 29d827d

Please sign in to comment.