From 38a2cd40f50ae590d8c2a6040db9b065a639ef95 Mon Sep 17 00:00:00 2001 From: Vadym Honcharuk Date: Mon, 16 Dec 2024 18:22:53 +0200 Subject: [PATCH 1/6] [Indices] Feature #3453: Add detection for closed indices --- .../Block/Adminhtml/Analysis/Analyzer.php | 1 + .../Grid/Column/Renderer/IndexStatus.php | 2 + .../Model/IndexStatsProvider.php | 9 +-- .../Model/IndexStatusProvider.php | 59 ++++++++++++++++++- 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/module-elasticsuite-indices/Block/Adminhtml/Analysis/Analyzer.php b/src/module-elasticsuite-indices/Block/Adminhtml/Analysis/Analyzer.php index 7aea42a8d..df3454785 100644 --- a/src/module-elasticsuite-indices/Block/Adminhtml/Analysis/Analyzer.php +++ b/src/module-elasticsuite-indices/Block/Adminhtml/Analysis/Analyzer.php @@ -72,6 +72,7 @@ public function getElasticSuiteIndices(): ?array IndexStatus::GHOST_STATUS, IndexStatus::EXTERNAL_STATUS, IndexStatus::UNDEFINED_STATUS, + IndexStatus::CLOSED_STATUS, ]; $indices = []; diff --git a/src/module-elasticsuite-indices/Block/Widget/Grid/Column/Renderer/IndexStatus.php b/src/module-elasticsuite-indices/Block/Widget/Grid/Column/Renderer/IndexStatus.php index 08909119e..068468a10 100644 --- a/src/module-elasticsuite-indices/Block/Widget/Grid/Column/Renderer/IndexStatus.php +++ b/src/module-elasticsuite-indices/Block/Widget/Grid/Column/Renderer/IndexStatus.php @@ -35,6 +35,7 @@ class IndexStatus extends AbstractRenderer public const REBUILDING_STATUS = 'rebuilding'; public const GHOST_STATUS = 'ghost'; public const EXTERNAL_STATUS = 'external'; + public const CLOSED_STATUS = 'closed'; public const UNDEFINED_STATUS = 'undefined'; /** @@ -45,6 +46,7 @@ class IndexStatus extends AbstractRenderer self::REBUILDING_STATUS => self::SEVERITY_MINOR, self::GHOST_STATUS => self::SEVERITY_CRITICAL, self::EXTERNAL_STATUS => self::SEVERITY_EXTERNAL, + self::CLOSED_STATUS => self::SEVERITY_MINOR, self::UNDEFINED_STATUS => self::SEVERITY_UNDEFINED, ]; diff --git a/src/module-elasticsuite-indices/Model/IndexStatsProvider.php b/src/module-elasticsuite-indices/Model/IndexStatsProvider.php index 417f38c96..1dbf83caf 100644 --- a/src/module-elasticsuite-indices/Model/IndexStatsProvider.php +++ b/src/module-elasticsuite-indices/Model/IndexStatsProvider.php @@ -120,9 +120,10 @@ public function deleteIndex($indexName): void public function indexStats($indexName, $alias): array { $data = [ - 'index_name' => $indexName, - 'index_alias' => $alias, - 'size' => 'undefined', + 'index_name' => $indexName, + 'index_alias' => $alias, + 'number_of_documents' => 'undefined', + 'size' => 'undefined', ]; try { @@ -144,7 +145,7 @@ public function indexStats($indexName, $alias): array sprintf('Error when loading/parsing statistics for index "%s"', $indexName), ['exception' => $e] ); - $data['index_status'] = IndexStatus::UNDEFINED_STATUS; + $data['index_status'] = IndexStatus::CLOSED_STATUS; } return $data; diff --git a/src/module-elasticsuite-indices/Model/IndexStatusProvider.php b/src/module-elasticsuite-indices/Model/IndexStatusProvider.php index 10a828127..0337c6531 100644 --- a/src/module-elasticsuite-indices/Model/IndexStatusProvider.php +++ b/src/module-elasticsuite-indices/Model/IndexStatusProvider.php @@ -13,7 +13,10 @@ */ namespace Smile\ElasticsuiteIndices\Model; +use Exception; +use Psr\Log\LoggerInterface; use Magento\Framework\DataObject; +use Smile\ElasticsuiteCore\Api\Client\ClientInterface; use Smile\ElasticsuiteCore\Helper\IndexSettings as IndexSettingsHelper; use Smile\ElasticsuiteIndices\Block\Widget\Grid\Column\Renderer\IndexStatus; use Smile\ElasticsuiteIndices\Model\ResourceModel\StoreIndices\CollectionFactory as StoreIndicesCollectionFactory; @@ -45,6 +48,16 @@ class IndexStatusProvider */ private const SECONDS_IN_DAY = 86400; + /** + * @var ClientInterface + */ + private $client; + + /** + * @var LoggerInterface + */ + private $logger; + /** * @var IndexSettingsHelper */ @@ -63,22 +76,28 @@ class IndexStatusProvider /** * Constructor. * + * @param ClientInterface $client ES client. * @param IndexSettingsHelper $indexSettingsHelper Index settings helper. * @param StoreIndicesCollectionFactory $storeIndicesFactory Store indices collection. * @param WorkingIndexerCollectionFactory $indexerCollectionFactory Working indexers collection. + * @param LoggerInterface $logger Logger. */ public function __construct( + ClientInterface $client, IndexSettingsHelper $indexSettingsHelper, StoreIndicesCollectionFactory $storeIndicesFactory, - WorkingIndexerCollectionFactory $indexerCollectionFactory + WorkingIndexerCollectionFactory $indexerCollectionFactory, + LoggerInterface $logger ) { + $this->client = $client; $this->indexSettingsHelper = $indexSettingsHelper; $this->storeIndices = $storeIndicesFactory->create()->getItems(); $this->workingIndexers = $indexerCollectionFactory->create()->getItems(); + $this->logger = $logger; } /** - * Get a index status. + * Get an index status. * * @param string $indexName Index name. * @param string $alias Index alias. @@ -93,6 +112,10 @@ public function getIndexStatus($indexName, $alias): string return IndexStatus::EXTERNAL_STATUS; } + if ($this->isClosed($indexName)) { + return IndexStatus::CLOSED_STATUS; + } + if ($this->isRebuilding($indexName, $indexDate)) { return IndexStatus::REBUILDING_STATUS; } @@ -151,6 +174,38 @@ private function isExternal(string $indexName): bool return true; } + /** + * Returns if the index is closed. + * + * @param string $indexName Index name. + * + * @return bool + */ + private function isClosed(string $indexName): bool + { + try { + // Ensure the index is NOT External before checking for Closed status. + if ($this->isExternal($indexName)) { + return false; + } + + // Attempt to fetch index stats or metadata to check its status. + $indexStats = $this->client->indexStats($indexName); + + // If we successfully retrieved index stats and no error occurs, the index is not closed. + return false; + } catch (Exception $e) { + // Log the error (optional for better diagnostics). + $this->logger->error( + sprintf('Error fetching index stats for "%s": %s', $indexName, $e->getMessage()) + ); + + // If an error occurs, it's safer to assume the index could be closed, or the stats are unavailable. + // Returning true here means the index is likely closed or inaccessible. + return true; + } + } + /** * Returns if index is ghost. * From 974fd381d5d45e8b8934087025e07658c7bca4d7 Mon Sep 17 00:00:00 2001 From: Vadym Honcharuk Date: Mon, 16 Dec 2024 18:30:41 +0200 Subject: [PATCH 2/6] [Indices] Feature #3453: phpmd warning --- src/module-elasticsuite-indices/Model/IndexStatusProvider.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/module-elasticsuite-indices/Model/IndexStatusProvider.php b/src/module-elasticsuite-indices/Model/IndexStatusProvider.php index 0337c6531..b8c1aca8f 100644 --- a/src/module-elasticsuite-indices/Model/IndexStatusProvider.php +++ b/src/module-elasticsuite-indices/Model/IndexStatusProvider.php @@ -180,6 +180,7 @@ private function isExternal(string $indexName): bool * @param string $indexName Index name. * * @return bool + * @SuppressWarnings(PHPMD.UnusedLocalVariable) */ private function isClosed(string $indexName): bool { From 5d30e259617e9d973b7a86f3961b7b4fe0ea5c26 Mon Sep 17 00:00:00 2001 From: Richard BAYET Date: Thu, 19 Dec 2024 18:08:58 +0100 Subject: [PATCH 3/6] [Analytics] Date picker vs switchers (store, customer group, company) --- .../Report/CustomerCompanySelector.php | 19 ++++++++++++++ .../Report/CustomerGroupSelector.php | 19 ++++++++++++++ .../report/customer_company_selector.phtml | 8 ++++-- .../report/customer_group_selector.phtml | 8 ++++-- .../report/date_range_switcher.phtml | 2 +- .../js/report/customer-company-selector.js | 25 ++++--------------- .../web/js/report/customer-group-selector.js | 25 ++++--------------- 7 files changed, 61 insertions(+), 45 deletions(-) diff --git a/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerCompanySelector.php b/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerCompanySelector.php index 8f88247bf..13a3fa46a 100644 --- a/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerCompanySelector.php +++ b/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerCompanySelector.php @@ -20,6 +20,7 @@ use Magento\Framework\Module\Manager as ModuleManager; use Magento\Framework\View\Element\Template; use Magento\Framework\View\Element\Template\Context; +use Smile\ElasticsuiteAnalytics\Model\Report\Context as ReportContext; /** * Block used to display customer company selector in reports. @@ -47,6 +48,11 @@ class CustomerCompanySelector extends Template */ protected $searchCriteriaBuilder; + /** + * @var ReportContext + */ + protected $reportContext; + /** * @var \Magento\Company\Api\CompanyRepositoryInterface|null */ @@ -61,6 +67,7 @@ class CustomerCompanySelector extends Template * @param ModuleManager $moduleManager Module manager. * @param ScopeConfigInterface $scopeConfig Scope configuration. * @param SearchCriteriaBuilder $searchCriteriaBuilder The search criteria builder. + * @param ReportContext $reportContext Report context. * @param array $data Additional block data. * @throws LocalizedException */ @@ -69,10 +76,12 @@ public function __construct( ModuleManager $moduleManager, ScopeConfigInterface $scopeConfig, SearchCriteriaBuilder $searchCriteriaBuilder, + ReportContext $reportContext, array $data = [] ) { $this->scopeConfig = $scopeConfig; $this->searchCriteriaBuilder = $searchCriteriaBuilder; + $this->reportContext = $reportContext; // Check if Magento_Company module is enabled before attempting to load the repository. if ($moduleManager->isEnabled('Magento_Company')) { @@ -117,4 +126,14 @@ public function getCompaniesList() return []; } + + /** + * Get customer company ID. + * + * @return mixed + */ + public function getCustomerCompanyId() + { + return $this->reportContext->getCustomerCompanyId(); + } } diff --git a/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerGroupSelector.php b/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerGroupSelector.php index 99170e56d..01612d896 100644 --- a/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerGroupSelector.php +++ b/src/module-elasticsuite-analytics/Block/Adminhtml/Report/CustomerGroupSelector.php @@ -15,6 +15,7 @@ use Magento\Framework\View\Element\Template; use Magento\Customer\Model\ResourceModel\Group\CollectionFactory; +use Smile\ElasticsuiteAnalytics\Model\Report\Context as ReportContext; /** * Block used to display customer group selector in reports. @@ -32,19 +33,27 @@ class CustomerGroupSelector extends Template */ protected $customerGroupCollectionFactory; + /** + * @var ReportContext + */ + protected $reportContext; + /** * CustomerGroupSelector constructor. * * @param Template\Context $context The context of the template. * @param CollectionFactory $customerGroupCollectionFactory Factory for creating customer group collection. + * @param ReportContext $reportContext Report context. * @param array $data Additional block data. */ public function __construct( Template\Context $context, CollectionFactory $customerGroupCollectionFactory, + ReportContext $reportContext, array $data = [] ) { $this->customerGroupCollectionFactory = $customerGroupCollectionFactory; + $this->reportContext = $reportContext; parent::__construct($context, $data); } @@ -57,4 +66,14 @@ public function getCustomerGroups() { return $this->customerGroupCollectionFactory->create()->toOptionArray(); } + + /** + * Get customer group ID. + * + * @return mixed + */ + public function getCurrentCustomerGroupId() + { + return $this->reportContext->getCustomerGroupId(); + } } diff --git a/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_company_selector.phtml b/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_company_selector.phtml index b871b4230..fdd977b02 100644 --- a/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_company_selector.phtml +++ b/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_company_selector.phtml @@ -17,6 +17,8 @@ /** * @var Smile\ElasticsuiteAnalytics\Block\Adminhtml\Report\CustomerCompanySelector $block */ +$baseUrl = $block->getUrl('*/*/*', ['_current' => true, 'company_id' => '__company_id__']); +$companyId = $block->getCustomerCompanyId(); ?> isCompanyEnabled()): ?> getCompaniesList(); ?> @@ -25,7 +27,7 @@ @@ -33,7 +35,9 @@ diff --git a/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_group_selector.phtml b/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_group_selector.phtml index b004c3bc5..ded0655aa 100644 --- a/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_group_selector.phtml +++ b/src/module-elasticsuite-analytics/view/adminhtml/templates/report/customer_group_selector.phtml @@ -18,13 +18,15 @@ * @var Smile\ElasticsuiteAnalytics\Block\Adminhtml\Report\CustomerGroupSelector $block */ $customerGroups = $block->getCustomerGroups(); +$customerGroupId = $block->getCurrentCustomerGroupId(); +$baseUrl = $block->getUrl('*/*/*', ['_current' => true, 'customer_group' => '__customer_group__']); ?>
@@ -32,7 +34,9 @@ $customerGroups = $block->getCustomerGroups(); diff --git a/src/module-elasticsuite-analytics/view/adminhtml/templates/report/date_range_switcher.phtml b/src/module-elasticsuite-analytics/view/adminhtml/templates/report/date_range_switcher.phtml index 5f0310592..e5bc62740 100644 --- a/src/module-elasticsuite-analytics/view/adminhtml/templates/report/date_range_switcher.phtml +++ b/src/module-elasticsuite-analytics/view/adminhtml/templates/report/date_range_switcher.phtml @@ -57,7 +57,7 @@ require(['jquery', 'mage/calendar', 'mage/adminhtml/tools'], function ($) { $('#getJsId('date-range-picker'); ?>').dateRange(getJsConfig(); ?>); $('#getJsId('date-range-picker', 'apply'); ?>').on('click', function() { - var url = "getUrl('*/*/*', ['from' => '__from__', 'to' => '__to__']); ?>" + let url = "getUrl('*/*/*', ['_current' => true, 'from' => '__from__', 'to' => '__to__']); ?>" .replace('__from__', Base64.encode($("#getJsId('date-range-picker', 'from'); ?>")[0].value)) .replace('__to__', Base64.encode($("#getJsId('date-range-picker', 'to'); ?>")[0].value)); window.location = url; diff --git a/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-company-selector.js b/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-company-selector.js index 40c5bb880..aba5e83ed 100644 --- a/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-company-selector.js +++ b/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-company-selector.js @@ -12,31 +12,16 @@ */ define('Smile_ElasticsuiteAnalytics/js/report/customer-company-selector', [ - 'jquery', - 'mage/url' -], function($, urlBuilder) { + 'jquery' +], function($) { 'use strict'; - return function() { - // On document ready, set the selected value in the company dropdown. - $(document).ready(function() { - var urlParams = new URLSearchParams(window.location.search); - var selectedCompany = urlParams.get('company_id'); - - if (selectedCompany) { - $('#company_id').val(selectedCompany); - } - }); - + return function(config) { // Handle the company dropdown value change. $('#company_id').on('change', function() { - var selectedCompany = $(this).val(); - var newUrl = new URL(window.location.href); - - newUrl.searchParams.set('company_id', selectedCompany); + let selectedCompany = $(this).val(); - // Redirect to the new URL with the company filter. - window.location.href = newUrl.href; + window.location = config.baseUrl.replace('__company_id__', selectedCompany); }); }; }); diff --git a/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-group-selector.js b/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-group-selector.js index 54089bc0e..b2b96548f 100644 --- a/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-group-selector.js +++ b/src/module-elasticsuite-analytics/view/adminhtml/web/js/report/customer-group-selector.js @@ -12,31 +12,16 @@ */ define('Smile_ElasticsuiteAnalytics/js/report/customer-group-selector', [ - 'jquery', - 'mage/url' -], function($, urlBuilder) { + 'jquery' +], function($) { 'use strict'; - return function() { - // !On document ready, set the selected value in the customer group dropdown. - $(document).ready(function() { - var urlParams = new URLSearchParams(window.location.search); - var selectedGroup = urlParams.get('customer_group'); - - if (selectedGroup) { - $('#customer_group').val(selectedGroup); - } - }); - + return function(config) { // Handle the customer group dropdown value change. $('#customer_group').on('change', function() { - var selectedGroup = $(this).val(); - var newUrl = new URL(window.location.href); - - newUrl.searchParams.set('customer_group', selectedGroup); + let selectedGroup = $(this).val(); - // Redirect to the new URL with the customer group filter. - window.location.href = newUrl.href; + window.location = config.baseUrl.replace('__customer_group__', selectedGroup); }); }; }); From f038bab9f8a9cabd7c86d93acdce80bad1035186 Mon Sep 17 00:00:00 2001 From: Richard BAYET Date: Tue, 7 Jan 2025 12:30:41 +0100 Subject: [PATCH 4/6] [Analytics] Ignore filtered search pages for avg result count --- .../Usage/Terms/AggregationProvider.php | 40 +++++++++++++++++-- .../Model/Search/Usage/Terms/Report.php | 10 ++++- .../SpellcheckedTerms/AggregationProvider.php | 2 +- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/AggregationProvider.php b/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/AggregationProvider.php index 6d4b15c2a..99403f0fc 100644 --- a/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/AggregationProvider.php +++ b/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/AggregationProvider.php @@ -16,6 +16,7 @@ use Smile\ElasticsuiteCore\Search\Request\BucketInterface; use Smile\ElasticsuiteCore\Search\Request\MetricInterface; use Smile\ElasticsuiteAnalytics\Model\Report\AggregationProviderInterface; +use Smile\ElasticsuiteCore\Search\Request\QueryInterface; /** * Default AggregationProvider @@ -84,6 +85,7 @@ public function getAggregation() 'name' => 'search_terms', 'metrics' => $this->getMetrics(), 'pipelines' => $this->getPipelines(), + 'childBuckets' => [$this->getFilteredResultCountMetric()], 'sortOrder' => ['unique_sessions' => 'desc'], 'size' => $this->helper->getMaxSearchTerms(), ]; @@ -105,9 +107,7 @@ protected function getMetrics() $this->metricFactory->create( ['name' => 'unique_visitors', 'field' => 'session.vid', 'type' => MetricInterface::TYPE_CARDINALITY] ), - $this->metricFactory->create( - ['name' => 'result_count', 'field' => 'page.product_list.product_count', 'type' => MetricInterface::TYPE_AVG] - ), + // Metrics result_count moved to a sub-aggregation to ignore filtered search pages. ]; return $metrics; @@ -124,4 +124,38 @@ protected function getPipelines() return $pipelines; } + + /** + * Return aggregation providing the filtered (no filtered search pages) result/product count metrics. + * + * @return BucketInterface + */ + protected function getFilteredResultCountMetric() + { + return $this->aggregationFactory->create( + BucketInterface::TYPE_METRIC, + [ + 'name' => 'result_count', + 'metricType' => MetricInterface::TYPE_AVG, + 'field' => 'page.product_list.product_count', + 'filter' => $this->queryFactory->create( + QueryInterface::TYPE_BOOL, + [ + 'mustNot' => [ + $this->queryFactory->create( + QueryInterface::TYPE_NESTED, + [ + 'path' => 'page.product_list.filters', + 'query' => $this->queryFactory->create( + QueryInterface::TYPE_EXISTS, + ['field' => 'page.product_list.filters'] + ), + ] + ), + ], + ] + ), + ] + ); + } } diff --git a/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/Report.php b/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/Report.php index 4745dac0c..cc903780e 100644 --- a/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/Report.php +++ b/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/Report.php @@ -47,6 +47,7 @@ public function __construct( /** * {@inheritdoc} + * @SuppressWarnings(PHPMD.CyclomaticComplexity) */ protected function processResponse(\Smile\ElasticsuiteCore\Search\Adapter\Elasticsuite\Response\QueryResponse $response) { @@ -63,7 +64,14 @@ protected function processResponse(\Smile\ElasticsuiteCore\Search\Adapter\Elasti 'conversion_rate' => number_format(0, 2), ]; if (array_key_exists('result_count', $value->getMetrics())) { - $data[$searchTerm]['result_count'] = round((float) $value->getMetrics()['result_count'] ?: 0); + $resultCountMetrics = $value->getMetrics()['result_count']; + if (is_array($resultCountMetrics) + && array_key_exists('result_count', $resultCountMetrics) + && array_key_exists('value', $resultCountMetrics['result_count']) + ) { + $resultCountMetrics = $resultCountMetrics['result_count']['value'] ?: 0; + } + $data[$searchTerm]['result_count'] = round((float) $resultCountMetrics ?: 0); } } } diff --git a/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/SpellcheckedTerms/AggregationProvider.php b/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/SpellcheckedTerms/AggregationProvider.php index 773747f3b..38ba420ed 100644 --- a/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/SpellcheckedTerms/AggregationProvider.php +++ b/src/module-elasticsuite-analytics/Model/Search/Usage/Terms/SpellcheckedTerms/AggregationProvider.php @@ -35,7 +35,7 @@ protected function getPipelines() PipelineInterface::TYPE_BUCKET_SELECTOR, [ 'name' => 'result_count_filter', - 'bucketsPath' => ['avg_result_count' => 'result_count'], + 'bucketsPath' => ['avg_result_count' => 'result_count.result_count'], 'script' => 'params.avg_result_count > 0', ] ), From 299a8fbfd90d3882649bbc3f19b480ab06809235 Mon Sep 17 00:00:00 2001 From: Richard BAYET Date: Thu, 9 Jan 2025 10:29:15 +0100 Subject: [PATCH 5/6] [Tracker] Allow ignoring bots generated tracker hits --- .../Helper/BotDetector.php | 78 +++++++++++++++++++ .../Helper/Data.php | 16 ++++ .../Model/Customer/TrackingService.php | 13 +++- .../etc/adminhtml/system.xml | 5 ++ .../etc/config.xml | 1 + src/module-elasticsuite-tracker/etc/di.xml | 27 +++++++ .../i18n/de_DE.csv | 5 +- .../i18n/en_US.csv | 5 +- .../i18n/fr_FR.csv | 3 + .../i18n/nl_NL.csv | 5 +- 10 files changed, 151 insertions(+), 7 deletions(-) create mode 100644 src/module-elasticsuite-tracker/Helper/BotDetector.php diff --git a/src/module-elasticsuite-tracker/Helper/BotDetector.php b/src/module-elasticsuite-tracker/Helper/BotDetector.php new file mode 100644 index 000000000..5dde35e6a --- /dev/null +++ b/src/module-elasticsuite-tracker/Helper/BotDetector.php @@ -0,0 +1,78 @@ + + * @copyright 2025 Smile + * @license Open Software License ("OSL") v. 3.0 + */ + +namespace Smile\ElasticsuiteTracker\Helper; + +use Magento\Framework\App\Helper\AbstractHelper; +use Magento\Framework\App\Helper\Context; +use Magento\Framework\App\Request\Http; + +/** + * BotDetector helper. + * + * @category Smile + * @package Smile\ElasticsuiteTracker + */ +class BotDetector extends AbstractHelper +{ + /** + * @var Http + */ + protected $request; + + /** + * @var array + */ + protected $botUserAgents = []; + + /** + * Constructor. + * + * @param Context $context Context. + * @param Http $request HTTP request. + * @param array $botUserAgents Bot user agents list. + */ + public function __construct(Context $context, Http $request, $botUserAgents = []) + { + parent::__construct($context); + $this->request = $request; + $this->botUserAgents = $botUserAgents; + } + + /** + * Get the current user agent from the request + * + * @return string + */ + public function getUserAgent() + { + return $this->request->getHeader('User-Agent') ?: ''; + } + + /** + * Check if the current user agent belongs to a bot + * + * @return bool + */ + public function isBot() + { + $userAgent = strtolower($this->getUserAgent()); + foreach ($this->botUserAgents as $bot) { + if (strpos($userAgent, $bot) !== false) { + return true; + } + } + + return false; + } +} diff --git a/src/module-elasticsuite-tracker/Helper/Data.php b/src/module-elasticsuite-tracker/Helper/Data.php index bd2012b89..80b976668 100644 --- a/src/module-elasticsuite-tracker/Helper/Data.php +++ b/src/module-elasticsuite-tracker/Helper/Data.php @@ -72,6 +72,12 @@ class Data extends \Magento\Framework\App\Helper\AbstractHelper */ const CONFIG_IS_HEADLESS_MODE_XPATH = 'smile_elasticsuite_tracker/general/is_headless_mode'; + /** + * Whether to filter/ignore tracker hits coming from known bots configuration path + * @var string + */ + const CONFIG_IS_FILTERING_BOT_HITS_XPATH = 'smile_elasticsuite_tracker/general/filter_bot_hits'; + /** * Anonymization status configuration path * @var string @@ -249,6 +255,16 @@ public function getEventsQueueCleanupDelay() return (int) $this->scopeConfig->getValue(self::CONFIG_QUEUE_CLEANUP_DELAY_XPATH); } + /** + * Returns true if tracker hits generated by known bots should be preemptively ignored. + * + * @return bool + */ + public function isFilteringBotHits() + { + return $this->scopeConfig->isSetFlag(self::CONFIG_IS_FILTERING_BOT_HITS_XPATH); + } + /** * Return the current tracker visitor id * diff --git a/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php b/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php index 087c625c6..05802d20d 100644 --- a/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php +++ b/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php @@ -42,6 +42,11 @@ class TrackingService implements \Smile\ElasticsuiteTracker\Api\CustomerTracking */ private $eventQueue; + /** + * @var \Smile\ElasticsuiteTracker\Helper\BotDetector + */ + private $botDetector; + /** * Constructor. * @@ -49,17 +54,20 @@ class TrackingService implements \Smile\ElasticsuiteTracker\Api\CustomerTracking * @param \Smile\ElasticsuiteTracker\Helper\Data $helper Tracking Helper. * @param \Smile\ElasticsuiteTracker\Api\EventQueueInterface $eventQueue Event Queue. * @param \Magento\Customer\Model\Session $customerSession Customer Session. + * @param \Smile\ElasticsuiteTracker\Helper\BotDetector $botDetector Bot detector. */ public function __construct( \Smile\ElasticsuiteTracker\Model\ResourceModel\CustomerLink $customerLinkResource, \Smile\ElasticsuiteTracker\Helper\Data $helper, \Smile\ElasticsuiteTracker\Api\EventQueueInterface $eventQueue, - \Magento\Customer\Model\Session $customerSession + \Magento\Customer\Model\Session $customerSession, + \Smile\ElasticsuiteTracker\Helper\BotDetector $botDetector ) { $this->customerLinkResource = $customerLinkResource; $this->helper = $helper; $this->customerSession = $customerSession; $this->eventQueue = $eventQueue; + $this->botDetector = $botDetector; } /** @@ -76,6 +84,9 @@ public function hit($eventData): void public function addEvent($eventData) { if ($this->helper->isEnabled()) { + if ($this->helper->isFilteringBotHits() && $this->botDetector->isBot()) { + return; + } $this->addCustomerLink($eventData); $this->eventQueue->addEvent($eventData); } diff --git a/src/module-elasticsuite-tracker/etc/adminhtml/system.xml b/src/module-elasticsuite-tracker/etc/adminhtml/system.xml index 4fc72dd28..aebb3db47 100644 --- a/src/module-elasticsuite-tracker/etc/adminhtml/system.xml +++ b/src/module-elasticsuite-tracker/etc/adminhtml/system.xml @@ -50,6 +50,11 @@ 1 + + + Magento\Config\Model\Config\Source\Yesno + + diff --git a/src/module-elasticsuite-tracker/etc/config.xml b/src/module-elasticsuite-tracker/etc/config.xml index f8265f677..584030a1a 100644 --- a/src/module-elasticsuite-tracker/etc/config.xml +++ b/src/module-elasticsuite-tracker/etc/config.xml @@ -23,6 +23,7 @@ 12 0 0 + 1 STUID diff --git a/src/module-elasticsuite-tracker/etc/di.xml b/src/module-elasticsuite-tracker/etc/di.xml index ef6066da5..3ea5732da 100644 --- a/src/module-elasticsuite-tracker/etc/di.xml +++ b/src/module-elasticsuite-tracker/etc/di.xml @@ -130,4 +130,31 @@ + + + + + googlebot + adsbot-google + feedfetcher-google + mediapartners-google + mediapartners (googlebot) + apis-google + google-inspectiontool + storebot-google + googleother + bingbot + slurp + linkedinbot + yandex + baiduspider + openai + duckduckbot + facebot + twitterbot + ia_archiver + + + + diff --git a/src/module-elasticsuite-tracker/i18n/de_DE.csv b/src/module-elasticsuite-tracker/i18n/de_DE.csv index 82b7e2271..24e787fba 100644 --- a/src/module-elasticsuite-tracker/i18n/de_DE.csv +++ b/src/module-elasticsuite-tracker/i18n/de_DE.csv @@ -33,5 +33,6 @@ "Delay","Verzögerung" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In Tagen. (Standard ist 7 Tage.)
Ungültige Tracker-Ereignisse, die älter als diese Verzögerung sind, werden periodisch aus der Warteschlange für Ereignisse entfernt." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","Sie können diesen Button anklicken, um alle ungültigen Tracker-Ereignisse unabhängig von der obigen Verzögerungseinstellung sofort aus der Indexierungsphase zu entfernen." -"","" -"","" +"Bot handling","Handhabung von Robotern" +"Filter bot hits","Filtern Sie Robotertreffer" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Wenn diese Option aktiviert ist, wird verhindert, dass bekannte IA- oder von Suchmaschinen-Crawler-Bots generierte Tracker-Treffer aufgezeichnet werden, um zu verhindern, dass Ihre Verhaltensdaten durch manchmal völlig inkohärente Paginierungs- oder Ergebniszählungsdaten verunreinigt werden." diff --git a/src/module-elasticsuite-tracker/i18n/en_US.csv b/src/module-elasticsuite-tracker/i18n/en_US.csv index 5ed831dcd..63ac348ff 100644 --- a/src/module-elasticsuite-tracker/i18n/en_US.csv +++ b/src/module-elasticsuite-tracker/i18n/en_US.csv @@ -33,5 +33,6 @@ Enabled,Enabled "Delay","Delay" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above." - - +"Bot Handling","Bot Handling" +"Filter bot hits","Filter bot hits" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data." diff --git a/src/module-elasticsuite-tracker/i18n/fr_FR.csv b/src/module-elasticsuite-tracker/i18n/fr_FR.csv index ca1be9fe6..73027beef 100644 --- a/src/module-elasticsuite-tracker/i18n/fr_FR.csv +++ b/src/module-elasticsuite-tracker/i18n/fr_FR.csv @@ -33,3 +33,6 @@ Tracking,Tracking "Delay","Délai" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","En jours. (Par défaut 7 jours.)
Les évènements invalides du tracker plus anciens que ce délai seront périodiquement supprimés de la file d'attente d'indexation des évènements." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","Vous pouvez cliquer sur ce bouton pour supprimer immédiatement tous les évènements invalides du tracker de la file d'attente d'indexation. Le paramètre ""Délai"" ci-dessus est ignoré." +"Bot Handling","Gestion des Robots" +"Filter bot hits","Filtrer les hits de robots" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Si activé, les hits du tracker générés par des robots de moteurs de recherche ou d'IA ne seront pas enregistrés afin d'empêcher la pollution de vos données comportementales par des données de nombre de résultats ou de pagination parfois totalement incohérents." diff --git a/src/module-elasticsuite-tracker/i18n/nl_NL.csv b/src/module-elasticsuite-tracker/i18n/nl_NL.csv index 96cc67297..742d08108 100644 --- a/src/module-elasticsuite-tracker/i18n/nl_NL.csv +++ b/src/module-elasticsuite-tracker/i18n/nl_NL.csv @@ -33,5 +33,6 @@ "Delay","Vertraging" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In dagen. (Standaard is 7 dagen.)
Ongeldige tracker events ouder dan deze vertraging worden periodiek verwijderd uit de wachtrij voor indexering van evenementen." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","U kunt op deze knop klikken om onmiddellijk alle ongeldige trackergebeurtenissen uit de indexeringswachtrij te verwijderen, ongeacht de hierboven ingestelde vertraging." -"","" -"","" +"Bot Handling","Robotbediening" +"Filter bot hits","Filter robottreffers" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Indien ingeschakeld, voorkomt het dat door IA of zoekmachines gegenereerde tracker-hits worden geregistreerd om te voorkomen dat uw gedragsgegevens worden vervuild door soms totaal onsamenhangende paginering of gegevens over het aantal resultaten." From 0184c7d2943c65624535f695799024670d84a9c6 Mon Sep 17 00:00:00 2001 From: Richard BAYET Date: Thu, 9 Jan 2025 16:00:19 +0100 Subject: [PATCH 6/6] [Tracker] Adding additional Facebook bots --- src/module-elasticsuite-tracker/etc/di.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/module-elasticsuite-tracker/etc/di.xml b/src/module-elasticsuite-tracker/etc/di.xml index 3ea5732da..10ce8df7d 100644 --- a/src/module-elasticsuite-tracker/etc/di.xml +++ b/src/module-elasticsuite-tracker/etc/di.xml @@ -150,7 +150,9 @@ baiduspider openai duckduckbot - facebot + facebot + facebookexternalhit + facebookcatalog twitterbot ia_archiver