Skip to content

Commit 810bff3

Browse files
Merge pull request #470 from magento-performance/ACPT-671
ACPT-671: Improve performance of image hashing in import
2 parents b7fa18d + 50b6cb7 commit 810bff3

File tree

1 file changed

+105
-49
lines changed
  • app/code/Magento/CatalogImportExport/Model/Import

1 file changed

+105
-49
lines changed

app/code/Magento/CatalogImportExport/Model/Import/Product.php

Lines changed: 105 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ class Product extends AbstractEntity
4949
{
5050
private const DEFAULT_GLOBAL_MULTIPLE_VALUE_SEPARATOR = ',';
5151
public const CONFIG_KEY_PRODUCT_TYPES = 'global/importexport/import_product_types';
52-
private const HASH_ALGORITHM = 'sha256';
5352

5453
/**
5554
* Size of bunch - part of products to save in one step.
@@ -264,6 +263,11 @@ class Product extends AbstractEntity
264263
*/
265264
protected $_mediaGalleryAttributeId = null;
266265

266+
/**
267+
* @var string
268+
*/
269+
private $hashAlgorithm = 'crc32c';
270+
267271
/**
268272
* @var array
269273
* @codingStandardsIgnoreStart
@@ -904,7 +908,7 @@ public function __construct(
904908
$this->linkProcessor = $linkProcessor ?? ObjectManager::getInstance()
905909
->get(LinkProcessor::class);
906910
$this->linkProcessor->addNameToIds($this->_linkNameToId);
907-
911+
$this->hashAlgorithm = (version_compare(PHP_VERSION, '8.1.0') >= 0) ? 'xxh128' : 'crc32c';
908912
parent::__construct(
909913
$jsonHelper,
910914
$importExportData,
@@ -1572,6 +1576,7 @@ protected function _saveProducts()
15721576
$priceIsGlobal = $this->_catalogData->isPriceGlobal();
15731577
$previousType = null;
15741578
$prevAttributeSet = null;
1579+
$productMediaPath = $this->getProductMediaPath();
15751580
while ($bunch = $this->_dataSourceModel->getNextBunch()) {
15761581
$entityRowsIn = [];
15771582
$entityRowsUp = [];
@@ -1583,7 +1588,6 @@ protected function _saveProducts()
15831588
$imagesForChangeVisibility = [];
15841589
$uploadedImages = [];
15851590
$existingImages = $this->getExistingImages($bunch);
1586-
$this->addImageHashes($existingImages);
15871591
$attributes = [];
15881592
foreach ($bunch as $rowNum => $rowData) {
15891593
try {
@@ -1630,6 +1634,7 @@ protected function _saveProducts()
16301634
$rowData,
16311635
$storeId,
16321636
$existingImages,
1637+
$productMediaPath,
16331638
$uploadedImages,
16341639
$imagesForChangeVisibility,
16351640
$labelsForUpdate,
@@ -1683,7 +1688,6 @@ protected function _saveProducts()
16831688
private function saveProductEntityPhase(array $rowData, array &$entityRowsUp, array &$entityRowsIn) : void
16841689
{
16851690
$rowSku = $rowData[self::COL_SKU];
1686-
// 1. Entity phase
16871691
if ($this->isSkuExist($rowSku)) {
16881692
// existing row
16891693
if (isset($rowData['attribute_set_code'])) {
@@ -1801,6 +1805,7 @@ private function saveProductTierPricesPhase(array $rowData, bool $priceIsGlobal,
18011805
* @param array $rowData
18021806
* @param int $storeId
18031807
* @param array $existingImages
1808+
* @param string $productMediaPath
18041809
* @param array $uploadedImages
18051810
* @param array $imagesForChangeVisibility
18061811
* @param array $labelsForUpdate
@@ -1815,6 +1820,7 @@ private function saveProductMediaGalleryPhase(
18151820
array &$rowData,
18161821
int $storeId,
18171822
array $existingImages,
1823+
string $productMediaPath,
18181824
array &$uploadedImages,
18191825
array &$imagesForChangeVisibility,
18201826
array &$labelsForUpdate,
@@ -1848,10 +1854,11 @@ private function saveProductMediaGalleryPhase(
18481854
$position = 0;
18491855
foreach ($rowImages as $column => $columnImages) {
18501856
foreach ($columnImages as $columnImageKey => $columnImage) {
1851-
$hash = filter_var($columnImage, FILTER_VALIDATE_URL)
1852-
? $this->getRemoteFileHash($columnImage)
1853-
: $this->getFileHash($this->joinFilePaths($this->getUploader()->getTmpDir(), $columnImage));
1854-
$uploadedFile = $this->findImageByHash($rowExistingImages, $hash);
1857+
$uploadedFile = $this->findImageByColumnImage(
1858+
$productMediaPath,
1859+
$rowExistingImages,
1860+
$columnImage
1861+
);
18551862
if (!$uploadedFile && !isset($uploadedImages[$columnImage])) {
18561863
$uploadedFile = $this->uploadMediaFiles($columnImage);
18571864
$uploadedFile = $uploadedFile ?: $this->getSystemFile($columnImage);
@@ -1966,7 +1973,7 @@ private function saveProductAttributesPhase(
19661973
$productType = $previousType;
19671974
}
19681975
if ($productType === null) {
1969-
throw new Skip('Unknown Product Type');
1976+
throw new Skip(__('Unknown Product Type'));
19701977
}
19711978
}
19721979
$productTypeModel = $this->_productTypeModels[$productType];
@@ -2034,54 +2041,34 @@ private function saveProductAttributesPhase(
20342041
}
20352042

20362043
/**
2037-
* Returns image hash by path
2044+
* Returns image content by path
20382045
*
20392046
* @param string $path
20402047
* @return string
20412048
* @throws \Magento\Framework\Exception\FileSystemException
20422049
*/
2043-
private function getFileHash(string $path): string
2050+
private function getFileContent(string $path): string
{
    $directory = $this->_mediaDirectory;

    // Anything that is not a regular, readable file yields an empty string
    // so callers can treat "missing" and "empty" the same way.
    if (!$directory->isFile($path) || !$directory->isReadable($path)) {
        return '';
    }

    return $directory->readFile($path);
}
20532059

20542060
/**
2055-
* Returns hash for remote file
2061+
* Returns content for remote file
20562062
*
20572063
* @param string $filename
20582064
* @return string
20592065
*/
2060-
private function getRemoteFileHash(string $filename): string
2061-
{
2062-
$hash = hash_file(self::HASH_ALGORITHM, $filename);
2063-
return $hash !== false ? $hash : '';
2064-
}
2065-
2066-
/**
2067-
* Generate hashes for existing images for comparison with newly uploaded images.
2068-
*
2069-
* @param array $images
2070-
* @return void
2071-
*/
2072-
private function addImageHashes(array &$images): void
2066+
private function getRemoteFileContent(string $filename): string
{
    // file_get_contents() is used directly here (the caller passes a URL);
    // the coding-standard sniff is silenced only around this one call.
    // phpcs:disable Magento2.Functions.DiscouragedFunction
    $remoteContent = file_get_contents($filename);
    // phpcs:enable Magento2.Functions.DiscouragedFunction

    // A failed read is reported as false; normalise it to an empty string.
    if ($remoteContent === false) {
        return '';
    }

    return $remoteContent;
}
20862073

20872074
/**
@@ -3328,24 +3315,93 @@ private function getRowExistingStockItem(array $rowData): StockItemInterface
33283315
}
33293316

33303317
/**
3331-
* Returns image that matches the provided hash
3318+
* Returns image that matches the provided image content
33323319
*
3320+
* @param string $productMediaPath
33333321
* @param array $images
3334-
* @param string $hash
3322+
* @param string $columnImage
33353323
* @return string
33363324
*/
3337-
private function findImageByHash(array $images, string $hash): string
3325+
private function findImageByColumnImage(string $productMediaPath, array &$images, string $columnImage): string
{
    // Load the bytes of the image named in the import column: URLs are
    // fetched remotely, anything else is resolved against the uploader's
    // temporary directory.
    if (filter_var($columnImage, FILTER_VALIDATE_URL)) {
        $content = $this->getRemoteFileContent($columnImage);
    } else {
        $tmpPath = $this->joinFilePaths($this->getUploader()->getTmpDir(), $columnImage);
        $content = $this->getFileContent($tmpPath);
    }

    // Nothing could be read, so nothing can match.
    if (!$content) {
        return '';
    }

    // $images is forwarded by reference so that the hash/content values
    // computed by the lookup are cached on the caller's array.
    return $this->shouldUseHash($images)
        ? $this->findImageByColumnImageUsingHash($productMediaPath, $images, $content)
        : $this->findImageByColumnImageUsingContent($productMediaPath, $images, $content);
}
3339+
3340+
/**
3341+
* Returns image that matches the provided image content using hash
3342+
*
3343+
* @param string $productMediaPath
3344+
* @param array $images
3345+
* @param string $content
3346+
* @return string
3347+
*/
3348+
private function findImageByColumnImageUsingHash(string $productMediaPath, array &$images, string $content): string
{
    $wantedHash = hash($this->hashAlgorithm, $content);

    foreach ($images as &$candidate) {
        if (!isset($candidate['hash'])) {
            // Hash each existing image lazily, the first time it is inspected,
            // and cache the result on the entry. Unreadable/empty files are
            // cached as '' so they are not read again on later lookups.
            $candidatePath = $this->joinFilePaths($productMediaPath, $candidate['value']);
            $candidateContent = $this->getFileContent($candidatePath);
            $candidate['hash'] = $candidateContent
                ? hash($this->hashAlgorithm, $candidateContent)
                : '';
        }

        // An empty cached hash means "no usable file" and never matches.
        if (!empty($candidate['hash']) && $candidate['hash'] === $wantedHash) {
            return $candidate['value'];
        }
    }

    return '';
}
3366+
3367+
/**
3368+
* Returns image that matches the provided image content by direct content comparison (no hashing)
3369+
*
3370+
* @param string $productMediaPath
3371+
* @param array $images
3372+
* @param string $content
3373+
* @return string
3374+
*/
3375+
private function findImageByColumnImageUsingContent(
    string $productMediaPath,
    array &$images,
    string $content
): string {
    foreach ($images as &$candidate) {
        $candidateContent = $candidate['content'] ?? null;

        if ($candidateContent === null) {
            // Read the existing image once and cache its bytes on the entry
            // so repeated lookups do not touch the filesystem again.
            $candidateContent = $this->getFileContent(
                $this->joinFilePaths($productMediaPath, $candidate['value'])
            );
            $candidate['content'] = $candidateContent;
        }

        // Byte-for-byte comparison of the two files' contents.
        if ($candidateContent === $content) {
            return $candidate['value'];
        }
    }

    return '';
}
3392+
3393+
/**
3394+
* Returns true if images should be matched by hash rather than by comparing raw content (more than 100 images)
3395+
*
3396+
* @param array $images
3397+
* @return bool
3398+
*/
3399+
private function shouldUseHash(array $images): bool
{
    // Hashing is selected only when more than 100 images have to be
    // compared; at or below that count the raw contents are compared.
    return count($images) > 100;
}
33503406

33513407
/**

0 commit comments

Comments
 (0)