Skip to content

Commit eed0a75

Browse files
ACPT-671: Improve performance of image hashing in import #470
1 parent 5f3baa0 commit eed0a75

File tree

1 file changed

+65 -41 lines changed
  • app/code/Magento/CatalogImportExport/Model/Import

1 file changed

+65 -41 lines changed

app/code/Magento/CatalogImportExport/Model/Import/Product.php

Lines changed: 65 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1568,6 +1568,7 @@ protected function _saveProducts()
15681568
$priceIsGlobal = $this->_catalogData->isPriceGlobal();
15691569
$previousType = null;
15701570
$prevAttributeSet = null;
1571+
$productMediaPath = $this->getProductMediaPath();
15711572
while ($bunch = $this->_dataSourceModel->getNextBunch()) {
15721573
$entityRowsIn = [];
15731574
$entityRowsUp = [];
@@ -1579,7 +1580,6 @@ protected function _saveProducts()
15791580
$imagesForChangeVisibility = [];
15801581
$uploadedImages = [];
15811582
$existingImages = $this->getExistingImages($bunch);
1582-
$this->addImageHashes($existingImages);
15831583
$attributes = [];
15841584
foreach ($bunch as $rowNum => $rowData) {
15851585
try {
@@ -1626,6 +1626,7 @@ protected function _saveProducts()
16261626
$rowData,
16271627
$storeId,
16281628
$existingImages,
1629+
$productMediaPath,
16291630
$uploadedImages,
16301631
$imagesForChangeVisibility,
16311632
$labelsForUpdate,
@@ -1811,6 +1812,7 @@ private function saveProductMediaGalleryPhase(
18111812
array &$rowData,
18121813
int $storeId,
18131814
array $existingImages,
1815+
string $productMediaPath,
18141816
array &$uploadedImages,
18151817
array &$imagesForChangeVisibility,
18161818
array &$labelsForUpdate,
@@ -1844,10 +1846,11 @@ private function saveProductMediaGalleryPhase(
18441846
$position = 0;
18451847
foreach ($rowImages as $column => $columnImages) {
18461848
foreach ($columnImages as $columnImageKey => $columnImage) {
1847-
$hash = filter_var($columnImage, FILTER_VALIDATE_URL)
1848-
? $this->getRemoteFileHash($columnImage)
1849-
: $this->getFileHash($this->joinFilePaths($this->getUploader()->getTmpDir(), $columnImage));
1850-
$uploadedFile = $this->findImageByHash($rowExistingImages, $hash);
1849+
$uploadedFile = $this->findImageByColumnImage(
1850+
$productMediaPath,
1851+
$rowExistingImages,
1852+
$columnImage
1853+
);
18511854
if (!$uploadedFile && !isset($uploadedImages[$columnImage])) {
18521855
$uploadedFile = $this->uploadMediaFiles($columnImage);
18531856
$uploadedFile = $uploadedFile ?: $this->getSystemFile($columnImage);
@@ -2030,54 +2033,33 @@ private function saveProductAttributesPhase(
20302033
}
20312034

20322035
/**
2033-
* Returns image hash by path
2036+
* Returns image content by path
20342037
*
20352038
* @param string $path
20362039
* @return string
20372040
* @throws \Magento\Framework\Exception\FileSystemException
20382041
*/
2039-
private function getFileHash(string $path): string
2042+
private function getFileContent(string $path): string
20402043
{
20412044
$content = '';
20422045
if ($this->_mediaDirectory->isFile($path)
20432046
&& $this->_mediaDirectory->isReadable($path)
20442047
) {
20452048
$content = $this->_mediaDirectory->readFile($path);
20462049
}
2047-
return $content ? hash(self::HASH_ALGORITHM, $content) : '';
2050+
return $content;
20482051
}
20492052

20502053
/**
2051-
* Returns hash for remote file
2054+
* Returns content for remote file
20522055
*
20532056
* @param string $filename
20542057
* @return string
20552058
*/
2056-
private function getRemoteFileHash(string $filename): string
2059+
private function getRemoteFileContent(string $filename): string
20572060
{
2058-
$hash = hash_file(self::HASH_ALGORITHM, $filename);
2059-
return $hash !== false ? $hash : '';
2060-
}
2061-
2062-
/**
2063-
* Generate hashes for existing images for comparison with newly uploaded images.
2064-
*
2065-
* @param array $images
2066-
* @return void
2067-
*/
2068-
private function addImageHashes(array &$images): void
2069-
{
2070-
$productMediaPath = $this->getProductMediaPath();
2071-
foreach ($images as $storeId => $skus) {
2072-
foreach ($skus as $sku => $files) {
2073-
foreach ($files as $path => $file) {
2074-
$hash = $this->getFileHash($this->joinFilePaths($productMediaPath, $file['value']));
2075-
if ($hash) {
2076-
$images[$storeId][$sku][$path]['hash'] = $hash;
2077-
}
2078-
}
2079-
}
2080-
}
2061+
$content = file_get_contents($filename);
2062+
return $content !== false ? $content : '';
20812063
}
20822064

20832065
/**
@@ -3325,26 +3307,68 @@ private function getRowExistingStockItem(array $rowData): StockItemInterface
33253307
}
33263308

33273309
/**
3328-
* Returns image that matches the provided hash
3310+
* Returns image that matches the provided image content
33293311
*
3312+
* @param string $productMediaPath
33303313
* @param array $images
3331-
* @param string $hash
3314+
* @param string $columnImage
33323315
* @return string
33333316
*/
3334-
private function findImageByHash(array $images, string $hash): string
3317+
private function findImageByColumnImage(string $productMediaPath, array &$images, string $columnImage): string
33353318
{
3319+
$content = filter_var($columnImage, FILTER_VALIDATE_URL)
3320+
? $this->getRemoteFileContent($columnImage)
3321+
: $this->getFileContent($this->joinFilePaths($this->getUploader()->getTmpDir(), $columnImage));
33363322
$value = '';
3337-
if ($hash) {
3338-
foreach ($images as $image) {
3339-
if (isset($image['hash']) && $image['hash'] === $hash) {
3340-
$value = $image['value'];
3341-
break;
3323+
if ($content) {
3324+
$useHash = $this->shouldUseHash($images);
3325+
if ($useHash) {
3326+
$hash = hash(self::HASH_ALGORITHM, $content);
3327+
}
3328+
foreach ($images as &$image) {
3329+
if ($useHash) {
3330+
if (!isset($image['hash'])) {
3331+
$imageContent = $this->getFileContent($this->joinFilePaths($productMediaPath, $image['value']));
3332+
if (!$imageContent) {
3333+
$image['hash'] = '';
3334+
continue;
3335+
}
3336+
$image['hash'] = hash(self::HASH_ALGORITHM, $imageContent);
3337+
}
3338+
if (isset($image['hash']) && $image['hash'] === $hash) {
3339+
$value = $image['value'];
3340+
break;
3341+
}
3342+
} else {
3343+
if (!isset($image['content'])) {
3344+
$image['content'] = $this->getFileContent(
3345+
$this->joinFilePaths($productMediaPath, $image['value'])
3346+
);
3347+
}
3348+
if ($content === $image['content']) {
3349+
$value = $image['value'];
3350+
break;
3351+
}
33423352
}
33433353
}
33443354
}
33453355
return $value;
33463356
}
33473357

3358+
/**
3359+
* Returns true if we should use hash instead of just comparing content
3360+
*
3361+
* @param array $images
3362+
* @return bool
3363+
*/
3364+
private function shouldUseHash(array $images): bool
3365+
{
3366+
if (count($images) > 100) {
3367+
return true;
3368+
}
3369+
return false;
3370+
}
3371+
33483372
/**
33493373
* Returns product media
33503374
*

0 commit comments

Comments
 (0)