|
3 | 3 | * Copyright © 2013-2017 Magento, Inc. All rights reserved.
|
4 | 4 | * See COPYING.txt for license details.
|
5 | 5 | */
|
| 6 | + |
6 | 7 | namespace Magento\Search\Model;
|
7 | 8 |
|
8 | 9 | use Magento\Search\Api\SynonymAnalyzerInterface;
|
9 | 10 |
|
| 11 | +/** |
| 12 | + * SynonymAnalyzer responsible for search of synonyms matching a word or a phrase. |
| 13 | + */ |
10 | 14 | class SynonymAnalyzer implements SynonymAnalyzerInterface
|
11 | 15 | {
|
12 | 16 | /**
|
@@ -42,55 +46,119 @@ public function __construct(SynonymReader $synReader)
|
42 | 46 | */
|
public function getSynonymsForPhrase($phrase)
{
    $result = [];

    // Compare with '' explicitly: empty() would also discard the legitimate search term "0".
    $normalizedPhrase = trim((string) $phrase);
    if ($normalizedPhrase === '') {
        return $result;
    }

    // The lookup receives the phrase exactly as it was passed in.
    $synonymGroups = $this->getSynonymGroupsByPhrase($phrase);

    // Split on runs of ASCII whitespace and drop empty tokens, so that leading, trailing or repeated
    // whitespace never produces an empty "word". The explicit character class (instead of \s) keeps
    // the split byte-safe for multi-byte UTF-8 text regardless of the active locale.
    $words = preg_split('/[ \t\n\r\x0B\f]+/', $normalizedPhrase, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        // preg_split() reported an internal error; fall back to the plain single-space split.
        $words = explode(' ', $normalizedPhrase);
    }

    // Every word gets exactly one entry in the result, in phrase order. An entry is the comma-separated
    // synonym group (split into a list) whose pattern matches the phrase starting at that word, or a
    // one-element list holding the word itself when no group matches.
    foreach ($words as $offset => $word) {
        $synonyms = [$word];

        if ($synonymGroups) {
            // The pattern covers the remaining words as a whole and every shorter prefix of them.
            $pattern = $this->getSearchPattern(array_slice($words, $offset));
            $position = $this->findInArray($pattern, $synonymGroups);
            if ($position !== null) {
                $synonyms = explode(',', $synonymGroups[$position]);
            }
        }

        $result[] = $synonyms;
    }

    return $result;
}
|
71 | 80 |
|
/**
 * Locates the first synonym group that is matched by the given regular expression.
 *
 * Groups are tested in iteration order. The value returned is the zero-based ordinal of the first
 * group for which preg_match() reports a match; the keys of the passed array are not used.
 *
 * @param string $pattern Complete PCRE pattern, delimiters and modifiers included
 * @param array $synonymGroupsToExamine Strings to be tested against the pattern
 * @return int|null Ordinal of the first matching group, or null when no group matches
 */
private function findInArray($pattern, array $synonymGroupsToExamine)
{
    // array_values() renumbers the groups 0..n-1, so the loop key is the ordinal position.
    foreach (array_values($synonymGroupsToExamine) as $ordinal => $candidateGroup) {
        if (preg_match($pattern, $candidateGroup) === 1) {
            return $ordinal;
        }
    }

    return null;
}
| 102 | + |
/**
 * Builds a regular expression that finds synonym groups for a phrase given as a list of words.
 *
 * The pattern matches the whole list of words and every shorter prefix of it, always starting from
 * the first word.
 *
 * For example, for the phrase "Elizabeth is the English queen" passed from its very first word,
 * the expression looks for synonyms of all of these:
 *  - Elizabeth is the English queen
 *  - Elizabeth is the English
 *  - Elizabeth is the
 *  - Elizabeth is
 *  - Elizabeth
 *
 * For the same phrase passed from its second word "is", it looks for synonyms of:
 *  - is the English queen
 *  - is the English
 *  - is the
 *  - is
 *
 * Only complete comma-separated entries are matched, so these are not found as synonyms:
 *  - Is there anybody in the room?
 *  - Is the English is most popular language?
 *  - Is the English queen Elizabeth?
 *
 * The pattern expects the examined data to be a comma-separated value: an entry is recognised only
 * when it is delimited by a comma on at least one side. Matching is case-insensitive and any run of
 * whitespace is accepted between two words.
 *
 * Every word is escaped with preg_quote(), so characters such as "/", "(", "." or "+" typed by the
 * user are matched literally and cannot break or alter the expression.
 *
 * Note that an empty list produces the pattern "//i", which matches any string.
 *
 * @param array $words Words of the phrase, in order
 * @return string PCRE pattern including the "/" delimiters and the "i" modifier
 */
private function getSearchPattern(array $words)
{
    // Escape once, up front: the same escaped words are reused for every prefix length below.
    $quotedWords = array_map(
        function ($word) {
            return preg_quote($word, '/');
        },
        $words
    );

    $patterns = [];
    for ($lastItem = count($quotedWords); $lastItem > 0; $lastItem--) {
        $phrase = implode('\s+', array_slice($quotedWords, 0, $lastItem));
        // An entry may open the group, sit between two commas, or close the group.
        $patterns[] = '^' . $phrase . ',';
        $patterns[] = ',' . $phrase . ',';
        $patterns[] = ',' . $phrase . '$';
    }

    return '/' . implode('|', $patterns) . '/i';
}
| 145 | + |
/**
 * Loads the synonym groups that the synonym reader returns for the phrase.
 *
 * Each record returned by the reader contributes the value of its 'synonyms' field; the values are
 * collected in record order into a sequentially indexed list. Callers in this class treat every
 * value as a comma-separated string of synonyms.
 *
 * @param string $phrase
 * @return string[]
 */
private function getSynonymGroupsByPhrase($phrase)
{
    $records = $this->synReaderModel->loadByPhrase($phrase)->getData();

    $groups = [];
    foreach ($records as $record) {
        $groups[] = $record['synonyms'];
    }

    return $groups;
}
|
96 | 164 | }
|
0 commit comments