Skip to content

Commit 56faa0c

Browse files
committed
Make PHP replacement more efficient
This change makes the queries and memory usage of PHP replacement more efficient. It improves the primary key SELECT queries by eliminating the use of OFFSET, because OFFSET requires the database to consider all rows up to the offset before taking rows up to the LIMIT. The new query instead relies on primary key conditions to eliminate previous rows from consideration more efficiently: the database can use an index to identify rows with keys greater than those of the previous chunk. It improves memory usage by performing updates along the way rather than storing all of a column's updates in memory until the end. At Automattic, when we limit search-replace to 4GB of memory, we sometimes exceed that limit for large sites. It's possible that other things contribute to high memory usage within the search-replace command, but as a first step, we can reduce memory requirements by no longer keeping all updated column values in memory simultaneously.
1 parent 5c54825 commit 56faa0c

File tree

2 files changed

+67
-16
lines changed

2 files changed

+67
-16
lines changed

features/search-replace.feature

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1149,7 +1149,7 @@ Feature: Do global search/replace
11491149
Success:
11501150
"""
11511151

1152-
Scenario: Chunking works without skipping lines
1152+
Scenario: Chunking a precise search and replace works without skipping lines
11531153
Given a WP install
11541154
And a create_sql_file.sh file:
11551155
"""
@@ -1190,3 +1190,45 @@ Feature: Do global search/replace
11901190
"""
11911191
Success: Made 0 replacements.
11921192
"""
1193+
1194+
Scenario: Chunking a regex search and replace works without skipping lines
1195+
Given a WP install
1196+
And a create_sql_file.sh file:
1197+
"""
1198+
#!/bin/bash
1199+
echo "CREATE TABLE \`wp_123_test\` (\`key\` INT(5) UNSIGNED NOT NULL AUTO_INCREMENT, \`text\` TEXT, PRIMARY KEY (\`key\`) );" > test_db.sql
1200+
echo "INSERT INTO \`wp_123_test\` (\`text\`) VALUES" >> test_db.sql
1201+
index=1
1202+
while [[ $index -le 199 ]];
1203+
do
1204+
echo "('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc')," >> test_db.sql
1205+
index=`expr $index + 1`
1206+
done
1207+
echo "('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc');" >> test_db.sql
1208+
"""
1209+
And I run `bash create_sql_file.sh`
1210+
And I run `wp db query "SOURCE test_db.sql;"`
1211+
1212+
When I run `wp search-replace --dry-run 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --regex`
1213+
Then STDOUT should contain:
1214+
"""
1215+
Success: 2000 replacements to be made.
1216+
"""
1217+
1218+
When I run `wp search-replace 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --regex`
1219+
Then STDOUT should contain:
1220+
"""
1221+
Success: Made 2000 replacements.
1222+
"""
1223+
1224+
When I run `wp search-replace --dry-run 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --regex`
1225+
Then STDOUT should contain:
1226+
"""
1227+
Success: 0 replacements to be made.
1228+
"""
1229+
1230+
When I run `wp search-replace 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --regex`
1231+
Then STDOUT should contain:
1232+
"""
1233+
Success: Made 0 replacements.
1234+
"""

src/Search_Replace_Command.php

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -537,9 +537,16 @@ private function php_handle_col( $col, $primary_keys, $table, $old, $new ) {
537537
$count = 0;
538538
$replacer = new SearchReplacer( $old, $new, $this->recurse_objects, $this->regex, $this->regex_flags, $this->regex_delimiter, null !== $this->log_handle, $this->regex_limit );
539539

540-
$table_sql = self::esc_sql_ident( $table );
541-
$col_sql = self::esc_sql_ident( $col );
542-
$where = $this->regex ? '' : " WHERE $col_sql" . $wpdb->prepare( ' LIKE BINARY %s', '%' . self::esc_like( $old ) . '%' );
540+
$table_sql = self::esc_sql_ident( $table );
541+
$col_sql = self::esc_sql_ident( $col );
542+
543+
$base_key_condition = '';
544+
$where_key = '';
545+
if ( ! $this->regex ) {
546+
$base_key_condition = "$col_sql" . $wpdb->prepare( ' LIKE BINARY %s', '%' . self::esc_like( $old ) . '%' );
547+
$where_key = "WHERE $base_key_condition";
548+
}
549+
543550
$escaped_primary_keys = self::esc_sql_ident( $primary_keys );
544551
$primary_keys_sql = implode( ',', $escaped_primary_keys );
545552
$order_by_keys = array_map(
@@ -550,17 +557,12 @@ static function ( $key ) {
550557
);
551558
$order_by_sql = 'ORDER BY ' . implode( ',', $order_by_keys );
552559
$limit = 1000;
553-
$offset = 0;
554-
555-
// Updates have to be deferred to after the chunking is completed, as
556-
// the offset will otherwise not work correctly.
557-
$updates = [];
558560

559561
// 2 errors:
560562
// - WordPress.DB.PreparedSQL.InterpolatedNotPrepared -- escaped through self::esc_sql_ident
561563
// - WordPress.CodeAnalysis.AssignmentInCondition -- no reason to do copy-paste for a single valid assignment in while
562564
// phpcs:ignore
563-
while ( $rows = $wpdb->get_results( "SELECT {$primary_keys_sql} FROM {$table_sql} {$where} {$order_by_sql} LIMIT {$limit} OFFSET {$offset}" ) ) {
565+
while ( $rows = $wpdb->get_results( "SELECT {$primary_keys_sql} FROM {$table_sql} {$where_key} {$order_by_sql} LIMIT {$limit}" ) ) {
564566
foreach ( $rows as $keys ) {
565567
$where_sql = '';
566568
foreach ( (array) $keys as $k => $v ) {
@@ -595,15 +597,22 @@ static function ( $key ) {
595597
$update_where[ $k ] = $v;
596598
}
597599

598-
$updates[] = [ $table, array( $col => $value ), $update_where ];
600+
$wpdb->update( $table, [ $col => $value ], $update_where );
599601
}
600602
}
601603

602-
$offset += $limit;
603-
}
604-
605-
foreach ( $updates as $update ) {
606-
$wpdb->update( ...$update );
604+
// Because we are ordering by primary keys from least to greatest,
605+
// we can exclude previous chunks from consideration by adding greater-than conditions
606+
// to insist the next chunk's keys must be greater than the last of this chunk's keys.
607+
$last_keys = end( $rows );
608+
$where_key_conditions = array();
609+
if ( $base_key_condition ) {
610+
$where_key_conditions[] = $base_key_condition;
611+
}
612+
foreach ( (array) $last_keys as $k => $v ) {
613+
$where_key_conditions[] = self::esc_sql_ident( $k ) . ' > ' . self::esc_sql_value( $v );
614+
}
615+
$where_key = 'WHERE ' . implode( 'AND', $where_key_conditions );
607616
}
608617

609618
if ( $this->verbose && 'table' === $this->format ) {

0 commit comments

Comments
 (0)