From 1de4a4ca407b68309d34b98ce6bcba3e6935e310 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 24 Sep 2025 15:35:44 -0400 Subject: [PATCH 1/5] Test a claude-generated modification of the SQL extension --- .../migrations/0013_RAW_rpm_evr_extension.py | 406 ++++++++++++++---- pulp_rpm/app/shared_utils.py | 53 +-- 2 files changed, 345 insertions(+), 114 deletions(-) diff --git a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py index 3801dba14..130a9504d 100644 --- a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py +++ b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py @@ -1,4 +1,3 @@ - # This Migration was _not_ automatically generated. # When regenerating the migrations ever, this one _must_ be preserved. @@ -20,103 +19,354 @@ CREATE OR REPLACE FUNCTION pulp_evr_trigger() RETURNS trigger AS $$ BEGIN NEW.evr = (select ROW(coalesce(NEW.epoch::numeric,0), - pulp_rpmver_array(coalesce(NEW.version,'pulp_isempty'))::pulp_evr_array_item[], - pulp_rpmver_array(coalesce(NEW.release,'pulp_isempty'))::pulp_evr_array_item[])::pulp_evr_t); + pulp_rpmver_array(coalesce(NEW.version,''))::pulp_evr_array_item[], + pulp_rpmver_array(coalesce(NEW.release,''))::pulp_evr_array_item[])::pulp_evr_t); RETURN NEW; END; $$ language 'plpgsql'; -create or replace FUNCTION pulp_isempty(t TEXT) - RETURNS BOOLEAN as $$ - BEGIN - return t ~ '^[[:space:]]*$'; - END; -$$ language 'plpgsql'; - -create or replace FUNCTION pulp_isalphanum(ch CHAR) - RETURNS BOOLEAN as $$ - BEGIN - if ascii(ch) between ascii('a') and ascii('z') or - ascii(ch) between ascii('A') and ascii('Z') or - ascii(ch) between ascii('0') and ascii('9') - then - return TRUE; - end if; - return FALSE; - END; -$$ language 'plpgsql'; - -create or replace function pulp_isdigit(ch CHAR) - RETURNS BOOLEAN as $$ - BEGIN - if ascii(ch) between ascii('0') and ascii('9') - then - return TRUE; - end if; - return FALSE; - END ; -$$ language 'plpgsql'; - create or 
replace FUNCTION pulp_rpmver_array (string1 IN VARCHAR) RETURNS pulp_evr_array_item[] as $$ declare - str1 VARCHAR := string1; - digits VARCHAR(10) := '0123456789'; - lc_alpha VARCHAR(27) := 'abcdefghijklmnopqrstuvwxyz'; - uc_alpha VARCHAR(27) := 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - alpha VARCHAR(54) := lc_alpha || uc_alpha; - one VARCHAR; - isnum BOOLEAN; + input_bytes BYTEA := convert_to(string1, 'ASCII'); + one BYTEA := input_bytes; ver_array pulp_evr_array_item[] := ARRAY[]::pulp_evr_array_item[]; + m1_head BYTEA; + segm1 TEXT; + segm1_n NUMERIC := 0; + isnum BOOLEAN; + pos INTEGER; BEGIN - if str1 is NULL - then + if string1 is NULL then RAISE EXCEPTION 'VALUE_ERROR.'; end if; - one := str1; - <> - while one <> '' - loop - declare - segm1 VARCHAR; - segm1_n NUMERIC := 0; - begin - -- Throw out all non-alphanum characters - while one <> '' and not pulp_isalphanum(one) - loop - one := substr(one, 2); + -- Convert to bytes for proper ASCII handling like Python + while length(one) > 0 loop + -- Skip non-alphanumeric characters except ~ and ^ + pos := 1; + while pos <= length(one) and + not (get_byte(one, pos-1) between 48 and 57 or -- 0-9 + get_byte(one, pos-1) between 65 and 90 or -- A-Z + get_byte(one, pos-1) between 97 and 122 or -- a-z + get_byte(one, pos-1) = 126 or -- ~ + get_byte(one, pos-1) = 94) -- ^ + loop + pos := pos + 1; + end loop; + + if pos > 1 then + one := substring(one from pos); + end if; + + if length(one) = 0 then + exit; + end if; + + -- Handle tilde - it sorts before everything else + if get_byte(one, 0) = 126 then -- ~ + ver_array := array_append(ver_array, (NULL, '~')::pulp_evr_array_item); + one := substring(one from 2); + continue; + end if; + + -- Handle caret - it sorts after everything else + if get_byte(one, 0) = 94 then -- ^ + ver_array := array_append(ver_array, (NULL, '^')::pulp_evr_array_item); + one := substring(one from 2); + continue; + end if; + + -- Extract numeric or alphabetic segment + if get_byte(one, 0) between 48 and 57 
then -- digit + pos := 1; + while pos < length(one) and get_byte(one, pos) between 48 and 57 loop + pos := pos + 1; end loop; - str1 := one; - if str1 <> '' and pulp_isdigit(str1) - then - str1 := ltrim(str1, digits); - isnum := true; - else - str1 := ltrim(str1, alpha); - isnum := false; - end if; - if str1 <> '' - then segm1 := substr(one, 1, length(one) - length(str1)); - else segm1 := one; - end if; + m1_head := substring(one from 1 for pos); + isnum := true; + else -- alphabetic + pos := 1; + while pos < length(one) and + (get_byte(one, pos) between 65 and 90 or get_byte(one, pos) between 97 and 122) loop + pos := pos + 1; + end loop; + m1_head := substring(one from 1 for pos); + isnum := false; + end if; - if segm1 = '' then return ver_array; end if; /* arbitrary */ - if isnum - then - segm1 := ltrim(segm1, '0'); - if segm1 <> '' then segm1_n := segm1::numeric; end if; - segm1 := NULL; + segm1 := convert_from(m1_head, 'ASCII'); + + if isnum then + -- Remove leading zeros for numeric comparison + segm1 := ltrim(segm1, '0'); + if segm1 = '' then + segm1_n := 0; else + segm1_n := segm1::numeric; end if; - ver_array := array_append(ver_array, (segm1_n, segm1)::pulp_evr_array_item); - one := str1; - end; - end loop segment_loop; + ver_array := array_append(ver_array, (segm1_n, NULL)::pulp_evr_array_item); + else + ver_array := array_append(ver_array, (NULL, segm1)::pulp_evr_array_item); + end if; + + one := substring(one from pos + 1); + end loop; return ver_array; END ; $$ language 'plpgsql'; + +-- Version comparison function that matches Python Vercmp.compare logic +CREATE OR REPLACE FUNCTION pulp_vercmp(first_array pulp_evr_array_item[], second_array pulp_evr_array_item[]) + RETURNS INTEGER AS $$ + declare + i INTEGER := 1; + max_len INTEGER := greatest(array_length(first_array, 1), array_length(second_array, 1)); + first_item pulp_evr_array_item; + second_item pulp_evr_array_item; + BEGIN + if first_array = second_array then + return 0; + end if; + + while i 
<= coalesce(max_len, 0) loop + -- Get current segments or null if we've run out + if i <= array_length(first_array, 1) then + first_item := first_array[i]; + else + first_item := (NULL, NULL)::pulp_evr_array_item; + end if; + + if i <= array_length(second_array, 1) then + second_item := second_array[i]; + else + second_item := (NULL, NULL)::pulp_evr_array_item; + end if; + + -- Handle tilde: ~ sorts before everything else + if first_item.s = '~' and (second_item.s != '~' OR second_item.s IS NULL) then + return -1; + elsif (first_item.s != '~' OR first_item.s IS NULL) and second_item.s = '~' then + return 1; + elsif first_item.s = '~' and second_item.s = '~' then + i := i + 1; + continue; + end if; + + -- Handle caret + if first_item.s = '^' then + if second_item.s is null and second_item.n is null then + -- first has caret but second has ended + return 1; + elsif second_item.s != '^' then + -- first has caret but second continues + return -1; + else + -- both have caret, continue + i := i + 1; + continue; + end if; + elsif second_item.s = '^' then + if first_item.s is null and first_item.n is null then + -- second has caret but first has ended + return -1; + else + -- second has caret but first continues + return 1; + end if; + end if; + + -- Both items are null (end of both arrays) + if (first_item.s is null and first_item.n is null) and + (second_item.s is null and second_item.n is null) then + return 0; + end if; + + -- One array ended but the other continues + if (first_item.s is null and first_item.n is null) then + return -1; + elsif (second_item.s is null and second_item.n is null) then + return 1; + end if; + + -- Compare numeric vs alphabetic (numeric wins) + if first_item.n is not null and second_item.s is not null then + return 1; + elsif first_item.s is not null and second_item.n is not null then + return -1; + end if; + + -- Both numeric + if first_item.n is not null and second_item.n is not null then + if first_item.n < second_item.n then + return -1; 
+ elsif first_item.n > second_item.n then + return 1; + end if; + -- Both alphabetic + elsif first_item.s is not null and second_item.s is not null then + if first_item.s < second_item.s then + return -1; + elsif first_item.s > second_item.s then + return 1; + end if; + end if; + + i := i + 1; + end loop; + + return 0; + END; +$$ LANGUAGE 'plpgsql'; + +-- Add comparison operators for pulp_evr_t type +CREATE OR REPLACE FUNCTION pulp_evr_cmp(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS INTEGER AS $$ + declare + epoch_cmp INTEGER; + version_cmp INTEGER; + BEGIN + -- Compare epochs first + epoch_cmp := first_evr.epoch - second_evr.epoch; + if epoch_cmp != 0 then + return epoch_cmp; + end if; + + -- Compare versions + version_cmp := pulp_vercmp(first_evr.version, second_evr.version); + if version_cmp != 0 then + return version_cmp; + end if; + + -- Compare releases + return pulp_vercmp(first_evr.release, second_evr.release); + END; +$$ LANGUAGE 'plpgsql'; + +-- Create comparison operators +CREATE OR REPLACE FUNCTION pulp_evr_lt(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS BOOLEAN AS $$ + BEGIN + return pulp_evr_cmp(first_evr, second_evr) < 0; + END; +$$ LANGUAGE 'plpgsql'; + +CREATE OR REPLACE FUNCTION pulp_evr_le(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS BOOLEAN AS $$ + BEGIN + return pulp_evr_cmp(first_evr, second_evr) <= 0; + END; +$$ LANGUAGE 'plpgsql'; + +CREATE OR REPLACE FUNCTION pulp_evr_eq(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS BOOLEAN AS $$ + BEGIN + return pulp_evr_cmp(first_evr, second_evr) = 0; + END; +$$ LANGUAGE 'plpgsql'; + +CREATE OR REPLACE FUNCTION pulp_evr_ge(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS BOOLEAN AS $$ + BEGIN + return pulp_evr_cmp(first_evr, second_evr) >= 0; + END; +$$ LANGUAGE 'plpgsql'; + +CREATE OR REPLACE FUNCTION pulp_evr_gt(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS BOOLEAN AS $$ + BEGIN + return pulp_evr_cmp(first_evr, second_evr) > 0; + END; +$$ LANGUAGE 
'plpgsql'; + +CREATE OR REPLACE FUNCTION pulp_evr_ne(first_evr pulp_evr_t, second_evr pulp_evr_t) + RETURNS BOOLEAN AS $$ + BEGIN + return pulp_evr_cmp(first_evr, second_evr) != 0; + END; +$$ LANGUAGE 'plpgsql'; + +-- Drop existing operators if they exist +DROP OPERATOR IF EXISTS < (pulp_evr_t, pulp_evr_t); +DROP OPERATOR IF EXISTS <= (pulp_evr_t, pulp_evr_t); +DROP OPERATOR IF EXISTS = (pulp_evr_t, pulp_evr_t); +DROP OPERATOR IF EXISTS <> (pulp_evr_t, pulp_evr_t); +DROP OPERATOR IF EXISTS != (pulp_evr_t, pulp_evr_t); +DROP OPERATOR IF EXISTS >= (pulp_evr_t, pulp_evr_t); +DROP OPERATOR IF EXISTS > (pulp_evr_t, pulp_evr_t); + +-- Create operators with proper syntax +CREATE OPERATOR < ( + LEFTARG = pulp_evr_t, + RIGHTARG = pulp_evr_t, + FUNCTION = pulp_evr_lt, + COMMUTATOR = >, + NEGATOR = >=, + RESTRICT = scalarltsel, + JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = pulp_evr_t, + RIGHTARG = pulp_evr_t, + FUNCTION = pulp_evr_le, + COMMUTATOR = >=, + NEGATOR = >, + RESTRICT = scalarltsel, + JOIN = scalarltjoinsel +); + +CREATE OPERATOR = ( + LEFTARG = pulp_evr_t, + RIGHTARG = pulp_evr_t, + FUNCTION = pulp_evr_eq, + COMMUTATOR = =, + NEGATOR = <>, + RESTRICT = eqsel, + JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = pulp_evr_t, + RIGHTARG = pulp_evr_t, + FUNCTION = pulp_evr_ne, + COMMUTATOR = <>, + NEGATOR = =, + RESTRICT = neqsel, + JOIN = neqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = pulp_evr_t, + RIGHTARG = pulp_evr_t, + FUNCTION = pulp_evr_ge, + COMMUTATOR = <=, + NEGATOR = <, + RESTRICT = scalargtsel, + JOIN = scalargtjoinsel +); + +CREATE OPERATOR > ( + LEFTARG = pulp_evr_t, + RIGHTARG = pulp_evr_t, + FUNCTION = pulp_evr_gt, + COMMUTATOR = <, + NEGATOR = <=, + RESTRICT = scalargtsel, + JOIN = scalargtjoinsel +); + +-- Create operator class for ordering +DROP OPERATOR CLASS IF EXISTS pulp_evr_ops USING btree; +CREATE OPERATOR CLASS pulp_evr_ops + DEFAULT FOR TYPE pulp_evr_t USING btree AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 
=, + OPERATOR 4 >=, + OPERATOR 5 >, + FUNCTION 1 pulp_evr_cmp(pulp_evr_t, pulp_evr_t); """ diff --git a/pulp_rpm/app/shared_utils.py b/pulp_rpm/app/shared_utils.py index 9ca531131..34102b51b 100644 --- a/pulp_rpm/app/shared_utils.py +++ b/pulp_rpm/app/shared_utils.py @@ -4,56 +4,37 @@ import typing as t from hashlib import sha256 from pathlib import Path -from collections import defaultdict import createrepo_c as cr from django.conf import settings from django.utils.dateparse import parse_datetime from importlib_resources import files from pulpcore.plugin.exceptions import InvalidSignatureError -from pulp_rpm.app.rpm_version import RpmVersion def annotate_with_age(qs): - """Provide an "age" score for each Package object in the queryset. - - Annotate the Package objects with an "age". Age is calculated by partitioning the - Packages by name and architecture and ordering the packages in each group by 'evr', - which is the relative "age" within the group. The newest package gets age=1, second - newest age=2, and so on. - + """Provide an "age" score for each Package object in the queryset. + + Annotate the Package objects with an "age". Age is calculated with a postgresql + window function which partitions the Packages by name and architecture, orders the + packages in each group by 'evr', and returns the row number of each package, which + is the relative "age" within the group. The newest package gets age=1, second newest + age=2, and so on. + A second partition by architecture is important because there can be packages with the same name and version numbers but they are not interchangeable because they have differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch.
""" - # Get packages in current queryset with their basic info - packages = list(qs.values("pk", "name", "arch", "epoch", "version", "release")) - - # Group packages by name and arch - groups = defaultdict(list) - for pkg in packages: - key = (pkg["name"], pkg["arch"]) - groups[key].append(pkg) - - # Calculate age for each group - age_mapping = {} - for group_packages in groups.values(): - # Sort by EVR (newest first) - group_packages.sort( - key=lambda p: RpmVersion(p["epoch"], p["version"], p["release"]), reverse=True + from django.db.models import Window, F + from django.db.models.functions import RowNumber + + return qs.annotate( + age=Window( + expression=RowNumber(), + partition_by=[F("name"), F("arch")], + order_by=F("evr").desc(), ) - - # Assign ages (1 = newest, 2 = second newest, etc.) - for age, pkg in enumerate(group_packages, 1): - age_mapping[pkg["pk"]] = age - - # Create a queryset with age annotation - # We'll use a CASE statement to map PKs to ages - from django.db.models import Case, When, IntegerField - - when_clauses = [When(pk=pk, then=age) for pk, age in age_mapping.items()] - - return qs.annotate(age=Case(*when_clauses, output_field=IntegerField())) + ) def format_nevra(name=None, epoch=0, version=None, release=None, arch=None): From 4356f480470598382adf76f752ee70fb59968ae0 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Mon, 29 Sep 2025 16:45:36 -0400 Subject: [PATCH 2/5] temp --- pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py index 130a9504d..76557557c 100644 --- a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py +++ b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py @@ -161,7 +161,7 @@ return 1; elsif second_item.s != '^' then -- first has caret but second continues - return -1; + return 1; else -- both have caret, continue i := i + 1; @@ 
-173,7 +173,7 @@ return -1; else -- second has caret but first continues - return 1; + return -1; end if; end if; From cfe69700a0ed5f049335b5465f779c8c88b4c7d8 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 1 Oct 2025 09:25:22 -0400 Subject: [PATCH 3/5] temp --- .../migrations/0013_RAW_rpm_evr_extension.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py index 76557557c..ade5a92f4 100644 --- a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py +++ b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py @@ -154,26 +154,28 @@ continue; end if; - -- Handle caret - if first_item.s = '^' then + -- Handle caret: ^ sorts after regular content, but context matters + if first_item.s = '^' and second_item.s = '^' then + -- both have caret, continue comparing + i := i + 1; + continue; + elsif first_item.s = '^' and second_item.s != '^' then + -- first has caret, second doesn't if second_item.s is null and second_item.n is null then - -- first has caret but second has ended - return 1; - elsif second_item.s != '^' then - -- first has caret but second continues + -- second has ended, first with caret wins return 1; else - -- both have caret, continue - i := i + 1; - continue; + -- second continues with regular content, caret loses + return -1; end if; - elsif second_item.s = '^' then + elsif first_item.s != '^' and second_item.s = '^' then + -- second has caret, first doesn't if first_item.s is null and first_item.n is null then - -- second has caret but first has ended + -- first has ended, second with caret loses return -1; else - -- second has caret but first continues - return -1; + -- first continues with regular content, caret loses + return 1; end if; end if; From 6991b092d33c3b12c5fcf88bdf6de0f32a6feb38 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 1 Oct 2025 09:48:15 -0400 Subject: [PATCH 4/5] temp --- 
.../app/migrations/0013_RAW_rpm_evr_extension.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py index ade5a92f4..be523d27e 100644 --- a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py +++ b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py @@ -154,18 +154,14 @@ continue; end if; - -- Handle caret: ^ sorts after regular content, but context matters - if first_item.s = '^' and second_item.s = '^' then - -- both have caret, continue comparing - i := i + 1; - continue; - elsif first_item.s = '^' and second_item.s != '^' then + -- Handle caret: ^ behavior depends on whether the other version continues + if first_item.s = '^' and second_item.s != '^' then -- first has caret, second doesn't if second_item.s is null and second_item.n is null then -- second has ended, first with caret wins return 1; else - -- second continues with regular content, caret loses + -- second continues with regular content, first with caret loses return -1; end if; elsif first_item.s != '^' and second_item.s = '^' then @@ -174,9 +170,13 @@ -- first has ended, second with caret loses return -1; else - -- first continues with regular content, caret loses + -- first continues with regular content, second with caret loses return 1; end if; + elsif first_item.s = '^' and second_item.s = '^' then + -- both have caret, continue comparing + i := i + 1; + continue; end if; -- Both items are null (end of both arrays) From f494ca790e0b8abc38f5d47fa9a6e3423c9cc3e6 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 1 Oct 2025 10:32:12 -0400 Subject: [PATCH 5/5] temp --- .../migrations/0013_RAW_rpm_evr_extension.py | 241 ++++++++++++------ pulp_rpm/app/shared_utils.py | 2 +- 2 files changed, 165 insertions(+), 78 deletions(-) diff --git a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py index 
be523d27e..5192bb651 100644 --- a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py +++ b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py @@ -117,108 +117,195 @@ END ; $$ language 'plpgsql'; --- Version comparison function that matches Python Vercmp.compare logic +-- Version comparison function that matches C rpmvercmp logic exactly CREATE OR REPLACE FUNCTION pulp_vercmp(first_array pulp_evr_array_item[], second_array pulp_evr_array_item[]) RETURNS INTEGER AS $$ declare - i INTEGER := 1; - max_len INTEGER := greatest(array_length(first_array, 1), array_length(second_array, 1)); - first_item pulp_evr_array_item; - second_item pulp_evr_array_item; + first_str TEXT := ''; + second_str TEXT := ''; + i INTEGER; BEGIN - if first_array = second_array then + -- Convert arrays back to strings for C-style processing + -- This is necessary because the C algorithm works on strings, not pre-parsed arrays + + -- Reconstruct first string + for i in 1..coalesce(array_length(first_array, 1), 0) loop + if first_array[i].n IS NOT NULL then + first_str := first_str || first_array[i].n::text; + elsif first_array[i].s IS NOT NULL then + first_str := first_str || first_array[i].s; + end if; + end loop; + + -- Reconstruct second string + for i in 1..coalesce(array_length(second_array, 1), 0) loop + if second_array[i].n IS NOT NULL then + second_str := second_str || second_array[i].n::text; + elsif second_array[i].s IS NOT NULL then + second_str := second_str || second_array[i].s; + end if; + end loop; + + -- Now call the C-style comparison + return pulp_vercmp_string(first_str, second_str); + END; +$$ LANGUAGE 'plpgsql'; + +-- Direct port of C rpmvercmp logic +CREATE OR REPLACE FUNCTION pulp_vercmp_string(str1 TEXT, str2 TEXT) + RETURNS INTEGER AS $$ + declare + one_pos INTEGER := 1; + two_pos INTEGER := 1; + str1_len INTEGER := length(str1); + str2_len INTEGER := length(str2); + one_char TEXT; + two_char TEXT; + seg1_start INTEGER; + seg2_start INTEGER; + seg1_end INTEGER; + 
seg2_end INTEGER; + seg1 TEXT; + seg2 TEXT; + isnum BOOLEAN; + one_len INTEGER; + two_len INTEGER; + cmp_result INTEGER; + BEGIN + -- Easy comparison to see if versions are identical + if str1 = str2 then return 0; end if; - while i <= coalesce(max_len, 0) loop - -- Get current segments or null if we've run out - if i <= array_length(first_array, 1) then - first_item := first_array[i]; - else - first_item := (NULL, NULL)::pulp_evr_array_item; - end if; + -- Loop through each version segment and compare them + while one_pos <= str1_len OR two_pos <= str2_len loop + -- Skip non-alphanumeric characters except ~ and ^ + while one_pos <= str1_len loop + one_char := substring(str1, one_pos, 1); + if (one_char >= 'a' and one_char <= 'z') or + (one_char >= 'A' and one_char <= 'Z') or + (one_char >= '0' and one_char <= '9') or + one_char = '~' or one_char = '^' then + exit; + end if; + one_pos := one_pos + 1; + end loop; - if i <= array_length(second_array, 1) then - second_item := second_array[i]; - else - second_item := (NULL, NULL)::pulp_evr_array_item; - end if; + while two_pos <= str2_len loop + two_char := substring(str2, two_pos, 1); + if (two_char >= 'a' and two_char <= 'z') or + (two_char >= 'A' and two_char <= 'Z') or + (two_char >= '0' and two_char <= '9') or + two_char = '~' or two_char = '^' then + exit; + end if; + two_pos := two_pos + 1; + end loop; - -- Handle tilde: ~ sorts before everything else - if first_item.s = '~' and (second_item.s != '~' OR second_item.s IS NULL) then - return -1; - elsif (first_item.s != '~' OR first_item.s IS NULL) and second_item.s = '~' then - return 1; - elsif first_item.s = '~' and second_item.s = '~' then - i := i + 1; + -- Get current characters (or empty if past end) + one_char := case when one_pos <= str1_len then substring(str1, one_pos, 1) else '' end; + two_char := case when two_pos <= str2_len then substring(str2, two_pos, 1) else '' end; + + -- Handle tilde separator - sorts before everything else + if one_char = '~' 
or two_char = '~' then + if one_char != '~' then return 1; end if; + if two_char != '~' then return -1; end if; + one_pos := one_pos + 1; + two_pos := two_pos + 1; continue; end if; - -- Handle caret: ^ behavior depends on whether the other version continues - if first_item.s = '^' and second_item.s != '^' then - -- first has caret, second doesn't - if second_item.s is null and second_item.n is null then - -- second has ended, first with caret wins - return 1; - else - -- second continues with regular content, first with caret loses - return -1; - end if; - elsif first_item.s != '^' and second_item.s = '^' then - -- second has caret, first doesn't - if first_item.s is null and first_item.n is null then - -- first has ended, second with caret loses - return -1; - else - -- first continues with regular content, second with caret loses - return 1; - end if; - elsif first_item.s = '^' and second_item.s = '^' then - -- both have caret, continue comparing - i := i + 1; + -- Handle caret separator - context dependent like C code + if one_char = '^' or two_char = '^' then + if one_pos > str1_len then return -1; end if; -- !*one + if two_pos > str2_len then return 1; end if; -- !*two + if one_char != '^' then return 1; end if; + if two_char != '^' then return -1; end if; + one_pos := one_pos + 1; + two_pos := two_pos + 1; continue; end if; - -- Both items are null (end of both arrays) - if (first_item.s is null and first_item.n is null) and - (second_item.s is null and second_item.n is null) then - return 0; + -- If we ran to the end of either, we are finished with the loop + if not (one_pos <= str1_len and two_pos <= str2_len) then + exit; end if; - -- One array ended but the other continues - if (first_item.s is null and first_item.n is null) then - return -1; - elsif (second_item.s is null and second_item.n is null) then - return 1; + -- Grab first completely alpha or completely numeric segment + seg1_start := one_pos; + seg2_start := two_pos; + + if one_char >= '0' and 
one_char <= '9' then + -- Numeric segment + while one_pos <= str1_len and substring(str1, one_pos, 1) >= '0' and substring(str1, one_pos, 1) <= '9' loop + one_pos := one_pos + 1; + end loop; + while two_pos <= str2_len and substring(str2, two_pos, 1) >= '0' and substring(str2, two_pos, 1) <= '9' loop + two_pos := two_pos + 1; + end loop; + isnum := true; + else + -- Alpha segment + while one_pos <= str1_len loop + one_char := substring(str1, one_pos, 1); + if not ((one_char >= 'a' and one_char <= 'z') or (one_char >= 'A' and one_char <= 'Z')) then + exit; + end if; + one_pos := one_pos + 1; + end loop; + while two_pos <= str2_len loop + two_char := substring(str2, two_pos, 1); + if not ((two_char >= 'a' and two_char <= 'z') or (two_char >= 'A' and two_char <= 'Z')) then + exit; + end if; + two_pos := two_pos + 1; + end loop; + isnum := false; end if; - -- Compare numeric vs alphabetic (numeric wins) - if first_item.n is not null and second_item.s is not null then - return 1; - elsif first_item.s is not null and second_item.n is not null then - return -1; + -- Extract the segments + seg1 := substring(str1, seg1_start, one_pos - seg1_start); + seg2 := substring(str2, seg2_start, two_pos - seg2_start); + + -- Handle empty segments (matching C logic exactly) + if seg1_start = one_pos then return -1; end if; -- arbitrary, matches C + + -- Take care of different types: numeric vs alpha + if seg2_start = two_pos then + return case when isnum then 1 else -1 end; end if; - -- Both numeric - if first_item.n is not null and second_item.n is not null then - if first_item.n < second_item.n then - return -1; - elsif first_item.n > second_item.n then - return 1; - end if; - -- Both alphabetic - elsif first_item.s is not null and second_item.s is not null then - if first_item.s < second_item.s then - return -1; - elsif first_item.s > second_item.s then - return 1; - end if; + if isnum then + -- Numeric comparison + -- Throw away leading zeros + seg1 := ltrim(seg1, '0'); + seg2 := 
ltrim(seg2, '0'); + + -- Whichever number has more digits wins + one_len := length(seg1); + two_len := length(seg2); + if one_len > two_len then return 1; end if; + if two_len > one_len then return -1; end if; + end if; + + -- String comparison (works for both alpha and same-length numeric) + if seg1 < seg2 then + return -1; + elsif seg1 > seg2 then + return 1; end if; + -- Equal segments, continue to next - i := i + 1; end loop; - return 0; + -- Handle end conditions + if one_pos > str1_len and two_pos > str2_len then + return 0; -- both ended + elsif one_pos > str1_len then + return -1; -- first ended, second continues + else + return 1; -- second ended, first continues + end if; END; $$ LANGUAGE 'plpgsql'; diff --git a/pulp_rpm/app/shared_utils.py b/pulp_rpm/app/shared_utils.py index 34102b51b..da676decf 100644 --- a/pulp_rpm/app/shared_utils.py +++ b/pulp_rpm/app/shared_utils.py @@ -13,7 +13,7 @@ def annotate_with_age(qs): - """Provide an "age" score for each Package object in the queryset. + """Provide an "age" score for each Package object in the queryset. Annotate the Package objects with an "age". Age is calculated with a postgresql window function which partitions the Packages by name and architecture, orders the