diff --git a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py
index 3801dba14..5192bb651 100644
--- a/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py
+++ b/pulp_rpm/app/migrations/0013_RAW_rpm_evr_extension.py
@@ -1,4 +1,3 @@
-
 # This Migration was _not_ automatically generated.
 # When regenerating the migrations ever, this one _must_ be preserved.
 
@@ -20,103 +19,443 @@
 CREATE OR REPLACE FUNCTION pulp_evr_trigger() RETURNS trigger AS $$
   BEGIN
     NEW.evr = (select ROW(coalesce(NEW.epoch::numeric,0),
-                          pulp_rpmver_array(coalesce(NEW.version,'pulp_isempty'))::pulp_evr_array_item[],
-                          pulp_rpmver_array(coalesce(NEW.release,'pulp_isempty'))::pulp_evr_array_item[])::pulp_evr_t);
+                          pulp_rpmver_array(coalesce(NEW.version,''))::pulp_evr_array_item[],
+                          pulp_rpmver_array(coalesce(NEW.release,''))::pulp_evr_array_item[])::pulp_evr_t);
     RETURN NEW;
   END;
 $$ language 'plpgsql';
 
-create or replace FUNCTION pulp_isempty(t TEXT)
-  RETURNS BOOLEAN as $$
-  BEGIN
-    return t ~ '^[[:space:]]*$';
-  END;
-$$ language 'plpgsql';
-
-create or replace FUNCTION pulp_isalphanum(ch CHAR)
-  RETURNS BOOLEAN as $$
-  BEGIN
-    if ascii(ch) between ascii('a') and ascii('z') or
-      ascii(ch) between ascii('A') and ascii('Z') or
-      ascii(ch) between ascii('0') and ascii('9')
-    then
-      return TRUE;
-    end if;
-    return FALSE;
-  END;
-$$ language 'plpgsql';
-
-create or replace function pulp_isdigit(ch CHAR)
-  RETURNS BOOLEAN as $$
-  BEGIN
-    if ascii(ch) between ascii('0') and ascii('9')
-    then
-      return TRUE;
-    end if;
-    return FALSE;
-  END ;
-$$ language 'plpgsql';
-
 create or replace FUNCTION pulp_rpmver_array (string1 IN VARCHAR)
   RETURNS pulp_evr_array_item[] as $$
   declare
-    str1 VARCHAR := string1;
-    digits VARCHAR(10) := '0123456789';
-    lc_alpha VARCHAR(27) := 'abcdefghijklmnopqrstuvwxyz';
-    uc_alpha VARCHAR(27) := 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
-    alpha VARCHAR(54) := lc_alpha || uc_alpha;
-    one VARCHAR;
-    isnum BOOLEAN;
+    input_bytes BYTEA := convert_to(string1, 'SQL_ASCII');
+    one BYTEA := input_bytes;
     ver_array pulp_evr_array_item[] := ARRAY[]::pulp_evr_array_item[];
+    m1_head BYTEA;
+    segm1 TEXT;
+    segm1_n NUMERIC := 0;
+    isnum BOOLEAN;
+    pos INTEGER;
   BEGIN
-    if str1 is NULL
-    then
+    if string1 is NULL then
       RAISE EXCEPTION 'VALUE_ERROR.';
     end if;
 
-    one := str1;
-    <<segment_loop>>
-    while one <> ''
-    loop
-      declare
-        segm1 VARCHAR;
-        segm1_n NUMERIC := 0;
-      begin
-        -- Throw out all non-alphanum characters
-        while one <> '' and not pulp_isalphanum(one)
-        loop
-          one := substr(one, 2);
+    -- Work on raw bytes (SQL_ASCII = no conversion) so only ASCII alphanumerics form segments
+    while length(one) > 0 loop
+      -- Skip non-alphanumeric characters except ~ and ^
+      pos := 1;
+      while pos <= length(one) and
+            not (get_byte(one, pos-1) between 48 and 57 or -- 0-9
+                 get_byte(one, pos-1) between 65 and 90 or -- A-Z
+                 get_byte(one, pos-1) between 97 and 122 or -- a-z
+                 get_byte(one, pos-1) = 126 or -- ~
+                 get_byte(one, pos-1) = 94) -- ^
+      loop
+        pos := pos + 1;
+      end loop;
+
+      if pos > 1 then
+        one := substring(one from pos);
+      end if;
+
+      if length(one) = 0 then
+        exit;
+      end if;
+
+      -- Handle tilde - it sorts before everything else
+      if get_byte(one, 0) = 126 then -- ~
+        ver_array := array_append(ver_array, (NULL, '~')::pulp_evr_array_item);
+        one := substring(one from 2);
+        continue;
+      end if;
+
+      -- Handle caret - it sorts after everything else
+      if get_byte(one, 0) = 94 then -- ^
+        ver_array := array_append(ver_array, (NULL, '^')::pulp_evr_array_item);
+        one := substring(one from 2);
+        continue;
+      end if;
+
+      -- Extract numeric or alphabetic segment
+      if get_byte(one, 0) between 48 and 57 then -- digit
+        pos := 1;
+        while pos < length(one) and get_byte(one, pos) between 48 and 57 loop
+          pos := pos + 1;
         end loop;
-        str1 := one;
-        if str1 <> '' and pulp_isdigit(str1)
-        then
-          str1 := ltrim(str1, digits);
-          isnum := true;
-        else
-          str1 := ltrim(str1, alpha);
-          isnum := false;
-        end if;
-        if str1 <> ''
-        then segm1 := substr(one, 1, length(one) - length(str1));
-        else segm1 := one;
-        end if;
+        m1_head := substring(one from 1 for pos);
+        isnum := true;
+      else -- alphabetic
+        pos := 1;
+        while pos < length(one) and
+              (get_byte(one, pos) between 65 and 90 or get_byte(one, pos) between 97 and 122) loop
+          pos := pos + 1;
+        end loop;
+        m1_head := substring(one from 1 for pos);
+        isnum := false;
+      end if;
 
-        if segm1 = '' then return ver_array; end if; /* arbitrary */
-        if isnum
-        then
-          segm1 := ltrim(segm1, '0');
-          if segm1 <> '' then segm1_n := segm1::numeric; end if;
-          segm1 := NULL;
+      segm1 := convert_from(m1_head, 'SQL_ASCII');
+
+      if isnum then
+        -- Remove leading zeros for numeric comparison
+        segm1 := ltrim(segm1, '0');
+        if segm1 = '' then
+          segm1_n := 0;
         else
+          segm1_n := segm1::numeric;
         end if;
-        ver_array := array_append(ver_array, (segm1_n, segm1)::pulp_evr_array_item);
-        one := str1;
-      end;
-    end loop segment_loop;
+        ver_array := array_append(ver_array, (segm1_n, NULL)::pulp_evr_array_item);
+      else
+        ver_array := array_append(ver_array, (NULL, segm1)::pulp_evr_array_item);
+      end if;
+
+      one := substring(one from pos + 1);
+    end loop;
 
     return ver_array;
   END ;
 $$ language 'plpgsql';
+
+-- Version comparison function that matches C rpmvercmp logic exactly
+CREATE OR REPLACE FUNCTION pulp_vercmp(first_array pulp_evr_array_item[], second_array pulp_evr_array_item[])
+  RETURNS INTEGER AS $$
+  declare
+    first_str TEXT := '';
+    second_str TEXT := '';
+    i INTEGER;
+  BEGIN
+    -- Convert arrays back to strings because the C algorithm works on strings, not arrays.
+    -- Segments are joined with '.' (ignored by rpmvercmp) so that 1.2 and 12 stay distinct.
+
+    -- Reconstruct first string
+    for i in 1..coalesce(array_length(first_array, 1), 0) loop
+      if first_array[i].n IS NOT NULL then
+        first_str := first_str || '.' || first_array[i].n::text;
+      elsif first_array[i].s IS NOT NULL then
+        first_str := first_str || '.' || first_array[i].s;
+      end if;
+    end loop;
+
+    -- Reconstruct second string
+    for i in 1..coalesce(array_length(second_array, 1), 0) loop
+      if second_array[i].n IS NOT NULL then
+        second_str := second_str || '.' || second_array[i].n::text;
+      elsif second_array[i].s IS NOT NULL then
+        second_str := second_str || '.' || second_array[i].s;
+      end if;
+    end loop;
+
+    -- Now call the C-style comparison
+    return pulp_vercmp_string(first_str, second_str);
+  END;
+$$ LANGUAGE 'plpgsql';
+
+-- Direct port of C rpmvercmp logic (text compared in "C" collation to mimic strcmp)
+CREATE OR REPLACE FUNCTION pulp_vercmp_string(str1 TEXT, str2 TEXT)
+  RETURNS INTEGER AS $$
+  declare
+    one_pos INTEGER := 1;
+    two_pos INTEGER := 1;
+    str1_len INTEGER := length(str1);
+    str2_len INTEGER := length(str2);
+    one_char TEXT COLLATE "C";
+    two_char TEXT COLLATE "C";
+    seg1_start INTEGER;
+    seg2_start INTEGER;
+    seg1_end INTEGER;
+    seg2_end INTEGER;
+    seg1 TEXT COLLATE "C";
+    seg2 TEXT COLLATE "C";
+    isnum BOOLEAN;
+    one_len INTEGER;
+    two_len INTEGER;
+    cmp_result INTEGER;
+  BEGIN
+    -- Easy comparison to see if versions are identical
+    if str1 = str2 then
+      return 0;
+    end if;
+
+    -- Loop through each version segment and compare them
+    while one_pos <= str1_len OR two_pos <= str2_len loop
+      -- Skip non-alphanumeric characters except ~ and ^
+      while one_pos <= str1_len loop
+        one_char := substring(str1, one_pos, 1);
+        if (one_char >= 'a' and one_char <= 'z') or
+           (one_char >= 'A' and one_char <= 'Z') or
+           (one_char >= '0' and one_char <= '9') or
+           one_char = '~' or one_char = '^' then
+          exit;
+        end if;
+        one_pos := one_pos + 1;
+      end loop;
+
+      while two_pos <= str2_len loop
+        two_char := substring(str2, two_pos, 1);
+        if (two_char >= 'a' and two_char <= 'z') or
+           (two_char >= 'A' and two_char <= 'Z') or
+           (two_char >= '0' and two_char <= '9') or
+           two_char = '~' or two_char = '^' then
+          exit;
+        end if;
+        two_pos := two_pos + 1;
+      end loop;
+
+      -- Get current characters (or empty if past end)
+      one_char := case when one_pos <= str1_len then substring(str1, one_pos, 1) else '' end;
+      two_char := case when two_pos <= str2_len then substring(str2, two_pos, 1) else '' end;
+
+      -- Handle tilde separator - sorts before everything else
+      if one_char = '~' or two_char = '~' then
+        if one_char != '~' then return 1; end if;
+        if two_char != '~' then return -1; end if;
+        one_pos := one_pos + 1;
+        two_pos := two_pos + 1;
+        continue;
+      end if;
+
+      -- Handle caret separator - context dependent like C code
+      if one_char = '^' or two_char = '^' then
+        if one_pos > str1_len then return -1; end if; -- !*one
+        if two_pos > str2_len then return 1; end if; -- !*two
+        if one_char != '^' then return 1; end if;
+        if two_char != '^' then return -1; end if;
+        one_pos := one_pos + 1;
+        two_pos := two_pos + 1;
+        continue;
+      end if;
+
+      -- If we ran to the end of either, we are finished with the loop
+      if not (one_pos <= str1_len and two_pos <= str2_len) then
+        exit;
+      end if;
+
+      -- Grab first completely alpha or completely numeric segment
+      seg1_start := one_pos;
+      seg2_start := two_pos;
+
+      if one_char >= '0' and one_char <= '9' then
+        -- Numeric segment
+        while one_pos <= str1_len and substring(str1, one_pos, 1) COLLATE "C" >= '0' and substring(str1, one_pos, 1) COLLATE "C" <= '9' loop
+          one_pos := one_pos + 1;
+        end loop;
+        while two_pos <= str2_len and substring(str2, two_pos, 1) COLLATE "C" >= '0' and substring(str2, two_pos, 1) COLLATE "C" <= '9' loop
+          two_pos := two_pos + 1;
+        end loop;
+        isnum := true;
+      else
+        -- Alpha segment
+        while one_pos <= str1_len loop
+          one_char := substring(str1, one_pos, 1);
+          if not ((one_char >= 'a' and one_char <= 'z') or (one_char >= 'A' and one_char <= 'Z')) then
+            exit;
+          end if;
+          one_pos := one_pos + 1;
+        end loop;
+        while two_pos <= str2_len loop
+          two_char := substring(str2, two_pos, 1);
+          if not ((two_char >= 'a' and two_char <= 'z') or (two_char >= 'A' and two_char <= 'Z')) then
+            exit;
+          end if;
+          two_pos := two_pos + 1;
+        end loop;
+        isnum := false;
+      end if;
+
+      -- Extract the segments
+      seg1 := substring(str1, seg1_start, one_pos - seg1_start);
+      seg2 := substring(str2, seg2_start, two_pos - seg2_start);
+
+      -- Handle empty segments (matching C logic exactly)
+      if seg1_start = one_pos then return -1; end if; -- arbitrary, matches C
+
+      -- Take care of different types: numeric vs alpha
+      if seg2_start = two_pos then
+        return case when isnum then 1 else -1 end;
+      end if;
+
+      if isnum then
+        -- Numeric comparison
+        -- Throw away leading zeros
+        seg1 := ltrim(seg1, '0');
+        seg2 := ltrim(seg2, '0');
+
+        -- Whichever number has more digits wins
+        one_len := length(seg1);
+        two_len := length(seg2);
+        if one_len > two_len then return 1; end if;
+        if two_len > one_len then return -1; end if;
+      end if;
+
+      -- String comparison (works for both alpha and same-length numeric)
+      if seg1 < seg2 then
+        return -1;
+      elsif seg1 > seg2 then
+        return 1;
+      end if;
+      -- Equal segments, continue to next
+
+    end loop;
+
+    -- Handle end conditions
+    if one_pos > str1_len and two_pos > str2_len then
+      return 0; -- both ended
+    elsif one_pos > str1_len then
+      return -1; -- first ended, second continues
+    else
+      return 1; -- second ended, first continues
+    end if;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+-- Add comparison operators for pulp_evr_t type
+CREATE OR REPLACE FUNCTION pulp_evr_cmp(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS INTEGER AS $$
+  declare
+    epoch_cmp INTEGER;
+    version_cmp INTEGER;
+  BEGIN
+    -- Compare epochs first
+    epoch_cmp := first_evr.epoch - second_evr.epoch;
+    if epoch_cmp != 0 then
+      return epoch_cmp;
+    end if;
+
+    -- Compare versions
+    version_cmp := pulp_vercmp(first_evr.version, second_evr.version);
+    if version_cmp != 0 then
+      return version_cmp;
+    end if;
+
+    -- Compare releases
+    return pulp_vercmp(first_evr.release, second_evr.release);
+  END;
+$$ LANGUAGE 'plpgsql';
+
+-- Create comparison operators
+CREATE OR REPLACE FUNCTION pulp_evr_lt(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS BOOLEAN AS $$
+  BEGIN
+    return pulp_evr_cmp(first_evr, second_evr) < 0;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+CREATE OR REPLACE FUNCTION pulp_evr_le(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS BOOLEAN AS $$
+  BEGIN
+    return pulp_evr_cmp(first_evr, second_evr) <= 0;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+CREATE OR REPLACE FUNCTION pulp_evr_eq(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS BOOLEAN AS $$
+  BEGIN
+    return pulp_evr_cmp(first_evr, second_evr) = 0;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+CREATE OR REPLACE FUNCTION pulp_evr_ge(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS BOOLEAN AS $$
+  BEGIN
+    return pulp_evr_cmp(first_evr, second_evr) >= 0;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+CREATE OR REPLACE FUNCTION pulp_evr_gt(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS BOOLEAN AS $$
+  BEGIN
+    return pulp_evr_cmp(first_evr, second_evr) > 0;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+CREATE OR REPLACE FUNCTION pulp_evr_ne(first_evr pulp_evr_t, second_evr pulp_evr_t)
+  RETURNS BOOLEAN AS $$
+  BEGIN
+    return pulp_evr_cmp(first_evr, second_evr) != 0;
+  END;
+$$ LANGUAGE 'plpgsql';
+
+-- Drop existing operators if they exist
+DROP OPERATOR IF EXISTS < (pulp_evr_t, pulp_evr_t);
+DROP OPERATOR IF EXISTS <= (pulp_evr_t, pulp_evr_t);
+DROP OPERATOR IF EXISTS = (pulp_evr_t, pulp_evr_t);
+DROP OPERATOR IF EXISTS <> (pulp_evr_t, pulp_evr_t);
+DROP OPERATOR IF EXISTS != (pulp_evr_t, pulp_evr_t);
+DROP OPERATOR IF EXISTS >= (pulp_evr_t, pulp_evr_t);
+DROP OPERATOR IF EXISTS > (pulp_evr_t, pulp_evr_t);
+
+-- Create operators with proper syntax
+CREATE OPERATOR < (
+  LEFTARG = pulp_evr_t,
+  RIGHTARG = pulp_evr_t,
+  FUNCTION = pulp_evr_lt,
+  COMMUTATOR = >,
+  NEGATOR = >=,
+  RESTRICT = scalarltsel,
+  JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR <= (
+  LEFTARG = pulp_evr_t,
+  RIGHTARG = pulp_evr_t,
+  FUNCTION = pulp_evr_le,
+  COMMUTATOR = >=,
+  NEGATOR = >,
+  RESTRICT = scalarltsel,
+  JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR = (
+  LEFTARG = pulp_evr_t,
+  RIGHTARG = pulp_evr_t,
+  FUNCTION = pulp_evr_eq,
+  COMMUTATOR = =,
+  NEGATOR = <>,
+  RESTRICT = eqsel,
+  JOIN = eqjoinsel
+);
+
+CREATE OPERATOR <> (
+  LEFTARG = pulp_evr_t,
+  RIGHTARG = pulp_evr_t,
+  FUNCTION = pulp_evr_ne,
+  COMMUTATOR = <>,
+  NEGATOR = =,
+  RESTRICT = neqsel,
+  JOIN = neqjoinsel
+);
+
+CREATE OPERATOR >= (
+  LEFTARG = pulp_evr_t,
+  RIGHTARG = pulp_evr_t,
+  FUNCTION = pulp_evr_ge,
+  COMMUTATOR = <=,
+  NEGATOR = <,
+  RESTRICT = scalargtsel,
+  JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR > (
+  LEFTARG = pulp_evr_t,
+  RIGHTARG = pulp_evr_t,
+  FUNCTION = pulp_evr_gt,
+  COMMUTATOR = <,
+  NEGATOR = <=,
+  RESTRICT = scalargtsel,
+  JOIN = scalargtjoinsel
+);
+
+-- Create operator class for ordering
+DROP OPERATOR CLASS IF EXISTS pulp_evr_ops USING btree;
+CREATE OPERATOR CLASS pulp_evr_ops
+  DEFAULT FOR TYPE pulp_evr_t USING btree AS
+    OPERATOR 1 <,
+    OPERATOR 2 <=,
+    OPERATOR 3 =,
+    OPERATOR 4 >=,
+    OPERATOR 5 >,
+    FUNCTION 1 pulp_evr_cmp(pulp_evr_t, pulp_evr_t);
 """
 
 
diff --git a/pulp_rpm/app/shared_utils.py b/pulp_rpm/app/shared_utils.py
index 9ca531131..da676decf 100644
--- a/pulp_rpm/app/shared_utils.py
+++ b/pulp_rpm/app/shared_utils.py
@@ -4,56 +4,37 @@
 import typing as t
 from hashlib import sha256
 from pathlib import Path
-from collections import defaultdict
 
 import createrepo_c as cr
 from django.conf import settings
 from django.utils.dateparse import parse_datetime
 from importlib_resources import files
 from pulpcore.plugin.exceptions import InvalidSignatureError
-from pulp_rpm.app.rpm_version import RpmVersion
 
 
 def annotate_with_age(qs):
     """Provide an "age" score for each Package object in the queryset.
 
-    Annotate the Package objects with an "age". Age is calculated by partitioning the
-    Packages by name and architecture and ordering the packages in each group by 'evr',
-    which is the relative "age" within the group. The newest package gets age=1, second
-    newest age=2, and so on.
-
+    Annotate the Package objects with an "age". Age is calculated with a postgresql
+    window function which partitions the Packages by name and architecture, orders the
+    packages in each group by 'evr', and returns the row number of each package, which
+    is the relative "age" within the group. The newest package gets age=1, second newest
+    age=2, and so on.
+
     A second partition by architecture is important because there can be packages with the
     same name and version numbers but they are not interchangeable because they have
     differing arch, such as 'x86_64' and 'i686', or 'src' (SRPM) and any other arch.
     """
-    # Get packages in current queryset with their basic info
-    packages = list(qs.values("pk", "name", "arch", "epoch", "version", "release"))
-
-    # Group packages by name and arch
-    groups = defaultdict(list)
-    for pkg in packages:
-        key = (pkg["name"], pkg["arch"])
-        groups[key].append(pkg)
-
-    # Calculate age for each group
-    age_mapping = {}
-    for group_packages in groups.values():
-        # Sort by EVR (newest first)
-        group_packages.sort(
-            key=lambda p: RpmVersion(p["epoch"], p["version"], p["release"]), reverse=True
+    from django.db.models import Window, F
+    from django.db.models.functions import RowNumber
+
+    return qs.annotate(
+        age=Window(
+            expression=RowNumber(),
+            partition_by=[F("name"), F("arch")],
+            order_by=F("evr").desc(),
         )
-
-        # Assign ages (1 = newest, 2 = second newest, etc.)
-        for age, pkg in enumerate(group_packages, 1):
-            age_mapping[pkg["pk"]] = age
-
-    # Create a queryset with age annotation
-    # We'll use a CASE statement to map PKs to ages
-    from django.db.models import Case, When, IntegerField
-
-    when_clauses = [When(pk=pk, then=age) for pk, age in age_mapping.items()]
-
-    return qs.annotate(age=Case(*when_clauses, output_field=IntegerField()))
+    )
 
 
 def format_nevra(name=None, epoch=0, version=None, release=None, arch=None):