From 7fb58673acb777f1e71a78a6f087641ff237b53a Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 18 Jun 2026 17:42:28 +0530 Subject: [PATCH] fix: correct NOT STARTS WITH projection for truncated partitions --- pyiceberg/transforms.py | 11 ++++++++++- tests/test_transforms.py | 17 +++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 739e18a6e6..1b603b6bb8 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -813,7 +813,16 @@ def project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None: return _truncate_number(name, pred, self.transform(field_type)) elif isinstance(field_type, (BinaryType, StringType)): if isinstance(pred, BoundLiteralPredicate): - return _truncate_array(name, pred, self.transform(field_type)) + if isinstance(pred, BoundNotStartsWith): + literal_width = len(pred.literal.value) + if literal_width < self.width: + return pred.as_unbound(name, pred.literal.value) + elif literal_width == self.width: + return NotEqualTo(name, pred.literal.value) + else: + return None + else: + return _truncate_array(name, pred, self.transform(field_type)) def strict_project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None: field_type = pred.term.ref().field.field_type diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 96500907cf..7f01f115a7 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1026,9 +1026,22 @@ def test_projection_truncate_string_starts_with(bound_reference_str: BoundRefere def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundReference) -> None: + # literal_width (5) > truncate width (2): no inclusive projection possible (unsafe) + assert TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None + + +def test_projection_truncate_string_not_starts_with_shorter_literal(bound_reference_str: BoundReference) -> None: + # literal_width (2) == truncate width (2): project to != + assert TruncateTransform(2).project( + "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("he")) + ) == NotEqualTo(term="name", literal=literal("he")) + + +def test_projection_truncate_string_not_starts_with_original_literal(bound_reference_str: BoundReference) -> None: + # literal_width (1) < truncate width (2): keep original literal assert TruncateTransform(2).project( - "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello")) - ) == NotStartsWith(term="name", literal=literal("he")) + "name", BoundNotStartsWith(term=bound_reference_str, literal=literal("h")) + ) == NotStartsWith(term="name", literal=literal("h")) def _test_projection(lhs: UnboundPredicate | None, rhs: UnboundPredicate | None) -> None: