Add forEach extensions to kotlin-codepoints library

Oleg · Oleg · commit d483391902eb · 2024-06-24T10:47:26.000+04:00
diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt
@@ -4,9 +4,10 @@
 )
 package de.cketti.codepoints.deluxe
 
-import de.cketti.codepoints.CodePoints
 import de.cketti.codepoints.codePointAt as intCodePointAt
 import de.cketti.codepoints.codePointBefore as intCodePointBefore
+import de.cketti.codepoints.forEachCodePoint as intForEachCodePoint
+import de.cketti.codepoints.forEachCodePointIndexed as intForEachCodePointIndexed
 
 /**
  * Returns the Unicode code point at the specified index.
@@ -54,50 +55,18 @@ fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length):
 }
 
 /**
- * Performs given [block] for each [CodePoint] in the [CharSequence]
- * between [startIndex] (inclusive) and [endIndex] (exclusive).
+ * Performs the given [action] for each [CodePoint] in the [CharSequence], from start to end.
  *
  * @see forEachCodePointIndexed
  */
 inline fun CharSequence.forEachCodePoint(
-    startIndex: Int = 0,
-    endIndex: Int = length,
-    block: (codePoint: CodePoint) -> Unit,
-) = forEachCodePointIndexed(startIndex, endIndex) { _, codePoint -> block(codePoint) }
+    action: (codePoint: CodePoint) -> Unit,
+) = intForEachCodePoint { action(it.toCodePoint()) } // delegate to the Int-based variant, wrapping each value
 
 /**
- * Performs given [block] for each [CodePoint] in the [CharSequence]
- * between [startIndex] (inclusive) and [endIndex] (exclusive).
+ * Performs the given [action] for each [CodePoint] in the [CharSequence], from start to end.
  * Provides the start index for the given codepoint
- *
- * @param startIndex index of the first codepoint in CharSequence to start with (defaults to `0`)
- * @param endIndex index of the last codepoint in CharSequence to stop at (defaults to `length`)
  */
 inline fun CharSequence.forEachCodePointIndexed(
-    startIndex: Int = 0,
-    endIndex: Int = length,
-    block: (index: Int, codePoint: CodePoint) -> Unit,
-) {
-    require(startIndex <= endIndex) {
-        "startIndex ($startIndex) must be less than or equal to endIndex ($endIndex)"
-    }
-    require(endIndex <= length) {
-        "endIndex ($endIndex) must be less than or equal to char sequence's length ($length)"
-    }
-    val str = this
-    var index = startIndex
-    while (index < endIndex) {
-        val codePointStartIndex = index
-        val firstChar = str[index]
-        index++
-        if (firstChar.isHighSurrogate() && index < endIndex) {
-            val nextChar = str[index]
-            if (nextChar.isLowSurrogate()) {
-                block(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar).toCodePoint())
-                index++
-                continue
-            }
-        }
-        block(codePointStartIndex, firstChar.toCodePoint())
-    }
-}
+    action: (index: Int, codePoint: CodePoint) -> Unit,
+) = intForEachCodePointIndexed { index, codePoint -> action(index, codePoint.toCodePoint()) } // index is forwarded unchanged
diff --git a/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt b/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt
@@ -81,45 +81,6 @@ class CharSequenceExtensionsTest {
         )
     }
 
-    @Test
-    fun forEachCodepoint_with_non_default_indexes() {
-        fun CharSequence.collectCodepoints(
-            startIndex: Int,
-            endIndex: Int,
-        ): List<CodePoint> = buildList { forEachCodePoint(startIndex, endIndex) { add(it) } }
-
-        assertEquals(
-            listOf('a'.toCodePoint()),
-            "ab".collectCodepoints(0, 1),
-        )
-        assertEquals(
-            listOf('b'.toCodePoint()),
-            "ab".collectCodepoints(1, 2),
-        )
-        assertEquals(
-            listOf('a'.toCodePoint()),
-            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(2, 3),
-        )
-        assertEquals(
-            listOf(0xD83E.toCodePoint()),
-            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(0, 1),
-        )
-        assertEquals(
-            listOf(0xDD95.toCodePoint()),
-            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 2),
-        )
-        assertEquals(
-            listOf(0xDD95.toCodePoint(), 'a'.toCodePoint(), 0xD83E.toCodePoint()),
-            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 4),
-        )
-        assertFailsWith(IllegalArgumentException::class) {
-            "a".forEachCodePoint(startIndex = 1, endIndex = 0) {  }
-        }
-        assertFailsWith(IllegalArgumentException::class) {
-            "a".forEachCodePoint(startIndex = 1, endIndex = 2) {  }
-        }
-    }
-
     @Test
     fun forEachCodepointIndexed() {
         fun CharSequence.collectCodepoints(): List<Pair<Int, CodePoint>> =
@@ -146,43 +107,4 @@ class CharSequenceExtensionsTest {
             "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
         )
     }
-
-    @Test
-    fun forEachCodepointIndexed_with_non_default_indexes() {
-        fun CharSequence.collectCodepoints(start: Int, end: Int): List<Pair<Int, CodePoint>> =
-            buildList { forEachCodePointIndexed(start, end) { index, codepoint -> add(index to codepoint) } }
-
-        assertEquals(
-            listOf(0 to 'a'.toCodePoint()),
-            "ab".collectCodepoints(0, 1),
-        )
-        assertEquals(
-            listOf(1 to 'b'.toCodePoint()),
-            "ab".collectCodepoints(1, 2),
-        )
-        assertEquals(
-            listOf(1 to 0x1F995.toCodePoint()),
-            "a\uD83E\uDD95".collectCodepoints(1, 3),
-        )
-        assertEquals(
-            listOf(
-                1 to 0xDD95.toCodePoint(),
-                2 to 'a'.toCodePoint(),
-                3 to 0xD83E.toCodePoint(),
-            ),
-            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 4),
-        )
-        assertEquals(
-            listOf(
-                2 to 'a'.toCodePoint(),
-            ),
-            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(2, 3),
-        )
-        assertFailsWith(IllegalArgumentException::class) {
-            "a".forEachCodePointIndexed(startIndex = 1, endIndex = 0) { _, _ ->  }
-        }
-        assertFailsWith(IllegalArgumentException::class) {
-            "a".forEachCodePointIndexed(startIndex = 1, endIndex = 2) { _, _ ->  }
-        }
-    }
 }
diff --git a/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt b/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt
@@ -53,12 +53,12 @@ fun CharSequence.codePointAt(index: Int): Int {
  *
  * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
  * sequence.
- * 
+ *
  * If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high
- * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is 
+ * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
  * returned. In all other cases this method behaves like [CharSequence.get] was called with an argument of `index - 1`.
  *
- * If the value `index - 1` is out of bounds of this character sequence, this method throws an 
+ * If the value `index - 1` is out of bounds of this character sequence, this method throws an
  * [IndexOutOfBoundsException].
  */
 fun CharSequence.codePointBefore(index: Int): Int {
@@ -78,11 +78,11 @@ fun CharSequence.codePointBefore(index: Int): Int {
 
 /**
  * Returns the number of Unicode code points in the specified text range of this `CharSequence`.
- * 
- * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the 
+ *
+ * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
  * length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
  * one code point each.
- * 
+ *
  * If `beginIndex` is negative, or `endIndex` is larger than the length of this string, or `beginIndex` is larger than
  * `endIndex`, this method throws an [IndexOutOfBoundsException].
  */
@@ -108,10 +108,10 @@ fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
 }
 
 /**
- * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points. 
- * 
+ * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
+ *
  * Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each.
- * 
+ *
  * If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and
  * the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is
  * negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this
@@ -153,3 +153,38 @@ fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
         return currentIndex + 1
     }
 }
+
+/**
+ * Performs the given [action] for each code point in the [CharSequence]; each code point is passed as an `Int`.
+ *
+ * @see forEachCodePointIndexed
+ */
+inline fun CharSequence.forEachCodePoint(
+    action: (codePoint: Int) -> Unit,
+) = forEachCodePointIndexed { _, codePoint -> action(codePoint) } // the start index is not needed here
+
+/**
+ * Performs the given [action] for each code point in the [CharSequence], from start to end.
+ * [action] receives the `Char` index where the code point starts; an unpaired surrogate is passed as its own value.
+ */
+inline fun CharSequence.forEachCodePointIndexed(
+    action: (index: Int, codePoint: Int) -> Unit,
+) {
+    val str = this
+    var index = 0
+    val endIndex = length // read once, before the loop starts
+    while (index < endIndex) {
+        val codePointStartIndex = index // remember where this code point begins before advancing
+        val firstChar = str[index]
+        index++
+        if (firstChar.isHighSurrogate() && index < endIndex) { // a pair needs a following Char
+            val nextChar = str[index]
+            if (nextChar.isLowSurrogate()) {
+                action(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar))
+                index++ // skip the low surrogate that was just consumed
+                continue
+            }
+        }
+        action(codePointStartIndex, firstChar.code) // not part of a valid pair: pass the Char's own value
+    }
+}
diff --git a/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt b/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt
@@ -154,4 +154,53 @@ class CharSequenceExtensionsTest {
             "\uD83E\uDD95".offsetByCodePoints(index = 2, codePointOffset = -2)
         }
     }
+
+    @Test
+    fun forEachCodepoint() { // covers empty input, single-Char code points and surrogate pairs
+        fun CharSequence.collectCodepoints(): List<Int> = buildList { forEachCodePoint { add(it) } }
+
+        assertEquals(
+            emptyList(),
+            "".collectCodepoints(), // empty input: the action is never invoked
+        )
+        assertEquals(
+            listOf('a'.code),
+            "a".collectCodepoints(),
+        )
+        assertEquals(
+            listOf('a'.code, 0xFFFF), // U+FFFF is a single Char, not a surrogate
+            "a\uFFFF".collectCodepoints(),
+        )
+        assertEquals(
+            listOf(0x1F995, 'a'.code, 0x1F996), // each surrogate pair is reported as one code point
+            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
+        )
+    }
+
+    @Test
+    fun forEachCodepointIndexed() { // checks that each code point is reported with the Char index where it starts
+        fun CharSequence.collectCodepoints(): List<Pair<Int, Int>> =
+            buildList { forEachCodePointIndexed { index, codepoint -> add(index to codepoint) } }
+
+        assertEquals(
+            emptyList(),
+            "".collectCodepoints(), // empty input: the action is never invoked
+        )
+        assertEquals(
+            listOf(0 to 'a'.code),
+            "a".collectCodepoints(),
+        )
+        assertEquals(
+            listOf(0 to 'a'.code, 1 to 0x1F995), // the surrogate pair starts at Char index 1
+            "a\uD83E\uDD95".collectCodepoints(),
+        )
+        assertEquals(
+            listOf(
+                0 to 0x1F995, // occupies Char indices 0 and 1
+                2 to 'a'.code,
+                3 to 0x1F996, // occupies Char indices 3 and 4
+            ),
+            "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
+        )
+    }
 }