Skip to content

Commit d483391

Browse files
author
Oleg
committed
Add forEach extensions to kotlin-codepoints library
1 parent 03f0ae8 commit d483391

File tree

4 files changed

+101
-126
lines changed

4 files changed

+101
-126
lines changed

kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt

+8-39
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
)
55
package de.cketti.codepoints.deluxe
66

7-
import de.cketti.codepoints.CodePoints
87
import de.cketti.codepoints.codePointAt as intCodePointAt
98
import de.cketti.codepoints.codePointBefore as intCodePointBefore
9+
import de.cketti.codepoints.forEachCodePoint as intForEachCodePoint
10+
import de.cketti.codepoints.forEachCodePointIndexed as intForEachCodePointIndexed
1011

1112
/**
1213
* Returns the Unicode code point at the specified index.
@@ -54,50 +55,18 @@ fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length):
5455
}
5556

5657
/**
57-
* Performs given [block] for each [CodePoint] in the [CharSequence]
58-
* between [startIndex] (inclusive) and [endIndex] (exclusive).
58+
* Performs given [action] for each [CodePoint] in the [CharSequence].
5959
*
6060
* @see forEachCodePointIndexed
6161
*/
6262
inline fun CharSequence.forEachCodePoint(
63-
startIndex: Int = 0,
64-
endIndex: Int = length,
65-
block: (codePoint: CodePoint) -> Unit,
66-
) = forEachCodePointIndexed(startIndex, endIndex) { _, codePoint -> block(codePoint) }
63+
action: (codePoint: CodePoint) -> Unit,
64+
) = intForEachCodePoint { action(it.toCodePoint()) }
6765

6866
/**
69-
* Performs given [block] for each [CodePoint] in the [CharSequence]
70-
* between [startIndex] (inclusive) and [endIndex] (exclusive).
67+
* Performs given [action] for each [CodePoint] in the [CharSequence].
7168
* The [action] receives the index at which each codepoint starts, followed by the [CodePoint] itself.
72-
*
73-
* @param startIndex index of the first codepoint in CharSequence to start with (defaults to `0`)
74-
* @param endIndex index of the last codepoint in CharSequence to stop at (defaults to `length`)
7569
*/
7670
inline fun CharSequence.forEachCodePointIndexed(
77-
startIndex: Int = 0,
78-
endIndex: Int = length,
79-
block: (index: Int, codePoint: CodePoint) -> Unit,
80-
) {
81-
require(startIndex <= endIndex) {
82-
"startIndex ($startIndex) must be less than or equal to endIndex ($endIndex)"
83-
}
84-
require(endIndex <= length) {
85-
"endIndex ($endIndex) must be less than or equal to char sequence's length ($length)"
86-
}
87-
val str = this
88-
var index = startIndex
89-
while (index < endIndex) {
90-
val codePointStartIndex = index
91-
val firstChar = str[index]
92-
index++
93-
if (firstChar.isHighSurrogate() && index < endIndex) {
94-
val nextChar = str[index]
95-
if (nextChar.isLowSurrogate()) {
96-
block(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar).toCodePoint())
97-
index++
98-
continue
99-
}
100-
}
101-
block(codePointStartIndex, firstChar.toCodePoint())
102-
}
103-
}
71+
action: (index: Int, codePoint: CodePoint) -> Unit,
72+
) = intForEachCodePointIndexed { index, codePoint -> action(index, codePoint.toCodePoint()) }

kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt

-78
Original file line numberDiff line numberDiff line change
@@ -81,45 +81,6 @@ class CharSequenceExtensionsTest {
8181
)
8282
}
8383

84-
@Test
85-
fun forEachCodepoint_with_non_default_indexes() {
86-
fun CharSequence.collectCodepoints(
87-
startIndex: Int,
88-
endIndex: Int,
89-
): List<CodePoint> = buildList { forEachCodePoint(startIndex, endIndex) { add(it) } }
90-
91-
assertEquals(
92-
listOf('a'.toCodePoint()),
93-
"ab".collectCodepoints(0, 1),
94-
)
95-
assertEquals(
96-
listOf('b'.toCodePoint()),
97-
"ab".collectCodepoints(1, 2),
98-
)
99-
assertEquals(
100-
listOf('a'.toCodePoint()),
101-
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(2, 3),
102-
)
103-
assertEquals(
104-
listOf(0xD83E.toCodePoint()),
105-
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(0, 1),
106-
)
107-
assertEquals(
108-
listOf(0xDD95.toCodePoint()),
109-
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 2),
110-
)
111-
assertEquals(
112-
listOf(0xDD95.toCodePoint(), 'a'.toCodePoint(), 0xD83E.toCodePoint()),
113-
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 4),
114-
)
115-
assertFailsWith(IllegalArgumentException::class) {
116-
"a".forEachCodePoint(startIndex = 1, endIndex = 0) { }
117-
}
118-
assertFailsWith(IllegalArgumentException::class) {
119-
"a".forEachCodePoint(startIndex = 1, endIndex = 2) { }
120-
}
121-
}
122-
12384
@Test
12485
fun forEachCodepointIndexed() {
12586
fun CharSequence.collectCodepoints(): List<Pair<Int, CodePoint>> =
@@ -146,43 +107,4 @@ class CharSequenceExtensionsTest {
146107
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
147108
)
148109
}
149-
150-
@Test
151-
fun forEachCodepointIndexed_with_non_default_indexes() {
152-
fun CharSequence.collectCodepoints(start: Int, end: Int): List<Pair<Int, CodePoint>> =
153-
buildList { forEachCodePointIndexed(start, end) { index, codepoint -> add(index to codepoint) } }
154-
155-
assertEquals(
156-
listOf(0 to 'a'.toCodePoint()),
157-
"ab".collectCodepoints(0, 1),
158-
)
159-
assertEquals(
160-
listOf(1 to 'b'.toCodePoint()),
161-
"ab".collectCodepoints(1, 2),
162-
)
163-
assertEquals(
164-
listOf(1 to 0x1F995.toCodePoint()),
165-
"a\uD83E\uDD95".collectCodepoints(1, 3),
166-
)
167-
assertEquals(
168-
listOf(
169-
1 to 0xDD95.toCodePoint(),
170-
2 to 'a'.toCodePoint(),
171-
3 to 0xD83E.toCodePoint(),
172-
),
173-
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 4),
174-
)
175-
assertEquals(
176-
listOf(
177-
2 to 'a'.toCodePoint(),
178-
),
179-
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(2, 3),
180-
)
181-
assertFailsWith(IllegalArgumentException::class) {
182-
"a".forEachCodePointIndexed(startIndex = 1, endIndex = 0) { _, _ -> }
183-
}
184-
assertFailsWith(IllegalArgumentException::class) {
185-
"a".forEachCodePointIndexed(startIndex = 1, endIndex = 2) { _, _ -> }
186-
}
187-
}
188110
}

kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt

+44-9
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ fun CharSequence.codePointAt(index: Int): Int {
5353
*
5454
* The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
5555
* sequence.
56-
*
56+
*
5757
* If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high
58-
* surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
58+
* surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
5959
* returned. In all other cases this method behaves like [CharSequence.get] was called with an argument of `index - 1`.
6060
*
61-
* If the value `index - 1` is out of bounds of this character sequence, this method throws an
61+
* If the value `index - 1` is out of bounds of this character sequence, this method throws an
6262
* [IndexOutOfBoundsException].
6363
*/
6464
fun CharSequence.codePointBefore(index: Int): Int {
@@ -78,11 +78,11 @@ fun CharSequence.codePointBefore(index: Int): Int {
7878

7979
/**
8080
* Returns the number of Unicode code points in the specified text range of this `CharSequence`.
81-
*
82-
* The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
81+
*
82+
* The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
8383
* length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
8484
* one code point each.
85-
*
85+
*
8686
* If `beginIndex` is negative, or `endIndex` is larger than the length of this string, or `beginIndex` is larger than
8787
* `endIndex`, this method throws an [IndexOutOfBoundsException].
8888
*/
@@ -108,10 +108,10 @@ fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
108108
}
109109

110110
/**
111-
* Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
112-
*
111+
* Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
112+
*
113113
* Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each.
114-
*
114+
*
115115
* If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and
116116
* the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is
117117
* negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this
@@ -153,3 +153,38 @@ fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
153153
return currentIndex + 1
154154
}
155155
}
156+
157+
/**
158+
* Performs given [action] for each codepoint in the [CharSequence].
159+
*
160+
* @see forEachCodePointIndexed
161+
*/
162+
inline fun CharSequence.forEachCodePoint(
163+
action: (codePoint: Int) -> Unit,
164+
) = forEachCodePointIndexed { _, codePoint -> action(codePoint) }
165+
166+
/**
167+
* Performs given [action] for each codepoint in the [CharSequence].
168+
* The [action] receives the `Char` index at which each codepoint starts, followed by the codepoint itself.
169+
*/
170+
inline fun CharSequence.forEachCodePointIndexed(
171+
action: (index: Int, codePoint: Int) -> Unit,
172+
) {
173+
val str = this
174+
var index = 0
175+
val endIndex = length
176+
while (index < endIndex) {
177+
val codePointStartIndex = index
178+
val firstChar = str[index]
179+
index++
180+
if (firstChar.isHighSurrogate() && index < endIndex) {
181+
val nextChar = str[index]
182+
if (nextChar.isLowSurrogate()) {
183+
action(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar))
184+
index++
185+
continue
186+
}
187+
}
188+
action(codePointStartIndex, firstChar.code)
189+
}
190+
}

kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt

+49
Original file line numberDiff line numberDiff line change
@@ -154,4 +154,53 @@ class CharSequenceExtensionsTest {
154154
"\uD83E\uDD95".offsetByCodePoints(index = 2, codePointOffset = -2)
155155
}
156156
}
157+
158+
@Test
159+
fun forEachCodepoint() {
160+
fun CharSequence.collectCodepoints(): List<Int> = buildList { forEachCodePoint { add(it) } }
161+
162+
assertEquals(
163+
emptyList(),
164+
"".collectCodepoints(),
165+
)
166+
assertEquals(
167+
listOf('a'.code),
168+
"a".collectCodepoints(),
169+
)
170+
assertEquals(
171+
listOf('a'.code, 0xFFFF),
172+
"a\uFFFF".collectCodepoints(),
173+
)
174+
assertEquals(
175+
listOf(0x1F995, 'a'.code, 0x1F996),
176+
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
177+
)
178+
}
179+
180+
@Test
181+
fun forEachCodepointIndexed() {
182+
fun CharSequence.collectCodepoints(): List<Pair<Int, Int>> =
183+
buildList { forEachCodePointIndexed { index, codepoint -> add(index to codepoint) } }
184+
185+
assertEquals(
186+
emptyList(),
187+
"".collectCodepoints(),
188+
)
189+
assertEquals(
190+
listOf(0 to 'a'.code),
191+
"a".collectCodepoints(),
192+
)
193+
assertEquals(
194+
listOf(0 to 'a'.code, 1 to 0x1F995),
195+
"a\uD83E\uDD95".collectCodepoints(),
196+
)
197+
assertEquals(
198+
listOf(
199+
0 to 0x1F995,
200+
2 to 'a'.code,
201+
3 to 0x1F996,
202+
),
203+
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
204+
)
205+
}
157206
}

0 commit comments

Comments
 (0)