Skip to content

Commit 03f0ae8

Browse files
author
Oleg
committed
Add forEach extension methods to iterate over code points
1 parent 20b40c0 commit 03f0ae8

File tree

2 files changed

+178
-0
lines changed

2 files changed

+178
-0
lines changed

kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt

+50
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
)
55
package de.cketti.codepoints.deluxe
66

7+
import de.cketti.codepoints.CodePoints
78
import de.cketti.codepoints.codePointAt as intCodePointAt
89
import de.cketti.codepoints.codePointBefore as intCodePointBefore
910

@@ -51,3 +52,52 @@ fun CharSequence.codePointSequence(): CodePointSequence {
5152
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator =
    CodePointIterator(this, startIndex, endIndex)
55+
56+
/**
 * Invokes [block] once for every [CodePoint] found in this [CharSequence]
 * from [startIndex] (inclusive) up to [endIndex] (exclusive).
 *
 * This is the index-less variant: it forwards to [forEachCodePointIndexed]
 * and discards the start index of each code point.
 *
 * @see forEachCodePointIndexed
 */
inline fun CharSequence.forEachCodePoint(
    startIndex: Int = 0,
    endIndex: Int = length,
    block: (codePoint: CodePoint) -> Unit,
) {
    forEachCodePointIndexed(startIndex, endIndex) { _, current ->
        block(current)
    }
}
67+
68+
/**
 * Performs the given [block] for each [CodePoint] in the [CharSequence]
 * between [startIndex] (inclusive) and [endIndex] (exclusive), passing the
 * char index at which the code point starts together with the code point.
 *
 * Both indexes are `Char` indexes into the receiver, not code point counts.
 * A high surrogate that is directly followed by a low surrogate inside the
 * range is reported as a single code point. Any other surrogate, including a
 * high surrogate that is the last char before [endIndex], is reported on its
 * own.
 *
 * @param startIndex char index at which iteration starts, inclusive (defaults to `0`)
 * @param endIndex char index at which iteration stops, exclusive (defaults to `length`)
 * @param block called with the start index of each code point and the code point itself
 * @throws IllegalArgumentException if [startIndex] is negative or greater than [endIndex],
 * or if [endIndex] is greater than the length of the char sequence
 */
inline fun CharSequence.forEachCodePointIndexed(
    startIndex: Int = 0,
    endIndex: Int = length,
    block: (index: Int, codePoint: CodePoint) -> Unit,
) {
    require(startIndex <= endIndex) {
        "startIndex ($startIndex) must be less than or equal to endIndex ($endIndex)"
    }
    require(endIndex <= length) {
        "endIndex ($endIndex) must be less than or equal to char sequence's length ($length)"
    }
    // Checked explicitly because an out-of-range `get` is not guaranteed to throw
    // on every Kotlin target, and a negative empty range would otherwise pass silently.
    require(startIndex >= 0) {
        "startIndex ($startIndex) must not be negative"
    }

    var index = startIndex
    while (index < endIndex) {
        val codePointStartIndex = index
        val firstChar = this[index]
        index++
        // Only look ahead while still inside the requested range, so a surrogate
        // pair that straddles endIndex is not combined.
        if (firstChar.isHighSurrogate() && index < endIndex) {
            val nextChar = this[index]
            if (nextChar.isLowSurrogate()) {
                block(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar).toCodePoint())
                index++
                continue
            }
        }
        // BMP char, or an unpaired surrogate reported as-is.
        block(codePointStartIndex, firstChar.toCodePoint())
    }
}

kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt

+128
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package de.cketti.codepoints.deluxe
22

33
import kotlin.test.assertEquals
44
import kotlin.test.Test
5+
import kotlin.test.assertFailsWith
56

67
class CharSequenceExtensionsTest {
78
@Test
@@ -57,4 +58,131 @@ class CharSequenceExtensionsTest {
5758
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(1))
5859
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(3))
5960
}
61+
62+
@Test
fun forEachCodepoint() {
    // Gathers every code point reported for the whole receiver, in visiting order.
    fun CharSequence.collectCodepoints(): List<CodePoint> {
        val visited = mutableListOf<CodePoint>()
        forEachCodePoint { codePoint -> visited.add(codePoint) }
        return visited
    }

    // Empty input: the callback is never invoked.
    assertEquals(emptyList(), "".collectCodepoints())

    // Single BMP char.
    assertEquals(listOf('a'.toCodePoint()), "a".collectCodepoints())

    // Highest BMP value is still a single-char code point.
    assertEquals(
        listOf('a'.toCodePoint(), 0xFFFF.toCodePoint()),
        "a\uFFFF".collectCodepoints(),
    )

    // Surrogate pairs are combined into one code point each.
    assertEquals(
        listOf(0x1F995.toCodePoint(), 'a'.toCodePoint(), 0x1F996.toCodePoint()),
        "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
    )
}
83+
84+
@Test
fun forEachCodepoint_with_non_default_indexes() {
    // Gathers the code points reported for the char range [from, until).
    fun CharSequence.collectCodepoints(from: Int, until: Int): List<CodePoint> {
        val visited = mutableListOf<CodePoint>()
        forEachCodePoint(from, until) { codePoint -> visited.add(codePoint) }
        return visited
    }

    val dinosaurs = "\uD83E\uDD95a\uD83E\uDD96"

    // Plain BMP text: the range selects individual chars.
    assertEquals(listOf('a'.toCodePoint()), "ab".collectCodepoints(0, 1))
    assertEquals(listOf('b'.toCodePoint()), "ab".collectCodepoints(1, 2))

    // Range that sits between two surrogate pairs.
    assertEquals(listOf('a'.toCodePoint()), dinosaurs.collectCodepoints(2, 3))

    // Range ends in the middle of a pair: the lone high surrogate is reported.
    assertEquals(listOf(0xD83E.toCodePoint()), dinosaurs.collectCodepoints(0, 1))

    // Range starts in the middle of a pair: the lone low surrogate is reported.
    assertEquals(listOf(0xDD95.toCodePoint()), dinosaurs.collectCodepoints(1, 2))

    // Range cuts through both pairs.
    assertEquals(
        listOf(0xDD95.toCodePoint(), 'a'.toCodePoint(), 0xD83E.toCodePoint()),
        dinosaurs.collectCodepoints(1, 4),
    )

    // startIndex greater than endIndex is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePoint(startIndex = 1, endIndex = 0) { }
    }

    // endIndex beyond the length is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePoint(startIndex = 1, endIndex = 2) { }
    }
}
122+
123+
@Test
fun forEachCodepointIndexed() {
    // Gathers (start index, code point) pairs for the whole receiver, in visiting order.
    fun CharSequence.collectCodepoints(): List<Pair<Int, CodePoint>> {
        val visited = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed { position, codePoint -> visited.add(Pair(position, codePoint)) }
        return visited
    }

    // Empty input: the callback is never invoked.
    assertEquals(emptyList(), "".collectCodepoints())

    // Single BMP char starts at index 0.
    assertEquals(listOf(0 to 'a'.toCodePoint()), "a".collectCodepoints())

    // A surrogate pair is reported once, at the index of its high surrogate.
    assertEquals(
        listOf(0 to 'a'.toCodePoint(), 1 to 0x1F995.toCodePoint()),
        "a\uD83E\uDD95".collectCodepoints(),
    )

    // Indexes advance by two chars after each surrogate pair.
    assertEquals(
        listOf(
            0 to 0x1F995.toCodePoint(),
            2 to 'a'.toCodePoint(),
            3 to 0x1F996.toCodePoint(),
        ),
        "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
    )
}
149+
150+
@Test
fun forEachCodepointIndexed_with_non_default_indexes() {
    // Gathers (start index, code point) pairs for the char range [from, until).
    fun CharSequence.collectCodepoints(from: Int, until: Int): List<Pair<Int, CodePoint>> {
        val visited = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed(from, until) { position, codePoint ->
            visited.add(Pair(position, codePoint))
        }
        return visited
    }

    val dinosaurs = "\uD83E\uDD95a\uD83E\uDD96"

    // Plain BMP text: reported indexes are relative to the receiver, not to the range.
    assertEquals(listOf(0 to 'a'.toCodePoint()), "ab".collectCodepoints(0, 1))
    assertEquals(listOf(1 to 'b'.toCodePoint()), "ab".collectCodepoints(1, 2))

    // A full surrogate pair inside the range is combined.
    assertEquals(
        listOf(1 to 0x1F995.toCodePoint()),
        "a\uD83E\uDD95".collectCodepoints(1, 3),
    )

    // Range cuts through both pairs: the unpaired surrogates are reported individually.
    assertEquals(
        listOf(
            1 to 0xDD95.toCodePoint(),
            2 to 'a'.toCodePoint(),
            3 to 0xD83E.toCodePoint(),
        ),
        dinosaurs.collectCodepoints(1, 4),
    )

    // Range that sits between two surrogate pairs.
    assertEquals(
        listOf(2 to 'a'.toCodePoint()),
        dinosaurs.collectCodepoints(2, 3),
    )

    // startIndex greater than endIndex is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePointIndexed(startIndex = 1, endIndex = 0) { _, _ -> }
    }

    // endIndex beyond the length is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePointIndexed(startIndex = 1, endIndex = 2) { _, _ -> }
    }
}
60188
}

0 commit comments

Comments
 (0)