Skip to content

Commit e2293d6

Browse files
author
Julien Cretel
committed
mime: speed up ParseMediaType
Eschew UTF-8 decoding and strings.IndexFunc where possible, and rely
on 128-bit bitmaps instead. Eliminate some bounds checks.

Some benchmark results (no changes to allocations):

goos: darwin
goarch: amd64
pkg: mime
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                      │     old     │                 new                 │
                      │   sec/op    │   sec/op     vs base                │
ParseMediaType-8        71.75µ ± 0%   55.53µ ± 0%  -22.60% (p=0.000 n=20)
ParseMediaTypeBogus-8   5.330µ ± 0%   3.603µ ± 0%  -32.41% (p=0.000 n=20)
geomean                 19.56µ        14.14µ       -27.67%
1 parent 4238fb0 commit e2293d6

File tree

2 files changed

+69
-24
lines changed

2 files changed

+69
-24
lines changed

src/mime/grammar.go

+62-11
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,68 @@
44

55
package mime
66

7-
import (
8-
"strings"
9-
)
10-
11-
// isTSpecial reports whether rune is in 'tspecials' as defined by RFC
7+
// isTSpecial reports whether c is in 'tspecials' as defined by RFC
128
// 1521 and RFC 2045.
13-
func isTSpecial(r rune) bool {
14-
return strings.ContainsRune(`()<>@,;:\"/[]?=`, r)
9+
func isTSpecial(c byte) bool {
10+
// tspecials := "(" / ")" / "<" / ">" / "@" /
11+
// "," / ";" / ":" / "\" / <">
12+
// "/" / "[" / "]" / "?" / "="
13+
//
14+
// mask is a 128-bit bitmap with a 1 at the index of every tspecials byte,
15+
// so that the byte c can be tested with a shift and an and.
16+
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero (a uint64 shifted
17+
// by 64 or more is zero), and this function will return false.
18+
const mask = 0 |
19+
1<<'(' |
20+
1<<')' |
21+
1<<'<' |
22+
1<<'>' |
23+
1<<'@' |
24+
1<<',' |
25+
1<<';' |
26+
1<<':' |
27+
1<<'\\' |
28+
1<<'"' |
29+
1<<'/' |
30+
1<<'[' |
31+
1<<']' |
32+
1<<'?' |
33+
1<<'='
34+
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
35+
(uint64(1)<<(c-64))&(mask>>64)) != 0
1536
}
1637

// isTokenChar reports whether c is in 'token' as defined by RFC
// 1521 and RFC 2045.
func isTokenChar(c byte) bool {
	// token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
	//             or tspecials>
	//
	// mask is a 128-bit bitmap with 1s for allowed bytes,
	// so that the byte c can be tested with a shift and an and.
	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero
	// (a uint64 shifted by 64 or more is zero), and this
	// function will return false.
	//
	// Every printable US-ASCII byte (0x21 through 0x7e) that is not
	// a tspecial must have its bit set. That includes '{' and '}',
	// which are not tspecials and are therefore valid token characters.
	const mask = 0 |
		(1<<(10)-1)<<'0' |
		(1<<(26)-1)<<'a' |
		(1<<(26)-1)<<'A' |
		1<<'!' |
		1<<'#' |
		1<<'$' |
		1<<'%' |
		1<<'&' |
		1<<'\'' |
		1<<'*' |
		1<<'+' |
		1<<'-' |
		1<<'.' |
		1<<'^' |
		1<<'_' |
		1<<'`' |
		1<<'{' |
		1<<'|' |
		1<<'}' |
		1<<'~'
	// The low 64 bits of mask cover bytes 0-63 and the high 64 bits
	// cover bytes 64-127; at most one of the two terms is non-zero.
	return ((uint64(1)<<c)&(mask&(1<<64-1)) |
		(uint64(1)<<(c-64))&(mask>>64)) != 0
}
2470

2571
// isToken reports whether s is a 'token' as defined by RFC 1521
@@ -28,5 +74,10 @@ func isToken(s string) bool {
2874
if s == "" {
2975
return false
3076
}
31-
return strings.IndexFunc(s, isNotTokenChar) < 0
77+
for _, c := range []byte(s) {
78+
if !isTokenChar(c) {
79+
return false
80+
}
81+
}
82+
return true
3283
}

src/mime/mediatype.go

+7-13
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func FormatMediaType(t string, param map[string]string) string {
6060
// attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
6161
if ch <= ' ' || ch >= 0x7F ||
6262
ch == '*' || ch == '\'' || ch == '%' ||
63-
isTSpecial(rune(ch)) {
63+
isTSpecial(ch) {
6464

6565
b.WriteString(value[offset:index])
6666
offset = index + 1
@@ -250,23 +250,17 @@ func decode2231Enc(v string) (string, bool) {
250250
return encv, true
251251
}
252252

253-
func isNotTokenChar(r rune) bool {
254-
return !isTokenChar(r)
255-
}
256-
257253
// consumeToken consumes a token from the beginning of provided
258254
// string, per RFC 2045 section 5.1 (referenced from 2183), and return
259255
// the token consumed and the rest of the string. Returns ("", v) on
260256
// failure to consume at least one character.
261257
func consumeToken(v string) (token, rest string) {
262-
notPos := strings.IndexFunc(v, isNotTokenChar)
263-
if notPos == -1 {
264-
return v, ""
265-
}
266-
if notPos == 0 {
267-
return "", v
258+
for i := range len(v) {
259+
if !isTokenChar(v[i]) {
260+
return v[:i], v[i:]
261+
}
268262
}
269-
return v[0:notPos], v[notPos:]
263+
return v, ""
270264
}
271265

272266
// consumeValue consumes a "value" per RFC 2045, where a value is
@@ -299,7 +293,7 @@ func consumeValue(v string) (value, rest string) {
299293
// and intended as a literal backslash. This makes Go servers deal better
300294
// with MSIE without affecting the way they handle conforming MIME
301295
// generators.
302-
if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) {
296+
if r == '\\' && i+1 < len(v) && isTSpecial(v[i+1]) {
303297
buffer.WriteByte(v[i+1])
304298
i++
305299
continue

0 commit comments

Comments
 (0)