Documentation Index
Fetch the complete documentation index at: https://mintlify.com/golang/go/llms.txt
Use this file to discover all available pages before exploring further.
The unicode package provides data and functions to test Unicode code point properties.
Character Classification
import "unicode"
// Check character type
isLetter := unicode.IsLetter('A') // true
isDigit := unicode.IsDigit('5') // true
isSpace := unicode.IsSpace(' ') // true
isPunct := unicode.IsPunct('.') // true
isUpper := unicode.IsUpper('A') // true
isLower := unicode.IsLower('a') // true
Case Conversion
// Convert case
upper := unicode.ToUpper('a') // 'A'
lower := unicode.ToLower('A') // 'a'
title := unicode.ToTitle('a') // 'A'
Character Categories
// Test categories
isControl := unicode.IsControl('\n') // true
isGraphic := unicode.IsGraphic('A') // true
isMark := unicode.IsMark('\u0301') // true (combining acute accent, U+0301)
isNumber := unicode.IsNumber('5') // true
isPrint := unicode.IsPrint('A') // true
isSymbol := unicode.IsSymbol('$') // true
Ranges and Scripts
// Check if in range
inLatin := unicode.In('A', unicode.Latin) // true
inCyrillic := unicode.In('Д', unicode.Cyrillic) // true
inHan := unicode.In('中', unicode.Han) // true (Chinese)
unicode/utf8
UTF-8 encoding/decoding.
import "unicode/utf8"
// Encode rune to bytes
buf := make([]byte, 4)
n := utf8.EncodeRune(buf, '中') // Chinese character; n == 3
// Decode rune from bytes
r, size := utf8.DecodeRune(buf[:n])
// Count runes in string
count := utf8.RuneCountInString("Hello 👋") // 7
// Validate UTF-8
valid := utf8.ValidString("Hello") // true
unicode/utf16
UTF-16 encoding/decoding.
import "unicode/utf16"
// Encode rune to UTF-16
r := '👋' // Emoji (outside the BMP, so it encodes as a surrogate pair)
u16 := utf16.Encode([]rune{r})
// Decode UTF-16 to runes
runes := utf16.Decode(u16)
Practical Examples
Count Character Types
// countCharTypes tallies the runes of s into four buckets — "letters",
// "digits", "spaces" and "other" — and returns the totals keyed by bucket
// name. All four keys are always present in the result, even when a count
// is zero.
func countCharTypes(s string) map[string]int {
	tally := map[string]int{"letters": 0, "digits": 0, "spaces": 0, "other": 0}
	for _, ch := range s {
		// Classify the rune; the first matching test wins and anything
		// unmatched stays in "other".
		bucket := "other"
		if unicode.IsLetter(ch) {
			bucket = "letters"
		} else if unicode.IsDigit(ch) {
			bucket = "digits"
		} else if unicode.IsSpace(ch) {
			bucket = "spaces"
		}
		tally[bucket]++
	}
	return tally
}
Title Case
// toTitleCase returns s with the first rune of each whitespace-separated
// word mapped to title case and every other rune mapped to lower case.
// A word starts at the beginning of s and after any rune for which
// unicode.IsSpace reports true; whitespace runes themselves pass through
// unchanged.
func toTitleCase(s string) string {
	// len(s) is the byte length, which is an upper bound on the rune count,
	// so the slice never needs to grow.
	result := make([]rune, 0, len(s))
	atWordStart := true
	for _, r := range s {
		if atWordStart {
			// Use the title-case mapping rather than the upper-case one:
			// they differ for digraph letters (e.g. U+01C6 titles to
			// U+01C5 but upper-cases to U+01C4).
			result = append(result, unicode.ToTitle(r))
		} else {
			result = append(result, unicode.ToLower(r))
		}
		atWordStart = unicode.IsSpace(r)
	}
	return string(result)
}
Strip Non-Letters
// stripNonLetters returns a copy of s containing only the runes for which
// unicode.IsLetter reports true, in their original order. Digits,
// whitespace, punctuation and everything else are dropped.
func stripNonLetters(s string) string {
	var kept []rune
	for _, ch := range s {
		if !unicode.IsLetter(ch) {
			continue
		}
		kept = append(kept, ch)
	}
	return string(kept)
}
Validate Identifier
// isValidIdentifier reports whether s is a non-empty string whose first
// rune is a Unicode letter or '_' and whose remaining runes are each a
// Unicode letter, a Unicode digit, or '_'.
func isValidIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for pos, ch := range s {
		// Letters and underscores are accepted at every position.
		if ch == '_' || unicode.IsLetter(ch) {
			continue
		}
		// Digits are accepted anywhere except the leading position.
		if pos > 0 && unicode.IsDigit(ch) {
			continue
		}
		return false
	}
	return true
}
Count Bytes vs Runes
// compareCount prints s followed by its length in bytes and its length in
// runes, one labelled line each, to standard output. The two numbers differ
// whenever s contains multi-byte UTF-8 sequences.
func compareCount(s string) {
	fmt.Printf("String: %s\n", s)
	// len counts bytes of the UTF-8 encoding, not characters.
	fmt.Printf("Bytes: %d\n", len(s))
	fmt.Printf("Runes: %d\n", utf8.RuneCountInString(s))
}
// Example:
// compareCount("Hello 👋")
// Bytes: 10
// Runes: 7
Common Character Sets
// A selection of the range-table variables referred to on this page as
// unicode.<Name> (e.g. unicode.Latin, unicode.Han). Each can be passed to
// unicode.In to test whether a rune belongs to that script or category.
var (
// Scripts
Latin *RangeTable
Greek *RangeTable
Cyrillic *RangeTable
Arabic *RangeTable
Hebrew *RangeTable
Han *RangeTable
Hiragana *RangeTable
Katakana *RangeTable
// Categories
Letter *RangeTable
Digit *RangeTable
Number *RangeTable
Punct *RangeTable
Symbol *RangeTable
Space *RangeTable
)
Best Practices
- Use utf8.RuneCountInString - Not len() for character count
- Range over runes - Use for _, r := range str
- Validate UTF-8 - Check with ValidString before processing
- Handle multi-byte - Be aware of bytes vs runes
- Use unicode functions - For proper case conversion
- Consider normalization - Use golang.org/x/text/unicode/norm
Common Pitfalls
// Wrong: Counts bytes, not characters
len("👋") // 4
// Correct: Counts runes (characters)
utf8.RuneCountInString("👋") // 1
// Wrong: Byte indexing
s := "Hello👋"
s[5] // Part of emoji bytes
// Correct: Rune iteration
for i, r := range s {
// i is byte index, r is rune
}