slug.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // Copyright 2013 by Dobrosław Żybort. All rights reserved.
  2. // This Source Code Form is subject to the terms of the Mozilla Public
  3. // License, v. 2.0. If a copy of the MPL was not distributed with this
  4. // file, You can obtain one at http://mozilla.org/MPL/2.0/.
  5. package slug
  6. import (
  7. "bytes"
  8. "regexp"
  9. "sort"
  10. "strings"
  11. "github.com/rainycape/unidecode"
  12. )
  13. var (
  14. // CustomSub stores custom substitution map
  15. CustomSub map[string]string
  16. // CustomRuneSub stores custom rune substitution map
  17. CustomRuneSub map[rune]string
  18. // MaxLength stores maximum slug length.
  19. // It's smart so it will cat slug after full word.
  20. // By default slugs aren't shortened.
  21. // If MaxLength is smaller than length of the first word, then returned
  22. // slug will contain only substring from the first word truncated
  23. // after MaxLength.
  24. MaxLength int
  25. regexpNonAuthorizedChars = regexp.MustCompile("[^a-z0-9-_]")
  26. regexpMultipleDashes = regexp.MustCompile("-+")
  27. )
  28. //=============================================================================
  29. // Make returns slug generated from provided string. Will use "en" as language
  30. // substitution.
  31. func Make(s string) (slug string) {
  32. return MakeLang(s, "en")
  33. }
  34. // MakeLang returns slug generated from provided string and will use provided
  35. // language for chars substitution.
  36. func MakeLang(s string, lang string) (slug string) {
  37. slug = strings.TrimSpace(s)
  38. // Custom substitutions
  39. // Always substitute runes first
  40. slug = SubstituteRune(slug, CustomRuneSub)
  41. slug = Substitute(slug, CustomSub)
  42. // Process string with selected substitution language
  43. switch lang {
  44. case "de":
  45. slug = SubstituteRune(slug, deSub)
  46. case "en":
  47. slug = SubstituteRune(slug, enSub)
  48. case "es":
  49. slug = SubstituteRune(slug, esSub)
  50. case "fi":
  51. slug = SubstituteRune(slug, fiSub)
  52. case "gr":
  53. slug = SubstituteRune(slug, grSub)
  54. case "nl":
  55. slug = SubstituteRune(slug, nlSub)
  56. case "pl":
  57. slug = SubstituteRune(slug, plSub)
  58. default: // fallback to "en" if lang not found
  59. slug = SubstituteRune(slug, enSub)
  60. }
  61. // Process all non ASCII symbols
  62. slug = unidecode.Unidecode(slug)
  63. slug = strings.ToLower(slug)
  64. // Process all remaining symbols
  65. slug = regexpNonAuthorizedChars.ReplaceAllString(slug, "-")
  66. slug = regexpMultipleDashes.ReplaceAllString(slug, "-")
  67. slug = strings.Trim(slug, "-_")
  68. if MaxLength > 0 {
  69. slug = smartTruncate(slug)
  70. }
  71. return slug
  72. }
  73. // Substitute returns string with superseded all substrings from
  74. // provided substitution map. Substitution map will be applied in alphabetic
  75. // order. Many passes, on one substitution another one could apply.
  76. func Substitute(s string, sub map[string]string) (buf string) {
  77. buf = s
  78. var keys []string
  79. for k := range sub {
  80. keys = append(keys, k)
  81. }
  82. sort.Strings(keys)
  83. for _, key := range keys {
  84. buf = strings.Replace(buf, key, sub[key], -1)
  85. }
  86. return
  87. }
  88. // SubstituteRune substitutes string chars with provided rune
  89. // substitution map. One pass.
  90. func SubstituteRune(s string, sub map[rune]string) string {
  91. var buf bytes.Buffer
  92. for _, c := range s {
  93. if d, ok := sub[c]; ok {
  94. buf.WriteString(d)
  95. } else {
  96. buf.WriteRune(c)
  97. }
  98. }
  99. return buf.String()
  100. }
  101. func smartTruncate(text string) string {
  102. if len(text) < MaxLength {
  103. return text
  104. }
  105. var truncated string
  106. words := strings.SplitAfter(text, "-")
  107. // If MaxLength is smaller than length of the first word return word
  108. // truncated after MaxLength.
  109. if len(words[0]) > MaxLength {
  110. return words[0][:MaxLength]
  111. }
  112. for _, word := range words {
  113. if len(truncated)+len(word)-1 <= MaxLength {
  114. truncated = truncated + word
  115. } else {
  116. break
  117. }
  118. }
  119. return strings.Trim(truncated, "-")
  120. }
  121. // IsSlug returns True if provided text does not contain white characters,
  122. // punctuation, all letters are lower case and only from ASCII range.
  123. // It could contain `-` and `_` but not at the beginning or end of the text.
  124. // It should be in range of the MaxLength var if specified.
  125. // All output from slug.Make(text) should pass this test.
  126. func IsSlug(text string) bool {
  127. if text == "" ||
  128. (MaxLength > 0 && len(text) > MaxLength) ||
  129. text[0] == '-' || text[0] == '_' ||
  130. text[len(text)-1] == '-' || text[len(text)-1] == '_' {
  131. return false
  132. }
  133. for _, c := range text {
  134. if (c < 'a' || c > 'z') && c != '-' && c != '_' && (c < '0' || c > '9') {
  135. return false
  136. }
  137. }
  138. return true
  139. }