shortid.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. // Copyright (c) 2016-2017. Oleg Sklyar & teris.io. All rights reserved.
  2. // See the LICENSE file in the project root for licensing information.
  3. // Original algorithm:
  4. // Copyright (c) 2015 Dylan Greene, contributors: https://github.com/dylang/shortid.
  5. // MIT-license as found in the LICENSE file.
  6. // Seed computation: based on The Central Randomizer 1.3
  7. // Copyright (c) 1997 Paul Houle (houle@msc.cornell.edu)
  8. // Package shortid enables the generation of short, unique, non-sequential and by default URL friendly
  9. // Ids. The package is heavily inspired by the node.js https://github.com/dylang/shortid library.
  10. //
  11. // Id Length
  12. //
  13. // The standard Id length is 9 symbols when generated at a rate of 1 Id per millisecond,
  14. // occasionally it reaches 11 (at the rate of a few thousand Ids per millisecond) and very-very
  15. // rarely it can go beyond that during continuous generation at full throttle on high-performant
  16. // hardware. A test generating 500k Ids at full throttle on conventional hardware generated the
  17. // following Ids at the head and the tail (length > 9 is expected for this test):
  18. //
  19. // -NDveu-9Q
  20. // iNove6iQ9J
  21. // NVDve6-9Q
  22. // VVDvc6i99J
  23. // NVovc6-QQy
  24. // VVoveui9QC
  25. // ...
  26. // tFmGc6iQQs
  27. // KpTvcui99k
  28. // KFTGcuiQ9p
  29. // KFmGeu-Q9O
  30. // tFTvcu-QQt
  31. // tpTveu-99u
  32. //
  33. // Life span
  34. //
  35. // The package guarantees the generation of unique Ids with zero collisions for 34 years
  36. // (1/1/2016-1/1/2050) using the same worker Id within a single (although concurrent) application if
  37. // application restarts take longer than 1 millisecond. The package supports up to 32 works, all
  38. // providing unique sequences.
  39. //
  40. // Implementation details
  41. //
  42. // Although heavily inspired by the node.js shortid library this is
  43. // not a simple Go port. In addition it
  44. //
  45. // - is safe to concurrency;
  46. // - does not require any yearly version/epoch resets;
  47. // - provides stable Id size over a long period at the rate of 1ms;
  48. // - guarantees no collisions (due to guaranteed fixed size of Ids between milliseconds and because
  49. // multiple requests within the same ms lead to longer Ids with the prefix unique to the ms);
  50. // - supports 32 over 16 workers.
  51. //
  52. // The algorithm uses less randomness than the original node.js implementation, which permits to
  53. // extend the life span as well as reduce and guarantee the length. In general terms, each Id
  54. // has the following 3 pieces of information encoded: the millisecond (first 8 symbols), the worker
  55. // Id (9th symbol), running concurrent counter within the same millisecond, only if required, over
  56. // all remaining symbols. The element of randomness per symbol is 1/2 for the worker and the
  57. // millisecond and 0 for the counter. Here 0 means no randomness, i.e. every value is encoded using
  58. // a 64-base alphabet; 1/2 means one of two matching symbols of the supplied alphabet, 1/4 one of
  59. // four matching symbols. The original algorithm of the node.js module uses 1/4 throughout.
  60. //
  61. // All methods accepting the parameters that govern the randomness are exported and can be used
  62. // to directly implement an algorithm with e.g. more randomness, but with longer Ids and shorter
  63. // life spans.
  64. package shortid
  65. import (
  66. randc "crypto/rand"
  67. "errors"
  68. "fmt"
  69. "math"
  70. randm "math/rand"
  71. "sync"
  72. "sync/atomic"
  73. "time"
  74. "unsafe"
  75. )
  76. // Version defined the library version.
  77. const Version = 1.1
  78. // DefaultABC is the default URL-friendly alphabet.
  79. const DefaultABC = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"
  80. // Abc represents a shuffled alphabet used to generate the Ids and provides methods to
  81. // encode data.
  82. type Abc struct {
  83. alphabet []rune
  84. }
  85. // Shortid type represents a short Id generator working with a given alphabet.
  86. type Shortid struct {
  87. abc Abc
  88. worker uint
  89. epoch time.Time // ids can be generated for 34 years since this date
  90. ms uint // ms since epoch for the last id
  91. count uint // request count within the same ms
  92. mx sync.Mutex // locks access to ms and count
  93. }
  94. var shortid *Shortid
  95. func init() {
  96. shortid = MustNew(0, DefaultABC, 1)
  97. }
  98. // GetDefault retrieves the default short Id generator initialised with the default alphabet,
  99. // worker=0 and seed=1. The default can be overwritten using SetDefault.
  100. func GetDefault() *Shortid {
  101. return (*Shortid)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&shortid))))
  102. }
  103. // SetDefault overwrites the default generator.
  104. func SetDefault(sid *Shortid) {
  105. target := (*unsafe.Pointer)(unsafe.Pointer(&shortid))
  106. source := unsafe.Pointer(sid)
  107. atomic.SwapPointer(target, source)
  108. }
  109. // Generate generates an Id using the default generator.
  110. func Generate() (string, error) {
  111. return shortid.Generate()
  112. }
  113. // MustGenerate acts just like Generate, but panics instead of returning errors.
  114. func MustGenerate() string {
  115. id, err := Generate()
  116. if err == nil {
  117. return id
  118. }
  119. panic(err)
  120. }
  121. // New constructs an instance of the short Id generator for the given worker number [0,31], alphabet
  122. // (64 unique symbols) and seed value (to shuffle the alphabet). The worker number should be
  123. // different for multiple or distributed processes generating Ids into the same data space. The
  124. // seed, on contrary, should be identical.
  125. func New(worker uint8, alphabet string, seed uint64) (*Shortid, error) {
  126. if worker > 31 {
  127. return nil, errors.New("expected worker in the range [0,31]")
  128. }
  129. abc, err := NewAbc(alphabet, seed)
  130. if err == nil {
  131. sid := &Shortid{
  132. abc: abc,
  133. worker: uint(worker),
  134. epoch: time.Date(2016, time.January, 1, 0, 0, 0, 0, time.UTC),
  135. ms: 0,
  136. count: 0,
  137. }
  138. return sid, nil
  139. }
  140. return nil, err
  141. }
  142. // MustNew acts just like New, but panics instead of returning errors.
  143. func MustNew(worker uint8, alphabet string, seed uint64) *Shortid {
  144. sid, err := New(worker, alphabet, seed)
  145. if err == nil {
  146. return sid
  147. }
  148. panic(err)
  149. }
  150. // Generate generates a new short Id.
  151. func (sid *Shortid) Generate() (string, error) {
  152. return sid.GenerateInternal(nil, sid.epoch)
  153. }
  154. // MustGenerate acts just like Generate, but panics instead of returning errors.
  155. func (sid *Shortid) MustGenerate() string {
  156. id, err := sid.Generate()
  157. if err == nil {
  158. return id
  159. }
  160. panic(err)
  161. }
  162. // GenerateInternal should only be used for testing purposes.
  163. func (sid *Shortid) GenerateInternal(tm *time.Time, epoch time.Time) (string, error) {
  164. ms, count := sid.getMsAndCounter(tm, epoch)
  165. idrunes := make([]rune, 9)
  166. if tmp, err := sid.abc.Encode(ms, 8, 5); err == nil {
  167. copy(idrunes, tmp) // first 8 symbols
  168. } else {
  169. return "", err
  170. }
  171. if tmp, err := sid.abc.Encode(sid.worker, 1, 5); err == nil {
  172. idrunes[8] = tmp[0]
  173. } else {
  174. return "", err
  175. }
  176. if count > 0 {
  177. if countrunes, err := sid.abc.Encode(count, 0, 6); err == nil {
  178. // only extend if really need it
  179. idrunes = append(idrunes, countrunes...)
  180. } else {
  181. return "", err
  182. }
  183. }
  184. return string(idrunes), nil
  185. }
  186. func (sid *Shortid) getMsAndCounter(tm *time.Time, epoch time.Time) (uint, uint) {
  187. sid.mx.Lock()
  188. defer sid.mx.Unlock()
  189. var ms uint
  190. if tm != nil {
  191. ms = uint(tm.Sub(epoch).Nanoseconds() / 1000000)
  192. } else {
  193. ms = uint(time.Now().Sub(epoch).Nanoseconds() / 1000000)
  194. }
  195. if ms == sid.ms {
  196. sid.count++
  197. } else {
  198. sid.count = 0
  199. sid.ms = ms
  200. }
  201. return sid.ms, sid.count
  202. }
  203. // String returns a string representation of the short Id generator.
  204. func (sid *Shortid) String() string {
  205. return fmt.Sprintf("Shortid(worker=%v, epoch=%v, abc=%v)", sid.worker, sid.epoch, sid.abc)
  206. }
  207. // Abc returns the instance of alphabet used for representing the Ids.
  208. func (sid *Shortid) Abc() Abc {
  209. return sid.abc
  210. }
  211. // Epoch returns the value of epoch used as the beginning of millisecond counting (normally
  212. // 2016-01-01 00:00:00 local time)
  213. func (sid *Shortid) Epoch() time.Time {
  214. return sid.epoch
  215. }
  216. // Worker returns the value of worker for this short Id generator.
  217. func (sid *Shortid) Worker() uint {
  218. return sid.worker
  219. }
  220. // NewAbc constructs a new instance of shuffled alphabet to be used for Id representation.
  221. func NewAbc(alphabet string, seed uint64) (Abc, error) {
  222. runes := []rune(alphabet)
  223. if len(runes) != len(DefaultABC) {
  224. return Abc{}, fmt.Errorf("alphabet must contain %v unique characters", len(DefaultABC))
  225. }
  226. if nonUnique(runes) {
  227. return Abc{}, errors.New("alphabet must contain unique characters only")
  228. }
  229. abc := Abc{alphabet: nil}
  230. abc.shuffle(alphabet, seed)
  231. return abc, nil
  232. }
  233. // MustNewAbc acts just like NewAbc, but panics instead of returning errors.
  234. func MustNewAbc(alphabet string, seed uint64) Abc {
  235. res, err := NewAbc(alphabet, seed)
  236. if err == nil {
  237. return res
  238. }
  239. panic(err)
  240. }
  241. func nonUnique(runes []rune) bool {
  242. found := make(map[rune]struct{})
  243. for _, r := range runes {
  244. if _, seen := found[r]; !seen {
  245. found[r] = struct{}{}
  246. }
  247. }
  248. return len(found) < len(runes)
  249. }
  250. func (abc *Abc) shuffle(alphabet string, seed uint64) {
  251. source := []rune(alphabet)
  252. for len(source) > 1 {
  253. seed = (seed*9301 + 49297) % 233280
  254. i := int(seed * uint64(len(source)) / 233280)
  255. abc.alphabet = append(abc.alphabet, source[i])
  256. source = append(source[:i], source[i+1:]...)
  257. }
  258. abc.alphabet = append(abc.alphabet, source[0])
  259. }
  260. // Encode encodes a given value into a slice of runes of length nsymbols. In case nsymbols==0, the
  261. // length of the result is automatically computed from data. Even if fewer symbols is required to
  262. // encode the data than nsymbols, all positions are used encoding 0 where required to guarantee
  263. // uniqueness in case further data is added to the sequence. The value of digits [4,6] represents
  264. // represents n in 2^n, which defines how much randomness flows into the algorithm: 4 -- every value
  265. // can be represented by 4 symbols in the alphabet (permitting at most 16 values), 5 -- every value
  266. // can be represented by 2 symbols in the alphabet (permitting at most 32 values), 6 -- every value
  267. // is represented by exactly 1 symbol with no randomness (permitting 64 values).
  268. func (abc *Abc) Encode(val, nsymbols, digits uint) ([]rune, error) {
  269. if digits < 4 || 6 < digits {
  270. return nil, fmt.Errorf("allowed digits range [4,6], found %v", digits)
  271. }
  272. var computedSize uint = 1
  273. if val >= 1 {
  274. computedSize = uint(math.Log2(float64(val)))/digits + 1
  275. }
  276. if nsymbols == 0 {
  277. nsymbols = computedSize
  278. } else if nsymbols < computedSize {
  279. return nil, fmt.Errorf("cannot accommodate data, need %v digits, got %v", computedSize, nsymbols)
  280. }
  281. mask := 1<<digits - 1
  282. random := make([]int, int(nsymbols))
  283. // no random component if digits == 6
  284. if digits < 6 {
  285. copy(random, maskedRandomInts(len(random), 0x3f-mask))
  286. }
  287. res := make([]rune, int(nsymbols))
  288. for i := range res {
  289. shift := digits * uint(i)
  290. index := (int(val>>shift) & mask) | random[i]
  291. res[i] = abc.alphabet[index]
  292. }
  293. return res, nil
  294. }
  295. // MustEncode acts just like Encode, but panics instead of returning errors.
  296. func (abc *Abc) MustEncode(val, size, digits uint) []rune {
  297. res, err := abc.Encode(val, size, digits)
  298. if err == nil {
  299. return res
  300. }
  301. panic(err)
  302. }
  303. func maskedRandomInts(size, mask int) []int {
  304. ints := make([]int, size)
  305. bytes := make([]byte, size)
  306. if _, err := randc.Read(bytes); err == nil {
  307. for i, b := range bytes {
  308. ints[i] = int(b) & mask
  309. }
  310. } else {
  311. for i := range ints {
  312. ints[i] = randm.Intn(0xff) & mask
  313. }
  314. }
  315. return ints
  316. }
  317. // String returns a string representation of the Abc instance.
  318. func (abc Abc) String() string {
  319. return fmt.Sprintf("Abc{alphabet='%v')", abc.Alphabet())
  320. }
  321. // Alphabet returns the alphabet used as an immutable string.
  322. func (abc Abc) Alphabet() string {
  323. return string(abc.alphabet)
  324. }