parser.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. package uaparser
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "regexp"
  6. "sync"
  7. "sync/atomic"
  8. "sort"
  9. "time"
  10. "gopkg.in/yaml.v2"
  11. )
// RegexesDefinitions holds the three ordered parser lists decoded from a YAML
// document, together with the lock that guards them.
type RegexesDefinitions struct {
	// UA is decoded from the "user_agent_parsers" key; ParseUserAgent walks it in order.
	UA []*uaParser `yaml:"user_agent_parsers"`
	// OS is decoded from the "os_parsers" key; ParseOs walks it in order.
	OS []*osParser `yaml:"os_parsers"`
	// Device is decoded from the "device_parsers" key; ParseDevice walks it in order.
	Device []*deviceParser `yaml:"device_parsers"`
	// Explicit padding for alignment.
	// NOTE(review): presumably this keeps the following fields 8-byte aligned
	// on 32-bit targets — confirm before moving or removing it.
	_ [4]byte
	// Embedded lock: Parse takes the read lock around each lookup and
	// checkAndSort takes the write lock while re-ordering the slices above.
	sync.RWMutex
}
  19. type UserAgentSorter []*uaParser
  20. func (a UserAgentSorter) Len() int { return len(a) }
  21. func (a UserAgentSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  22. func (a UserAgentSorter) Less(i, j int) bool { return atomic.LoadUint64(&a[i].MatchesCount) > atomic.LoadUint64(&a[j].MatchesCount) }
// uaParser is one user-agent matching rule decoded from YAML.
type uaParser struct {
	// Reg is the compiled pattern; mustCompile fills it in from Flags and Expr.
	Reg *regexp.Regexp
	// Expr is the regular expression source ("regex" key).
	Expr string `yaml:"regex"`
	// Flags, when non-empty, is wrapped as "(?flags)" in front of Expr at compile time.
	Flags string `yaml:"regex_flag"`
	// Replacement templates; setDefaults fills empty ones with "$1".."$4".
	FamilyReplacement string `yaml:"family_replacement"`
	V1Replacement     string `yaml:"v1_replacement"`
	V2Replacement     string `yaml:"v2_replacement"`
	V3Replacement     string `yaml:"v3_replacement"`
	// Explicit padding for alignment.
	// NOTE(review): presumably keeps MatchesCount 64-bit aligned for
	// sync/atomic on 32-bit targets — confirm before reordering fields.
	_ [4]byte
	// MatchesCount is incremented atomically by ParseUserAgent each time this
	// rule matches, and read atomically by UserAgentSorter.Less.
	MatchesCount uint64
}
  34. func (ua *uaParser) setDefaults() {
  35. if ua.FamilyReplacement == "" {
  36. ua.FamilyReplacement = "$1"
  37. }
  38. if ua.V1Replacement == "" {
  39. ua.V1Replacement = "$2"
  40. }
  41. if ua.V2Replacement == "" {
  42. ua.V2Replacement = "$3"
  43. }
  44. if ua.V3Replacement == "" {
  45. ua.V3Replacement = "$4"
  46. }
  47. }
  48. type OsSorter []*osParser
  49. func (a OsSorter) Len() int { return len(a) }
  50. func (a OsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  51. func (a OsSorter) Less(i, j int) bool { return atomic.LoadUint64(&a[i].MatchesCount) > atomic.LoadUint64(&a[j].MatchesCount) }
// osParser is one operating-system matching rule decoded from YAML.
type osParser struct {
	// Reg is the compiled pattern; mustCompile fills it in from Flags and Expr.
	Reg *regexp.Regexp
	// Expr is the regular expression source ("regex" key).
	Expr string `yaml:"regex"`
	// Flags, when non-empty, is wrapped as "(?flags)" in front of Expr at compile time.
	Flags string `yaml:"regex_flag"`
	// Replacement templates; setDefaults fills empty ones with "$1".."$5".
	OSReplacement string `yaml:"os_replacement"`
	V1Replacement string `yaml:"os_v1_replacement"`
	V2Replacement string `yaml:"os_v2_replacement"`
	V3Replacement string `yaml:"os_v3_replacement"`
	V4Replacement string `yaml:"os_v4_replacement"`
	// Explicit padding for alignment.
	// NOTE(review): presumably keeps MatchesCount 64-bit aligned for
	// sync/atomic on 32-bit targets — confirm before reordering fields.
	_ [4]byte
	// MatchesCount is incremented atomically by ParseOs each time this rule
	// matches, and read atomically by OsSorter.Less.
	MatchesCount uint64
}
  64. func (os *osParser) setDefaults() {
  65. if os.OSReplacement == "" {
  66. os.OSReplacement = "$1"
  67. }
  68. if os.V1Replacement == "" {
  69. os.V1Replacement = "$2"
  70. }
  71. if os.V2Replacement == "" {
  72. os.V2Replacement = "$3"
  73. }
  74. if os.V3Replacement == "" {
  75. os.V3Replacement = "$4"
  76. }
  77. if os.V4Replacement == "" {
  78. os.V4Replacement = "$5"
  79. }
  80. }
  81. type DeviceSorter []*deviceParser
  82. func (a DeviceSorter) Len() int { return len(a) }
  83. func (a DeviceSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  84. func (a DeviceSorter) Less(i, j int) bool { return atomic.LoadUint64(&a[i].MatchesCount) > atomic.LoadUint64(&a[j].MatchesCount) }
// deviceParser is one device matching rule decoded from YAML.
type deviceParser struct {
	// Reg is the compiled pattern; mustCompile fills it in from Flags and Expr.
	Reg *regexp.Regexp
	// Expr is the regular expression source ("regex" key).
	Expr string `yaml:"regex"`
	// Flags, when non-empty, is wrapped as "(?flags)" in front of Expr at compile time.
	Flags string `yaml:"regex_flag"`
	// Replacement templates. setDefaults fills an empty DeviceReplacement or
	// ModelReplacement with "$1"; BrandReplacement is given no default.
	DeviceReplacement string `yaml:"device_replacement"`
	BrandReplacement  string `yaml:"brand_replacement"`
	ModelReplacement  string `yaml:"model_replacement"`
	// Explicit padding for alignment.
	// NOTE(review): presumably keeps MatchesCount 64-bit aligned for
	// sync/atomic on 32-bit targets — confirm before reordering fields.
	_ [4]byte
	// MatchesCount is incremented atomically by ParseDevice each time this
	// rule matches, and read atomically by DeviceSorter.Less.
	MatchesCount uint64
}
  95. func (device *deviceParser) setDefaults() {
  96. if device.DeviceReplacement == "" {
  97. device.DeviceReplacement = "$1"
  98. }
  99. if device.ModelReplacement == "" {
  100. device.ModelReplacement = "$1"
  101. }
  102. }
// Client bundles the results of one Parse call.
// A field stays nil when the corresponding lookup bit is not set in
// Parser.Mode, because Parse only assigns the fields whose lookup it runs.
type Client struct {
	UserAgent *UserAgent // result of ParseUserAgent
	Os        *Os        // result of ParseOs
	Device    *Device    // result of ParseDevice
}
// Parser matches user-agent strings against the embedded rule lists and keeps
// the counters that drive optional re-sorting of those lists.
//
// Keep the field order stable: any unkeyed composite literal of Parser
// depends on it, and the uint64 counters are accessed through sync/atomic.
type Parser struct {
	// RegexesDefinitions supplies the UA/OS/Device rule slices and the RWMutex.
	RegexesDefinitions
	// The *Misses counters are incremented atomically by the matching ParseXxx
	// method whenever the matching rule sits at an index greater than
	// matchIdxNotOk; checkAndSort compares them against missesTreshold.
	UserAgentMisses uint64
	OsMisses        uint64
	DeviceMisses    uint64
	// Mode is a bitmask of EOsLookUpMode, EUserAgentLookUpMode and
	// EDeviceLookUpMode selecting which lookups Parse performs.
	Mode int
	// UseSort makes Parse call checkAndSort after every lookup.
	UseSort bool
	// debugMode makes checkAndSort print a line whenever it sorts a slice.
	debugMode bool
}
  117. const (
  118. EOsLookUpMode = 1 /* 00000001 */
  119. EUserAgentLookUpMode = 2 /* 00000010 */
  120. EDeviceLookUpMode = 4 /* 00000100 */
  121. cMinMissesTreshold = 100000
  122. cDefaultMissesTreshold = 500000
  123. cDefaultMatchIdxNotOk = 20
  124. cDefaultSortOption = false
  125. )
  126. var (
  127. missesTreshold = uint64(500000)
  128. matchIdxNotOk = 20
  129. )
  130. func (parser *Parser) mustCompile() { // until we can use yaml.UnmarshalYAML with embedded pointer struct
  131. for _, p := range parser.UA {
  132. p.Reg = compileRegex(p.Flags, p.Expr)
  133. p.setDefaults()
  134. }
  135. for _, p := range parser.OS {
  136. p.Reg = compileRegex(p.Flags, p.Expr)
  137. p.setDefaults()
  138. }
  139. for _, p := range parser.Device {
  140. p.Reg = compileRegex(p.Flags, p.Expr)
  141. p.setDefaults()
  142. }
  143. }
  144. func NewWithOptions(regexFile string, mode, treshold, topCnt int, useSort, debugMode bool) (*Parser, error) {
  145. data, err := ioutil.ReadFile(regexFile)
  146. if nil != err {
  147. return nil, err
  148. }
  149. if topCnt >= 0 {
  150. matchIdxNotOk = topCnt
  151. }
  152. if treshold > cMinMissesTreshold {
  153. missesTreshold = uint64(treshold)
  154. }
  155. parser, err := NewFromBytes(data)
  156. if err != nil {
  157. return nil, err
  158. }
  159. parser.Mode = mode
  160. parser.UseSort = useSort
  161. parser.debugMode = debugMode
  162. return parser, nil
  163. }
  164. func New(regexFile string) (*Parser, error) {
  165. data, err := ioutil.ReadFile(regexFile)
  166. if nil != err {
  167. return nil, err
  168. }
  169. matchIdxNotOk = cDefaultMatchIdxNotOk
  170. missesTreshold = cDefaultMissesTreshold
  171. parser, err := NewFromBytes(data)
  172. if err != nil {
  173. return nil, err
  174. }
  175. return parser, nil
  176. }
  177. func NewFromSaved() *Parser {
  178. parser, err := NewFromBytes(definitionYaml)
  179. if err != nil {
  180. // if the YAML is malformed, it's a programmatic error inside what
  181. // we've statically-compiled in our binary. Panic!
  182. panic(err.Error())
  183. }
  184. return parser
  185. }
  186. func NewFromBytes(data []byte) (*Parser, error) {
  187. var definitions RegexesDefinitions
  188. if err := yaml.Unmarshal(data, &definitions); err != nil {
  189. return nil, err
  190. }
  191. parser := &Parser{definitions, 0, 0, 0, (EOsLookUpMode|EUserAgentLookUpMode|EDeviceLookUpMode), false, false}
  192. parser.mustCompile()
  193. return parser, nil
  194. }
  195. func (parser *Parser) Parse(line string) *Client {
  196. cli := new(Client)
  197. var wg sync.WaitGroup
  198. if EUserAgentLookUpMode & parser.Mode == EUserAgentLookUpMode {
  199. wg.Add(1)
  200. go func() {
  201. defer wg.Done()
  202. parser.RLock()
  203. cli.UserAgent = parser.ParseUserAgent(line)
  204. parser.RUnlock()
  205. }()
  206. }
  207. if EOsLookUpMode & parser.Mode == EOsLookUpMode {
  208. wg.Add(1)
  209. go func() {
  210. defer wg.Done()
  211. parser.RLock()
  212. cli.Os = parser.ParseOs(line)
  213. parser.RUnlock()
  214. }()
  215. }
  216. if EDeviceLookUpMode & parser.Mode == EDeviceLookUpMode {
  217. wg.Add(1)
  218. go func() {
  219. defer wg.Done()
  220. parser.RLock()
  221. cli.Device = parser.ParseDevice(line)
  222. parser.RUnlock()
  223. }()
  224. }
  225. wg.Wait()
  226. if parser.UseSort == true {
  227. checkAndSort(parser)
  228. }
  229. return cli
  230. }
  231. func (parser *Parser) ParseUserAgent(line string) *UserAgent {
  232. ua := new(UserAgent)
  233. foundIdx := -1
  234. found := false
  235. for i, uaPattern := range parser.UA {
  236. uaPattern.Match(line, ua)
  237. if len(ua.Family) > 0 {
  238. found = true
  239. foundIdx = i
  240. atomic.AddUint64(&uaPattern.MatchesCount, 1)
  241. break
  242. }
  243. }
  244. if !found {
  245. ua.Family = "Other"
  246. }
  247. if(foundIdx > matchIdxNotOk) {
  248. atomic.AddUint64(&parser.UserAgentMisses, 1)
  249. }
  250. return ua
  251. }
  252. func (parser *Parser) ParseOs(line string) *Os {
  253. os := new(Os)
  254. foundIdx := -1
  255. found := false
  256. for i, osPattern := range parser.OS {
  257. osPattern.Match(line, os)
  258. if len(os.Family) > 0 {
  259. found = true
  260. foundIdx = i
  261. atomic.AddUint64(&osPattern.MatchesCount, 1)
  262. break
  263. }
  264. }
  265. if !found {
  266. os.Family = "Other"
  267. }
  268. if(foundIdx > matchIdxNotOk) {
  269. atomic.AddUint64(&parser.OsMisses, 1)
  270. }
  271. return os
  272. }
  273. func (parser *Parser) ParseDevice(line string) *Device {
  274. dvc := new(Device)
  275. foundIdx := -1
  276. found := false
  277. for i, dvcPattern := range parser.Device {
  278. dvcPattern.Match(line, dvc)
  279. if len(dvc.Family) > 0 {
  280. found = true
  281. foundIdx = i
  282. atomic.AddUint64(&dvcPattern.MatchesCount, 1)
  283. break
  284. }
  285. }
  286. if !found {
  287. dvc.Family = "Other"
  288. }
  289. if(foundIdx > matchIdxNotOk) {
  290. atomic.AddUint64(&parser.DeviceMisses, 1)
  291. }
  292. return dvc
  293. }
  294. func checkAndSort(parser *Parser) {
  295. parser.Lock()
  296. if(atomic.LoadUint64(&parser.UserAgentMisses) >= missesTreshold) {
  297. if parser.debugMode {
  298. fmt.Printf("%s\tSorting UserAgents slice\n", time.Now());
  299. }
  300. parser.UserAgentMisses = 0
  301. sort.Sort(UserAgentSorter(parser.UA));
  302. }
  303. parser.Unlock()
  304. parser.Lock()
  305. if(atomic.LoadUint64(&parser.OsMisses) >= missesTreshold) {
  306. if parser.debugMode {
  307. fmt.Printf("%s\tSorting OS slice\n", time.Now());
  308. }
  309. parser.OsMisses = 0
  310. sort.Sort(OsSorter(parser.OS));
  311. }
  312. parser.Unlock()
  313. parser.Lock()
  314. if(atomic.LoadUint64(&parser.DeviceMisses) >= missesTreshold) {
  315. if parser.debugMode {
  316. fmt.Printf("%s\tSorting Device slice\n", time.Now());
  317. }
  318. parser.DeviceMisses = 0
  319. sort.Sort(DeviceSorter(parser.Device));
  320. }
  321. parser.Unlock()
  322. }
  323. func compileRegex(flags, expr string) *regexp.Regexp {
  324. if flags == "" {
  325. return regexp.MustCompile(expr)
  326. } else {
  327. return regexp.MustCompile(fmt.Sprintf("(?%s)%s", flags, expr))
  328. }
  329. }