| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360 |
- package uaparser
- import (
- "fmt"
- "io/ioutil"
- "regexp"
- "sync"
- "sync/atomic"
- "sort"
- "time"
- "gopkg.in/yaml.v2"
- )
- type RegexesDefinitions struct {
- UA []*uaParser `yaml:"user_agent_parsers"`
- OS []*osParser `yaml:"os_parsers"`
- Device []*deviceParser `yaml:"device_parsers"`
- _ [4]byte // padding for alignment
- sync.RWMutex
- }
- type UserAgentSorter []*uaParser
- func (a UserAgentSorter) Len() int { return len(a) }
- func (a UserAgentSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
- func (a UserAgentSorter) Less(i, j int) bool { return atomic.LoadUint64(&a[i].MatchesCount) > atomic.LoadUint64(&a[j].MatchesCount) }
// uaParser is one entry of the "user_agent_parsers" list: a regular
// expression plus the replacement templates applied to its capture groups.
type uaParser struct {
	// Reg is the compiled form of Expr/Flags; it is filled in by mustCompile
	// rather than decoded from YAML.
	Reg *regexp.Regexp
	// Expr is the regex source, Flags the optional inline flags for it.
	Expr  string `yaml:"regex"`
	Flags string `yaml:"regex_flag"`
	// Replacement templates; empty ones are defaulted by setDefaults.
	FamilyReplacement string `yaml:"family_replacement"`
	V1Replacement     string `yaml:"v1_replacement"`
	V2Replacement     string `yaml:"v2_replacement"`
	V3Replacement     string `yaml:"v3_replacement"`

	// NOTE(review): presumably keeps MatchesCount 8-byte aligned on 32-bit
	// platforms, as sync/atomic requires for 64-bit operations. Confirm
	// before reordering fields.
	_ [4]byte // padding for alignment

	// MatchesCount is incremented atomically each time this parser is the
	// first one to match a line.
	MatchesCount uint64
}

// setDefaults fills every empty replacement template with the positional
// capture-group reference it falls back to ("$1" for the family, "$2".."$4"
// for the version parts). Templates that are already set are left untouched.
func (ua *uaParser) setDefaults() {
	fill := func(target *string, fallback string) {
		if *target == "" {
			*target = fallback
		}
	}
	fill(&ua.FamilyReplacement, "$1")
	fill(&ua.V1Replacement, "$2")
	fill(&ua.V2Replacement, "$3")
	fill(&ua.V3Replacement, "$4")
}
- type OsSorter []*osParser
- func (a OsSorter) Len() int { return len(a) }
- func (a OsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
- func (a OsSorter) Less(i, j int) bool { return atomic.LoadUint64(&a[i].MatchesCount) > atomic.LoadUint64(&a[j].MatchesCount) }
// osParser is one entry of the "os_parsers" list: a regular expression plus
// the replacement templates applied to its capture groups.
type osParser struct {
	// Reg is the compiled form of Expr/Flags; it is filled in by mustCompile
	// rather than decoded from YAML.
	Reg *regexp.Regexp
	// Expr is the regex source, Flags the optional inline flags for it.
	Expr  string `yaml:"regex"`
	Flags string `yaml:"regex_flag"`
	// Replacement templates; empty ones are defaulted by setDefaults.
	OSReplacement string `yaml:"os_replacement"`
	V1Replacement string `yaml:"os_v1_replacement"`
	V2Replacement string `yaml:"os_v2_replacement"`
	V3Replacement string `yaml:"os_v3_replacement"`
	V4Replacement string `yaml:"os_v4_replacement"`

	// NOTE(review): presumably keeps MatchesCount 8-byte aligned on 32-bit
	// platforms, as sync/atomic requires for 64-bit operations. Confirm
	// before reordering fields.
	_ [4]byte // padding for alignment

	// MatchesCount is incremented atomically each time this parser is the
	// first one to match a line.
	MatchesCount uint64
}

// setDefaults fills every empty replacement template with the positional
// capture-group reference it falls back to ("$1" for the OS name, "$2".."$5"
// for the version parts). Templates that are already set are left untouched.
func (os *osParser) setDefaults() {
	fill := func(target *string, fallback string) {
		if *target == "" {
			*target = fallback
		}
	}
	fill(&os.OSReplacement, "$1")
	fill(&os.V1Replacement, "$2")
	fill(&os.V2Replacement, "$3")
	fill(&os.V3Replacement, "$4")
	fill(&os.V4Replacement, "$5")
}
- type DeviceSorter []*deviceParser
- func (a DeviceSorter) Len() int { return len(a) }
- func (a DeviceSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
- func (a DeviceSorter) Less(i, j int) bool { return atomic.LoadUint64(&a[i].MatchesCount) > atomic.LoadUint64(&a[j].MatchesCount) }
// deviceParser is one entry of the "device_parsers" list: a regular
// expression plus the replacement templates applied to its capture groups.
type deviceParser struct {
	// Reg is the compiled form of Expr/Flags; it is filled in by mustCompile
	// rather than decoded from YAML.
	Reg *regexp.Regexp
	// Expr is the regex source, Flags the optional inline flags for it.
	Expr  string `yaml:"regex"`
	Flags string `yaml:"regex_flag"`
	// Replacement templates; setDefaults fills the device and model ones when
	// empty and leaves the brand one as decoded.
	DeviceReplacement string `yaml:"device_replacement"`
	BrandReplacement  string `yaml:"brand_replacement"`
	ModelReplacement  string `yaml:"model_replacement"`

	// NOTE(review): presumably keeps MatchesCount 8-byte aligned on 32-bit
	// platforms, as sync/atomic requires for 64-bit operations. Confirm
	// before reordering fields.
	_ [4]byte // padding for alignment

	// MatchesCount is incremented atomically each time this parser is the
	// first one to match a line.
	MatchesCount uint64
}

// setDefaults makes an empty device or model template fall back to the first
// capture group ("$1"). BrandReplacement has no default and is not modified.
func (device *deviceParser) setDefaults() {
	for _, target := range []*string{&device.DeviceReplacement, &device.ModelReplacement} {
		if *target == "" {
			*target = "$1"
		}
	}
}
- type Client struct {
- UserAgent *UserAgent
- Os *Os
- Device *Device
- }
- type Parser struct {
- RegexesDefinitions
- UserAgentMisses uint64
- OsMisses uint64
- DeviceMisses uint64
- Mode int
- UseSort bool
- debugMode bool
- }
// Lookup-mode flags. They are single bits so they can be OR-ed together into
// Parser.Mode.
const (
	EOsLookUpMode        = 1 << iota // 00000001
	EUserAgentLookUpMode             // 00000010
	EDeviceLookUpMode                // 00000100
)

// Limits and defaults for the tuning options.
const (
	// cMinMissesTreshold is the value a caller-supplied threshold must exceed
	// for NewWithOptions to accept it.
	cMinMissesTreshold = 100000
	// cDefaultMissesTreshold and cDefaultMatchIdxNotOk are the values New
	// restores.
	cDefaultMissesTreshold = 500000
	cDefaultMatchIdxNotOk  = 20
	cDefaultSortOption     = false
)

// Package-level tuning state shared by every Parser in the process.
var (
	// missesTreshold is the miss count at which checkAndSort re-sorts a list.
	missesTreshold = uint64(cDefaultMissesTreshold)
	// matchIdxNotOk is the highest match index that is not counted as a miss.
	matchIdxNotOk = cDefaultMatchIdxNotOk
)
- func (parser *Parser) mustCompile() { // until we can use yaml.UnmarshalYAML with embedded pointer struct
- for _, p := range parser.UA {
- p.Reg = compileRegex(p.Flags, p.Expr)
- p.setDefaults()
- }
- for _, p := range parser.OS {
- p.Reg = compileRegex(p.Flags, p.Expr)
- p.setDefaults()
- }
- for _, p := range parser.Device {
- p.Reg = compileRegex(p.Flags, p.Expr)
- p.setDefaults()
- }
- }
- func NewWithOptions(regexFile string, mode, treshold, topCnt int, useSort, debugMode bool) (*Parser, error) {
- data, err := ioutil.ReadFile(regexFile)
- if nil != err {
- return nil, err
- }
- if topCnt >= 0 {
- matchIdxNotOk = topCnt
- }
- if treshold > cMinMissesTreshold {
- missesTreshold = uint64(treshold)
- }
- parser, err := NewFromBytes(data)
- if err != nil {
- return nil, err
- }
- parser.Mode = mode
- parser.UseSort = useSort
- parser.debugMode = debugMode
- return parser, nil
- }
- func New(regexFile string) (*Parser, error) {
- data, err := ioutil.ReadFile(regexFile)
- if nil != err {
- return nil, err
- }
- matchIdxNotOk = cDefaultMatchIdxNotOk
- missesTreshold = cDefaultMissesTreshold
- parser, err := NewFromBytes(data)
- if err != nil {
- return nil, err
- }
- return parser, nil
- }
- func NewFromSaved() *Parser {
- parser, err := NewFromBytes(definitionYaml)
- if err != nil {
- // if the YAML is malformed, it's a programmatic error inside what
- // we've statically-compiled in our binary. Panic!
- panic(err.Error())
- }
- return parser
- }
- func NewFromBytes(data []byte) (*Parser, error) {
- var definitions RegexesDefinitions
- if err := yaml.Unmarshal(data, &definitions); err != nil {
- return nil, err
- }
- parser := &Parser{definitions, 0, 0, 0, (EOsLookUpMode|EUserAgentLookUpMode|EDeviceLookUpMode), false, false}
- parser.mustCompile()
- return parser, nil
- }
- func (parser *Parser) Parse(line string) *Client {
- cli := new(Client)
- var wg sync.WaitGroup
- if EUserAgentLookUpMode & parser.Mode == EUserAgentLookUpMode {
- wg.Add(1)
- go func() {
- defer wg.Done()
- parser.RLock()
- cli.UserAgent = parser.ParseUserAgent(line)
- parser.RUnlock()
- }()
- }
- if EOsLookUpMode & parser.Mode == EOsLookUpMode {
- wg.Add(1)
- go func() {
- defer wg.Done()
- parser.RLock()
- cli.Os = parser.ParseOs(line)
- parser.RUnlock()
- }()
- }
- if EDeviceLookUpMode & parser.Mode == EDeviceLookUpMode {
- wg.Add(1)
- go func() {
- defer wg.Done()
- parser.RLock()
- cli.Device = parser.ParseDevice(line)
- parser.RUnlock()
- }()
- }
- wg.Wait()
- if parser.UseSort == true {
- checkAndSort(parser)
- }
- return cli
- }
- func (parser *Parser) ParseUserAgent(line string) *UserAgent {
- ua := new(UserAgent)
- foundIdx := -1
- found := false
- for i, uaPattern := range parser.UA {
- uaPattern.Match(line, ua)
- if len(ua.Family) > 0 {
- found = true
- foundIdx = i
- atomic.AddUint64(&uaPattern.MatchesCount, 1)
- break
- }
- }
- if !found {
- ua.Family = "Other"
- }
- if(foundIdx > matchIdxNotOk) {
- atomic.AddUint64(&parser.UserAgentMisses, 1)
- }
- return ua
- }
- func (parser *Parser) ParseOs(line string) *Os {
- os := new(Os)
- foundIdx := -1
- found := false
- for i, osPattern := range parser.OS {
- osPattern.Match(line, os)
- if len(os.Family) > 0 {
- found = true
- foundIdx = i
- atomic.AddUint64(&osPattern.MatchesCount, 1)
- break
- }
- }
- if !found {
- os.Family = "Other"
- }
- if(foundIdx > matchIdxNotOk) {
- atomic.AddUint64(&parser.OsMisses, 1)
- }
- return os
- }
- func (parser *Parser) ParseDevice(line string) *Device {
- dvc := new(Device)
- foundIdx := -1
- found := false
- for i, dvcPattern := range parser.Device {
- dvcPattern.Match(line, dvc)
- if len(dvc.Family) > 0 {
- found = true
- foundIdx = i
- atomic.AddUint64(&dvcPattern.MatchesCount, 1)
- break
- }
- }
- if !found {
- dvc.Family = "Other"
- }
- if(foundIdx > matchIdxNotOk) {
- atomic.AddUint64(&parser.DeviceMisses, 1)
- }
- return dvc
- }
- func checkAndSort(parser *Parser) {
- parser.Lock()
- if(atomic.LoadUint64(&parser.UserAgentMisses) >= missesTreshold) {
- if parser.debugMode {
- fmt.Printf("%s\tSorting UserAgents slice\n", time.Now());
- }
- parser.UserAgentMisses = 0
- sort.Sort(UserAgentSorter(parser.UA));
- }
- parser.Unlock()
- parser.Lock()
- if(atomic.LoadUint64(&parser.OsMisses) >= missesTreshold) {
- if parser.debugMode {
- fmt.Printf("%s\tSorting OS slice\n", time.Now());
- }
- parser.OsMisses = 0
- sort.Sort(OsSorter(parser.OS));
- }
- parser.Unlock()
- parser.Lock()
- if(atomic.LoadUint64(&parser.DeviceMisses) >= missesTreshold) {
- if parser.debugMode {
- fmt.Printf("%s\tSorting Device slice\n", time.Now());
- }
- parser.DeviceMisses = 0
- sort.Sort(DeviceSorter(parser.Device));
- }
- parser.Unlock()
- }
// compileRegex compiles expr, prefixing it with an inline flag group
// "(?flags)" when flags is non-empty. It uses regexp.MustCompile and
// therefore panics if the resulting pattern is invalid.
func compileRegex(flags, expr string) *regexp.Regexp {
	pattern := expr
	if flags != "" {
		pattern = fmt.Sprintf("(?%s)%s", flags, expr)
	}
	return regexp.MustCompile(pattern)
}
|