1
+ package nginx_log
2
+
3
+ import (
4
+ "regexp"
5
+ "time"
6
+ )
7
+
8
// AccessLogEntry represents a parsed access log entry.
//
// Field order and JSON tags are part of the serialized contract; do not
// reorder or rename them without checking consumers.
type AccessLogEntry struct {
	Timestamp time.Time `json:"timestamp"`

	// Client address plus location fields (presumably filled by a geo
	// lookup elsewhere; nothing in this file populates them).
	IP         string `json:"ip"`
	RegionCode string `json:"region_code"`
	Province   string `json:"province"`
	City       string `json:"city"`

	// Request and response attributes.
	Method    string `json:"method"`
	Path      string `json:"path"`
	Protocol  string `json:"protocol"`
	Status    int    `json:"status"`
	BytesSent int64  `json:"bytes_sent"`
	Referer   string `json:"referer"`

	// UserAgent together with the same attribute set carried by
	// UserAgentInfo (Browser, BrowserVer, OS, OSVersion, DeviceType).
	UserAgent  string `json:"user_agent"`
	Browser    string `json:"browser"`
	BrowserVer string `json:"browser_version"`
	OS         string `json:"os"`
	OSVersion  string `json:"os_version"`
	DeviceType string `json:"device_type"`

	// Timing fields are omitted from JSON when unset: RequestTime when it
	// is zero, UpstreamTime when the pointer is nil. The pointer lets an
	// absent upstream time be told apart from a measured zero.
	// NOTE(review): units are not established in this file; confirm.
	RequestTime  float64  `json:"request_time,omitempty"`
	UpstreamTime *float64 `json:"upstream_time,omitempty"`

	// Raw is presumably the original, unparsed log line; verify against
	// the parser that fills it.
	Raw string `json:"raw"`
}
31
+
32
// LogFormat describes one nginx access log layout.
type LogFormat struct {
	// Name is a short identifier for the format (for example "combined").
	Name string
	// Pattern is the compiled expression applied to a single log line.
	Pattern *regexp.Regexp
	// Fields names the values captured by Pattern. In the predefined
	// formats of this package there is exactly one name per capture
	// group, listed in group order.
	Fields []string
}
38
+
39
+ // UserAgentParser interface for user agent parsing
40
+ type UserAgentParser interface {
41
+ Parse (userAgent string ) UserAgentInfo
42
+ }
43
+
44
// UserAgentInfo represents parsed user agent information, as returned by
// UserAgentParser.Parse. Its fields mirror the identically named fields of
// AccessLogEntry.
type UserAgentInfo struct {
	Browser    string
	BrowserVer string
	OS         string
	OSVersion  string
	DeviceType string
}
52
+
53
// Shared string constants, declared once so the same literal is not repeated
// at every use site.
const (
	// invalidIPString is the literal "invalid".
	// NOTE(review): presumably a placeholder for an unparseable client
	// address; its uses are outside this part of the file.
	invalidIPString = "invalid"
)
57
+
58
// validHTTPMethods is the set of HTTP request methods treated as valid.
//
// Keys are upper-case and lookups are case-sensitive; indexing with any other
// string yields false, so the map can be used directly as a predicate:
//
//	if validHTTPMethods[method] { ... }
var validHTTPMethods = map[string]bool{
	"GET":     true,
	"POST":    true,
	"PUT":     true,
	"DELETE":  true,
	"HEAD":    true,
	"OPTIONS": true,
	"PATCH":   true,
	"TRACE":   true,
	"CONNECT": true,
}
70
+
71
+ // Common nginx log formats
72
+ var (
73
+ // Standard combined log format
74
+ CombinedFormat = & LogFormat {
75
+ Name : "combined" ,
76
+ Pattern : regexp .MustCompile (`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"(?:\s+(\S+))?(?:\s+(\S+))?` ),
77
+ Fields : []string {"ip" , "remote_user" , "timestamp" , "request" , "status" , "bytes_sent" , "referer" , "user_agent" , "request_time" , "upstream_time" },
78
+ }
79
+
80
+ // Standard main log format
81
+ MainFormat = & LogFormat {
82
+ Name : "main" ,
83
+ Pattern : regexp .MustCompile (`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"` ),
84
+ Fields : []string {"ip" , "remote_user" , "timestamp" , "request" , "status" , "bytes_sent" , "referer" , "user_agent" },
85
+ }
86
+
87
+ // Custom format with more details
88
+ DetailedFormat = & LogFormat {
89
+ Name : "detailed" ,
90
+ Pattern : regexp .MustCompile (`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)" (\S+) (\S+) "([^"]*)" (\S+)` ),
91
+ Fields : []string {"ip" , "remote_user" , "timestamp" , "request" , "status" , "bytes_sent" , "referer" , "user_agent" , "request_time" , "upstream_time" , "x_forwarded_for" , "connection" },
92
+ }
93
+
94
+ // All supported formats
95
+ SupportedFormats = []* LogFormat {DetailedFormat , CombinedFormat , MainFormat }
96
+ )
97
+
98
+ // DetectLogFormat tries to detect the log format from sample lines
99
+ func DetectLogFormat (lines []string ) * LogFormat {
100
+ if len (lines ) == 0 {
101
+ return nil
102
+ }
103
+
104
+ for _ , format := range SupportedFormats {
105
+ matchCount := 0
106
+ for _ , line := range lines {
107
+ if format .Pattern .MatchString (line ) {
108
+ matchCount ++
109
+ }
110
+ }
111
+ // If more than 50% of lines match, consider it a match
112
+ if float64 (matchCount )/ float64 (len (lines )) > 0.5 {
113
+ return format
114
+ }
115
+ }
116
+
117
+ return nil
118
+ }
0 commit comments