Skip to content

Commit d83c4f7

Browse files
committed
perf(nginx-log): use optimized implementation
Replaces the original LogParser with a new OptimizedLogParser for improved performance. Removes legacy parser and related files, updates all usages and tests to use the new parser, and introduces new log format/type definitions. Adds performance benchmarks and streaming log processor for efficient batch processing.
1 parent ff0245f commit d83c4f7

16 files changed

+1581
-590
lines changed

.claude/settings.local.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
"Bash(go test:*)",
1313
"mcp__context7__resolve-library-id",
1414
"mcp__context7__get-library-docs",
15-
"Bash(find:*)"
15+
"Bash(find:*)",
16+
"Bash(sed:*)"
1617
],
1718
"deny": []
1819
}
19-
}
20+
}

internal/nginx_log/analytics_service.go

Lines changed: 0 additions & 11 deletions
This file was deleted.

internal/nginx_log/analytics_service_core.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ import (
1111
// AnalyticsService provides log analytics functionality
1212
type AnalyticsService struct {
1313
indexer *LogIndexer
14-
parser *LogParser
14+
parser *OptimizedLogParser
1515
}
1616

1717
// NewAnalyticsService creates a new analytics service
1818
func NewAnalyticsService() *AnalyticsService {
1919
// Create user agent parser
2020
userAgent := NewSimpleUserAgentParser()
21-
parser := NewLogParser(userAgent)
21+
parser := NewOptimizedLogParser(userAgent)
2222

2323
return &AnalyticsService{
2424
parser: parser,

internal/nginx_log/bleve_stats_service.go

Lines changed: 0 additions & 10 deletions
This file was deleted.

internal/nginx_log/indexer_file.go

Lines changed: 0 additions & 13 deletions
This file was deleted.

internal/nginx_log/log_cache.go

Lines changed: 0 additions & 11 deletions
This file was deleted.

internal/nginx_log/log_formats.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package nginx_log
2+
3+
import (
4+
"regexp"
5+
"time"
6+
)
7+
8+
// AccessLogEntry is the structured form of one access log line.
// The JSON key of every field is fixed by its struct tag.
type AccessLogEntry struct {
	// When the request was logged and which address it came from.
	Timestamp time.Time `json:"timestamp"`
	IP        string    `json:"ip"`

	// Location attributes; nothing in this file fills them in.
	RegionCode string `json:"region_code"`
	Province   string `json:"province"`
	City       string `json:"city"`

	// Request line pieces and the response summary.
	Method    string `json:"method"`
	Path      string `json:"path"`
	Protocol  string `json:"protocol"`
	Status    int    `json:"status"`
	BytesSent int64  `json:"bytes_sent"`
	Referer   string `json:"referer"`

	// The user agent string plus the details extracted from it
	// (the same five attributes carried by UserAgentInfo).
	UserAgent  string `json:"user_agent"`
	Browser    string `json:"browser"`
	BrowserVer string `json:"browser_version"`
	OS         string `json:"os"`
	OSVersion  string `json:"os_version"`
	DeviceType string `json:"device_type"`

	// Timing values. Both are dropped from JSON output when unset:
	// RequestTime when it is zero, UpstreamTime when it is nil.
	RequestTime  float64  `json:"request_time,omitempty"`
	UpstreamTime *float64 `json:"upstream_time,omitempty"`

	// Raw is serialized under the "raw" key.
	// NOTE(review): presumably the unparsed source line — confirm against the parser.
	Raw string `json:"raw"`
}
31+
32+
// LogFormat describes one recognizable access log layout: a name, the
// compiled expression a line has to match, and a label for each capture
// group of that expression.
type LogFormat struct {
	Name    string         // identifier, e.g. "combined" or "main"
	Pattern *regexp.Regexp // expression tested against each log line
	Fields  []string       // labels listed in capture-group order
}
38+
39+
// UserAgentParser interface for user agent parsing
40+
type UserAgentParser interface {
41+
Parse(userAgent string) UserAgentInfo
42+
}
43+
44+
// UserAgentInfo carries the attributes produced by a UserAgentParser.
// All fields are plain strings; the zero value leaves every one empty.
type UserAgentInfo struct {
	Browser, BrowserVer string // browser name and its version
	OS, OSVersion       string // operating system name and its version
	DeviceType          string // device category label
}
52+
53+
// invalidIPString is a shared string constant, declared once so the literal
// is not repeated at each use site.
// NOTE(review): presumably the placeholder for an IP that fails validation —
// confirm against the parser that uses it.
const invalidIPString = "invalid"
57+
58+
// validHTTPMethods is the set of request methods treated as valid: the
// methods defined by the HTTP semantics RFC (RFC 9110) plus PATCH (RFC 5789).
// Keys are upper-case; looking up any other string yields false.
var validHTTPMethods = map[string]bool{
	"GET": true, "POST": true, "PUT": true,
	"DELETE": true, "HEAD": true, "OPTIONS": true,
	"PATCH": true, "TRACE": true, "CONNECT": true,
}
70+
71+
// Common nginx log formats
72+
var (
73+
// Standard combined log format
74+
CombinedFormat = &LogFormat{
75+
Name: "combined",
76+
Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"(?:\s+(\S+))?(?:\s+(\S+))?`),
77+
Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent", "request_time", "upstream_time"},
78+
}
79+
80+
// Standard main log format
81+
MainFormat = &LogFormat{
82+
Name: "main",
83+
Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"`),
84+
Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent"},
85+
}
86+
87+
// Custom format with more details
88+
DetailedFormat = &LogFormat{
89+
Name: "detailed",
90+
Pattern: regexp.MustCompile(`^(\S+) - (\S+) \[([^]]+)\] "([^"]*)" (\d+) (\d+|-) "([^"]*)" "([^"]*)" (\S+) (\S+) "([^"]*)" (\S+)`),
91+
Fields: []string{"ip", "remote_user", "timestamp", "request", "status", "bytes_sent", "referer", "user_agent", "request_time", "upstream_time", "x_forwarded_for", "connection"},
92+
}
93+
94+
// All supported formats
95+
SupportedFormats = []*LogFormat{DetailedFormat, CombinedFormat, MainFormat}
96+
)
97+
98+
// DetectLogFormat tries to detect the log format from sample lines
99+
func DetectLogFormat(lines []string) *LogFormat {
100+
if len(lines) == 0 {
101+
return nil
102+
}
103+
104+
for _, format := range SupportedFormats {
105+
matchCount := 0
106+
for _, line := range lines {
107+
if format.Pattern.MatchString(line) {
108+
matchCount++
109+
}
110+
}
111+
// If more than 50% of lines match, consider it a match
112+
if float64(matchCount)/float64(len(lines)) > 0.5 {
113+
return format
114+
}
115+
}
116+
117+
return nil
118+
}

internal/nginx_log/log_indexer.go

Lines changed: 0 additions & 16 deletions
This file was deleted.

internal/nginx_log/log_indexer_core.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ type LogIndexer struct {
2727
index bleve.Index
2828
cache *ristretto.Cache[string, *CachedSearchResult]
2929
statsCache *ristretto.Cache[string, *CachedStatsResult]
30-
parser *LogParser
30+
parser *OptimizedLogParser
3131
watcher *fsnotify.Watcher
3232
logPaths map[string]*LogFileInfo
3333
mu sync.RWMutex
@@ -99,7 +99,7 @@ func NewLogIndexer() (*LogIndexer, error) {
9999

100100
// Create user agent parser
101101
userAgent := NewSimpleUserAgentParser()
102-
parser := NewLogParser(userAgent)
102+
parser := NewOptimizedLogParser(userAgent)
103103

104104
// Initialize file system watcher
105105
watcher, err := fsnotify.NewWatcher()

0 commit comments

Comments
 (0)