@@ -27,11 +27,14 @@ func (s *ScanServiceImpl) ScanFolder(ctx context.Context, req *entities.ScanRequ
27
27
return nil , err
28
28
}
29
29
30
- results , err := s .scanNode (ctx , req .Root , req .RankThreshold , req .RecursiveThreshold , true )
30
+ results , err := s .scanNode (ctx , req .Root , req .RankThreshold , req .RecursiveThreshold , req . MinAcceptedScore , true )
31
31
if err != nil {
32
32
return nil , err
33
33
}
34
34
35
+ // Deduplicate components across folders, keeping only the highest scoring instance
36
+ results = s .deduplicateComponents (results )
37
+
35
38
response := & entities.ScanResponse {
36
39
Results : results ,
37
40
}
@@ -41,29 +44,45 @@ func (s *ScanServiceImpl) ScanFolder(ctx context.Context, req *entities.ScanRequ
41
44
return response , nil
42
45
}
43
46
44
- func (s * ScanServiceImpl ) processComponentGroups (componentGroups []entities.ComponentGroup , path string ) []* entities.ScanResult {
47
+ func (s * ScanServiceImpl ) processComponentGroups (componentGroups []entities.ComponentGroup , path string , minAcceptedScore float32 ) []* entities.ScanResult {
45
48
if len (componentGroups ) == 0 {
46
49
return []* entities.ScanResult {}
47
50
}
48
51
49
52
var results []* entities.ScanResult
53
+ var filteredGroups []* entities.ComponentGroup
50
54
51
- result := & entities.ScanResult {
52
- PathID : path ,
53
- ComponentGroups : make ([]* entities.ComponentGroup , len (componentGroups )),
54
- }
55
+ // Filter component groups based on minimum accepted score
56
+ for _ , group := range componentGroups {
57
+ // Filter versions within the group
58
+ var filteredVersions []entities.Version
59
+ for _ , version := range group .Versions {
60
+ if version .Score > minAcceptedScore {
61
+ filteredVersions = append (filteredVersions , version )
62
+ }
63
+ }
55
64
56
- for i , group := range componentGroups {
57
- groupCopy := group
58
- result .ComponentGroups [i ] = & groupCopy
65
+ // Only include the group if it has at least one version above the threshold
66
+ if len (filteredVersions ) > 0 {
67
+ groupCopy := group
68
+ groupCopy .Versions = filteredVersions
69
+ filteredGroups = append (filteredGroups , & groupCopy )
70
+ }
59
71
}
60
72
61
- results = append (results , result )
73
+ // Only create a result if we have filtered groups
74
+ if len (filteredGroups ) > 0 {
75
+ result := & entities.ScanResult {
76
+ PathID : path ,
77
+ ComponentGroups : filteredGroups ,
78
+ }
79
+ results = append (results , result )
80
+ }
62
81
63
82
return results
64
83
}
65
84
66
- func (s * ScanServiceImpl ) scanNode (ctx context.Context , node * entities.FolderNode , rankThreshold int , recursiveThreshold float32 , isRoot bool ) ([]* entities.ScanResult , error ) {
85
+ func (s * ScanServiceImpl ) scanNode (ctx context.Context , node * entities.FolderNode , rankThreshold int , recursiveThreshold float32 , minAcceptedScore float32 , isRoot bool ) ([]* entities.ScanResult , error ) {
67
86
logger := ctxzap .Extract (ctx ).Sugar ()
68
87
69
88
if node .SimHashDirNames == "" && node .SimHashNames == "" && node .SimHashContent == "" {
@@ -86,15 +105,15 @@ func (s *ScanServiceImpl) scanNode(ctx context.Context, node *entities.FolderNod
86
105
// Check if any component group has a version with score >= recursiveThreshold
87
106
if shouldCheckThreshold && recursiveThreshold > 0 && s .hasHighScoreMatch (componentGroups , recursiveThreshold ) {
88
107
logger .Infof ("Found high score match (>= %f) for node %s, stopping search" , recursiveThreshold , node .PathID )
89
- results := s .processComponentGroups (componentGroups , node .PathID )
108
+ results := s .processComponentGroups (componentGroups , node .PathID , minAcceptedScore )
90
109
return results , nil
91
110
}
92
111
93
- results := s .processComponentGroups (componentGroups , node .PathID )
112
+ results := s .processComponentGroups (componentGroups , node .PathID , minAcceptedScore )
94
113
95
114
if len (node .Children ) > 0 {
96
115
for _ , child := range node .Children {
97
- childResults , err := s .scanNode (ctx , child , rankThreshold , recursiveThreshold , false )
116
+ childResults , err := s .scanNode (ctx , child , rankThreshold , recursiveThreshold , minAcceptedScore , false )
98
117
if err != nil {
99
118
return nil , err
100
119
}
@@ -116,3 +135,69 @@ func (s *ScanServiceImpl) hasHighScoreMatch(componentGroups []entities.Component
116
135
}
117
136
return false
118
137
}
138
+
139
+ // deduplicateComponents removes duplicate components across folders, keeping only the highest scoring instance
140
+ func (s * ScanServiceImpl ) deduplicateComponents (results []* entities.ScanResult ) []* entities.ScanResult {
141
+ if len (results ) == 0 {
142
+ return results
143
+ }
144
+
145
+ // Map to track the best component instance: PURL -> (pathID, componentGroup, maxScore)
146
+ type componentInfo struct {
147
+ pathID string
148
+ component * entities.ComponentGroup
149
+ maxScore float32
150
+ }
151
+ bestComponents := make (map [string ]* componentInfo )
152
+
153
+ // Find the highest scoring instance of each component
154
+ for _ , result := range results {
155
+ for _ , group := range result .ComponentGroups {
156
+ // Find the maximum score for this component group
157
+ var maxScore float32
158
+ for _ , version := range group .Versions {
159
+ if version .Score > maxScore {
160
+ maxScore = version .Score
161
+ }
162
+ }
163
+
164
+ // Check if we've seen this component before
165
+ if existing , exists := bestComponents [group .PURL ]; exists {
166
+ // Keep the one with higher score
167
+ if maxScore > existing .maxScore {
168
+ bestComponents [group .PURL ] = & componentInfo {
169
+ pathID : result .PathID ,
170
+ component : group ,
171
+ maxScore : maxScore ,
172
+ }
173
+ }
174
+ } else {
175
+ // First time seeing this component
176
+ bestComponents [group .PURL ] = & componentInfo {
177
+ pathID : result .PathID ,
178
+ component : group ,
179
+ maxScore : maxScore ,
180
+ }
181
+ }
182
+ }
183
+ }
184
+
185
+ // Rebuild results with deduplicated components
186
+ pathToComponents := make (map [string ][]* entities.ComponentGroup )
187
+ for _ , info := range bestComponents {
188
+ pathToComponents [info .pathID ] = append (pathToComponents [info .pathID ], info .component )
189
+ }
190
+
191
+ // Create new result set
192
+ var deduplicatedResults []* entities.ScanResult
193
+ for pathID , components := range pathToComponents {
194
+ if len (components ) > 0 {
195
+ deduplicatedResults = append (deduplicatedResults , & entities.ScanResult {
196
+ PathID : pathID ,
197
+ ComponentGroups : components ,
198
+ })
199
+ }
200
+ }
201
+
202
+ return deduplicatedResults
203
+ }
0 commit comments