Skip to content

Commit 09cd2ba

Browse files
coresoftware devmatiasdaloia
authored and committed
remove file extension filters to match go-minr criteria
1 parent 22e2af2 commit 09cd2ba

File tree

2 files changed

+2
-163
lines changed

2 files changed

+2
-163
lines changed

src/scanoss/file_filters.py

Lines changed: 1 addition & 158 deletions
Original file line number | Diff line number | Diff line change
@@ -269,163 +269,6 @@
269269
'sqlite3',
270270
}
271271

272-
# TODO: For hfh add the .gitignore patterns
273-
DEFAULT_SKIPPED_EXT_HFH = {
274-
'.1',
275-
'.2',
276-
'.3',
277-
'.4',
278-
'.5',
279-
'.6',
280-
'.7',
281-
'.8',
282-
'.9',
283-
'.ac',
284-
'.adoc',
285-
'.am',
286-
'.asciidoc',
287-
'.bmp',
288-
'.build',
289-
'.cfg',
290-
'.chm',
291-
'.class',
292-
'.cmake',
293-
'.cnf',
294-
'.conf',
295-
'.config',
296-
'.contributors',
297-
'.copying',
298-
'.crt',
299-
'.csproj',
300-
'.css',
301-
'.csv',
302-
'.dat',
303-
'.data',
304-
'.dtd',
305-
'.dts',
306-
'.iws',
307-
'.c9',
308-
'.c9revisions',
309-
'.dtsi',
310-
'.dump',
311-
'.eot',
312-
'.eps',
313-
'.geojson',
314-
'.gif',
315-
'.glif',
316-
'.gmo',
317-
'.guess',
318-
'.hex',
319-
'.htm',
320-
'.html',
321-
'.ico',
322-
'.iml',
323-
'.in',
324-
'.inc',
325-
'.info',
326-
'.ini',
327-
'.ipynb',
328-
'.jpeg',
329-
'.jpg',
330-
'.json',
331-
'.jsonld',
332-
'.lock',
333-
'.log',
334-
'.m4',
335-
'.map',
336-
'.md5',
337-
'.meta',
338-
'.mk',
339-
'.mxml',
340-
'.o',
341-
'.otf',
342-
'.out',
343-
'.pbtxt',
344-
'.pdf',
345-
'.pem',
346-
'.phtml',
347-
'.plist',
348-
'.png',
349-
'.prefs',
350-
'.properties',
351-
'.pyc',
352-
'.qdoc',
353-
'.result',
354-
'.rgb',
355-
'.rst',
356-
'.scss',
357-
'.sha',
358-
'.sha1',
359-
'.sha2',
360-
'.sha256',
361-
'.sln',
362-
'.spec',
363-
'.sub',
364-
'.svg',
365-
'.svn-base',
366-
'.tab',
367-
'.template',
368-
'.test',
369-
'.tex',
370-
'.tiff',
371-
'.ttf',
372-
'.txt',
373-
'.utf-8',
374-
'.vim',
375-
'.wav',
376-
'.woff',
377-
'.woff2',
378-
'.xht',
379-
'.xhtml',
380-
'.xml',
381-
'.xpm',
382-
'.xsd',
383-
'.xul',
384-
'.yaml',
385-
'.yml',
386-
'.wfp',
387-
'.editorconfig',
388-
'.dotcover',
389-
'.pid',
390-
'.lcov',
391-
'.egg',
392-
'.manifest',
393-
'.cache',
394-
'.coverage',
395-
'.cover',
396-
'.gem',
397-
'.lst',
398-
'.pickle',
399-
'.pdb',
400-
'.gml',
401-
'.pot',
402-
'.plt',
403-
'.whml',
404-
'.pom',
405-
'.smtml',
406-
'.min.js',
407-
'.mf',
408-
'.base64',
409-
'.s',
410-
'.diff',
411-
'.patch',
412-
'.rules',
413-
# File endings
414-
'-doc',
415-
'config',
416-
'news',
417-
'readme',
418-
'swiftdoc',
419-
'texidoc',
420-
'todo',
421-
'version',
422-
'ignore',
423-
'manifest',
424-
'sqlite',
425-
'sqlite3',
426-
}
427-
428-
429272
class FileFilters(ScanossBase):
430273
"""
431274
Filter for determining which files to process during scanning, fingerprinting, etc.
@@ -709,7 +552,7 @@ def _should_skip_file(self, file_rel_path: str) -> bool: # noqa: PLR0911
709552
file_name = os.path.basename(file_rel_path)
710553

711554
DEFAULT_SKIPPED_FILES_LIST = DEFAULT_SKIPPED_FILES_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_FILES
712-
DEFAULT_SKIPPED_EXT_LIST = DEFAULT_SKIPPED_EXT_HFH if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
555+
DEFAULT_SKIPPED_EXT_LIST = {} if self.is_folder_hashing_scan else DEFAULT_SKIPPED_EXT
713556

714557
if not self.hidden_files_folders and file_name.startswith('.'):
715558
self.print_debug(f'Skipping file: {file_rel_path} (hidden file)')

src/scanoss/scanners/folder_hasher.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616

1717
MINIMUM_FILE_COUNT = 8
1818
MINIMUM_CONCATENATED_NAME_LENGTH = 32
19-
MAXIMUM_FILE_NAME_LENGTH = 32
20-
2119

2220
class DirectoryNode:
2321
"""
@@ -152,7 +150,7 @@ def _build_root_node(
152150
root_node = DirectoryNode(str(root))
153151

154152
all_files = [
155-
f for f in root.rglob('*') if f.is_file() and len(f.name.encode('utf-8')) <= MAXIMUM_FILE_NAME_LENGTH
153+
f for f in root.rglob('*') if f.is_file()
156154
]
157155
filtered_files = self.file_filters.get_filtered_files_from_files(all_files, str(root))
158156

@@ -255,8 +253,6 @@ def _hash_calc(self, node: DirectoryNode) -> dict:
255253

256254
for file in node.files:
257255
key_str = file.key_str
258-
if key_str in processed_hashes:
259-
continue
260256

261257
file_name = os.path.basename(file.path)
262258

0 commit comments

Comments
 (0)