66
77from progress .bar import Bar
88
9+ from scanoss .constants import DEFAULT_HFH_DEPTH
910from scanoss .file_filters import FileFilters
1011from scanoss .scanoss_settings import ScanossSettings
1112from scanoss .scanossbase import ScanossBase
@@ -72,13 +73,20 @@ class FolderHasher:
7273
7374 It builds a directory tree (DirectoryNode) and computes the associated
7475 hash data for the folder.
76+
77+ Args:
78+ scan_dir (str): The directory to be hashed.
79+ config (FolderHasherConfig): Configuration parameters for the folder hasher.
80+ scanoss_settings (Optional[ScanossSettings]): Optional settings for Scanoss.
81+ depth (int): How many directory levels to hash, counting the root directory as level 1 (default: DEFAULT_HFH_DEPTH).
7582 """
7683
7784 def __init__ (
7885 self ,
7986 scan_dir : str ,
8087 config : FolderHasherConfig ,
8188 scanoss_settings : Optional [ScanossSettings ] = None ,
89+ depth : int = DEFAULT_HFH_DEPTH ,
8290 ):
8391 self .base = ScanossBase (
8492 debug = config .debug ,
@@ -101,6 +109,7 @@ def __init__(
101109
102110 self .scan_dir = scan_dir
103111 self .tree = None
112+ self .depth = depth
104113
105114 def hash_directory (self , path : str ) -> dict :
106115 """
@@ -123,7 +132,10 @@ def hash_directory(self, path: str) -> dict:
123132
124133 return tree
125134
126- def _build_root_node (self , path : str ) -> DirectoryNode :
135+ def _build_root_node (
136+ self ,
137+ path : str ,
138+ ) -> DirectoryNode :
127139 """
128140 Build a directory tree from the given path with file information.
129141
@@ -180,7 +192,7 @@ def _build_root_node(self, path: str) -> DirectoryNode:
180192 bar .finish ()
181193 return root_node
182194
183- def _hash_calc_from_node (self , node : DirectoryNode ) -> dict :
195+ def _hash_calc_from_node (self , node : DirectoryNode , current_depth : int = 1 ) -> dict :
184196 """
185197 Recursively compute folder hash data for a directory node.
186198
@@ -189,12 +201,13 @@ def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
189201
190202 Args:
191203 node (DirectoryNode): The directory node to compute the hash for.
204+ current_depth (int): The current depth level (1-based, root is depth 1).
192205
193206 Returns:
194207 dict: The computed hash data for the node.
195208 """
196209 hash_data = self ._hash_calc (node )
197-
210+
198211 # Safely calculate relative path
199212 try :
200213 node_path = Path (node .path ).resolve ()
@@ -204,13 +217,18 @@ def _hash_calc_from_node(self, node: DirectoryNode) -> dict:
204217 # If relative_to fails, use the node path as is or a fallback
205218 rel_path = Path (node .path ).name if node .path else Path ('.' )
206219
220+ # Only process children if we haven't reached the depth limit
221+ children = []
222+ if current_depth < self .depth :
223+ children = [self ._hash_calc_from_node (child , current_depth + 1 ) for child in node .children .values ()]
224+
207225 return {
208226 'path_id' : str (rel_path ),
209227 'sim_hash_names' : f'{ hash_data ["name_hash" ]:02x} ' if hash_data ['name_hash' ] is not None else None ,
210228 'sim_hash_content' : f'{ hash_data ["content_hash" ]:02x} ' if hash_data ['content_hash' ] is not None else None ,
211229 'sim_hash_dir_names' : f'{ hash_data ["dir_hash" ]:02x} ' if hash_data ['dir_hash' ] is not None else None ,
212230 'lang_extensions' : hash_data ['lang_extensions' ],
213- 'children' : [ self . _hash_calc_from_node ( child ) for child in node . children . values ()] ,
231+ 'children' : children ,
214232 }
215233
216234 def _hash_calc (self , node : DirectoryNode ) -> dict :
0 commit comments