@@ -86,6 +86,9 @@ struct bucket {
 	};
 };
 
+#define HASHTAB_MAP_LOCK_COUNT 8
+#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
+
 struct bpf_htab {
 	struct bpf_map map;
 	struct bucket *buckets;
@@ -100,6 +103,7 @@ struct bpf_htab {
 	u32 elem_size;	/* size of each element in bytes */
 	u32 hashrnd;
 	struct lock_class_key lockdep_key;
+	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
 };
 
 /* each htab element is struct htab_elem + key + value */
@@ -152,26 +156,41 @@ static void htab_init_buckets(struct bpf_htab *htab)
 	}
 }
 
-static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
-					     struct bucket *b)
+static inline int htab_lock_bucket(const struct bpf_htab *htab,
+				   struct bucket *b, u32 hash,
+				   unsigned long *pflags)
 {
 	unsigned long flags;
 
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
+
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+		__this_cpu_dec(*(htab->map_locked[hash]));
+		migrate_enable();
+		return -EBUSY;
+	}
+
 	if (htab_use_raw_lock(htab))
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
 	else
 		spin_lock_irqsave(&b->lock, flags);
-	return flags;
+	*pflags = flags;
+
+	return 0;
 }
 
 static inline void htab_unlock_bucket(const struct bpf_htab *htab,
-				      struct bucket *b,
+				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
 	if (htab_use_raw_lock(htab))
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	else
 		spin_unlock_irqrestore(&b->lock, flags);
+	__this_cpu_dec(*(htab->map_locked[hash]));
+	migrate_enable();
 }
 
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
@@ -429,8 +448,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	struct bpf_htab *htab;
+	int err, i;
 	u64 cost;
-	int err;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
@@ -487,6 +506,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (!htab->buckets)
 		goto free_charge;
 
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
+		htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
+							 sizeof(int), GFP_USER);
+		if (!htab->map_locked[i])
+			goto free_map_locked;
+	}
+
 	if (htab->map.map_flags & BPF_F_ZERO_SEED)
 		htab->hashrnd = 0;
 	else
@@ -497,7 +523,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_buckets;
+			goto free_map_locked;
 
 		if (!percpu && !lru) {
 			/* lru itself can remove the least used element, so
@@ -513,7 +539,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 
 free_prealloc:
 	prealloc_destroy(htab);
-free_buckets:
+free_map_locked:
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
 free_charge:
 	bpf_map_charge_finish(&htab->map.memory);
@@ -694,20 +722,23 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
+	int ret;
 
 	tgt_l = container_of(node, struct htab_elem, lru_node);
 	b = __select_bucket(htab, tgt_l->hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+	if (ret)
+		return false;
 
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
 			hlist_nulls_del_rcu(&l->hash_node);
 			break;
 		}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, tgt_l->hash, flags);
 
 	return l == tgt_l;
 }
@@ -979,7 +1010,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	 */
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1020,7 +1053,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1058,7 +1091,9 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -ENOMEM;
 	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1077,7 +1112,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	ret = 0;
 
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 
 	if (ret)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -1112,7 +1147,9 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1135,7 +1172,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1175,7 +1212,9 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 		return -ENOMEM;
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1197,7 +1236,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
@@ -1225,7 +1264,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1235,17 +1274,20 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
 		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
-		ret = 0;
+	} else {
+		ret = -ENOENT;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1257,7 +1299,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1267,16 +1309,18 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
-	if (l) {
+	if (l)
 		hlist_nulls_del_rcu(&l->hash_node);
-		ret = 0;
-	}
+	else
+		ret = -ENOENT;
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l)
 		bpf_lru_push_free(&htab->lru, &l->lru_node);
 	return ret;
@@ -1302,6 +1346,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 static void htab_map_free(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	int i;
 
 	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
 	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
@@ -1320,6 +1365,8 @@ static void htab_map_free(struct bpf_map *map)
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
 	lockdep_unregister_key(&htab->lockdep_key);
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	kfree(htab);
 }
 
@@ -1423,8 +1470,11 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	b = &htab->buckets[batch];
 	head = &b->head;
 	/* do not grab the lock unless need it (bucket_cnt > 0). */
-	if (locked)
-		flags = htab_lock_bucket(htab, b);
+	if (locked) {
+		ret = htab_lock_bucket(htab, b, batch, &flags);
+		if (ret)
+			goto next_batch;
+	}
 
 	bucket_cnt = 0;
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
@@ -1441,7 +1491,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		goto after_loop;
@@ -1452,7 +1502,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		kvfree(keys);
@@ -1505,7 +1555,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		dst_val += value_size;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, batch, flags);
 	locked = false;
 
 	while (node_to_free) {
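
For reference, here is a minimal standalone sketch (userspace C, not part of the patch) of the reentrancy guard that the new htab_lock_bucket()/htab_unlock_bucket() pair implements: a per-CPU counter is incremented before the bucket lock is taken, and a nested attempt on the same CPU backs off with -EBUSY instead of deadlocking. The thread-local counter below stands in for the kernel's per-CPU map_locked[] array; all names are illustrative, assumed for the example only.

/* Illustrative userspace model of the per-CPU lock guard added by this patch;
 * a __thread counter stands in for htab->map_locked[hash]. Build with -lpthread.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread int map_locked;		/* per-"CPU" reentrancy counter */

static int lock_bucket(void)
{
	if (++map_locked != 1) {	/* lock already held on this CPU/thread */
		--map_locked;
		return -EBUSY;		/* back off instead of deadlocking */
	}
	pthread_mutex_lock(&bucket_lock);
	return 0;
}

static void unlock_bucket(void)
{
	pthread_mutex_unlock(&bucket_lock);
	--map_locked;
}

int main(void)
{
	if (!lock_bucket()) {
		/* a nested attempt (e.g. from a reentrant tracing prog) fails fast */
		printf("nested lock -> %d (expected %d)\n", lock_bucket(), -EBUSY);
		unlock_bucket();
	}
	return 0;
}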