@@ -398,7 +398,7 @@ def _should_recycle_connection(self, conn):
398398
399399 return False
400400
401- def _maybe_connect (self , node_id ):
401+ def _init_connect (self , node_id ):
402402 """Idempotent non-blocking connection attempt to the given node id.
403403
404404 Returns True if connection object exists and is connected / connecting
@@ -427,10 +427,8 @@ def _maybe_connect(self, node_id):
427427 ** self .config )
428428 self ._conns [node_id ] = conn
429429
430- elif conn .connected ():
431- return True
432-
433- conn .connect ()
430+ if conn .disconnected ():
431+ conn .connect ()
434432 return not conn .disconnected ()
435433
436434 def ready (self , node_id , metadata_priority = True ):
@@ -621,15 +619,18 @@ def poll(self, timeout_ms=None, future=None):
621619 if self ._closed :
622620 break
623621
624- # Send a metadata request if needed (or initiate new connection)
625- metadata_timeout_ms = self ._maybe_refresh_metadata ()
626-
627622 # Attempt to complete pending connections
628623 for node_id in list (self ._connecting ):
629624 # False return means no more connection progress is possible
630625 # Connected nodes will update _connecting via state_change callback
631- if not self ._maybe_connect (node_id ):
632- self ._connecting .remove (node_id )
626+ if not self ._init_connect (node_id ):
627+ # It's possible that the connection attempt triggered a state change
628+ # but if not, make sure to remove from _connecting list
629+ if node_id in self ._connecting :
630+ self ._connecting .remove (node_id )
631+
632+ # Send a metadata request if needed (or initiate new connection)
633+ metadata_timeout_ms = self ._maybe_refresh_metadata ()
633634
634635 # If we got a future that is already done, don't block in _poll
635636 if future is not None and future .is_done :
@@ -679,6 +680,8 @@ def _poll(self, timeout):
679680 self ._register_send_sockets ()
680681
681682 start_select = time .time ()
683+ if timeout == float ('inf' ):
684+ timeout = None
682685 ready = self ._selector .select (timeout )
683686 end_select = time .time ()
684687 if self ._sensors :
@@ -893,6 +896,26 @@ def _maybe_refresh_metadata(self, wakeup=False):
893896 log .debug ("Give up sending metadata request since no node is available. (reconnect delay %d ms)" , next_connect_ms )
894897 return next_connect_ms
895898
899+ if not self ._can_send_request (node_id ):
900+ # If there's any connection establishment underway, wait until it completes. This prevents
901+ # the client from unnecessarily connecting to additional nodes while a previous connection
902+ # attempt has not been completed.
903+ if self ._connecting :
904+ return float ('inf' )
905+
906+ elif self ._can_connect (node_id ):
907+ log .debug ("Initializing connection to node %s for metadata request" , node_id )
908+ self ._connecting .add (node_id )
909+ if not self ._init_connect (node_id ):
910+ if node_id in self ._connecting :
911+ self ._connecting .remove (node_id )
912+ # Connection attempt failed immediately, need to retry with a different node
913+ return self .config ['reconnect_backoff_ms' ]
914+ else :
915+ # Existing connection with max in flight requests. Wait for request to complete.
916+ return self .config ['request_timeout_ms' ]
917+
918+ # Recheck node_id in case we were able to connect immediately above
896919 if self ._can_send_request (node_id ):
897920 topics = list (self ._topics )
898921 if not topics and self .cluster .is_bootstrap (node_id ):
@@ -917,20 +940,11 @@ def refresh_done(val_or_error):
917940 future .add_errback (refresh_done )
918941 return self .config ['request_timeout_ms' ]
919942
920- # If there's any connection establishment underway, wait until it completes. This prevents
921- # the client from unnecessarily connecting to additional nodes while a previous connection
922- # attempt has not been completed.
943+ # Should only get here if still connecting
923944 if self ._connecting :
924945 return float ('inf' )
925-
926- if self .maybe_connect (node_id , wakeup = wakeup ):
927- log .debug ("Initializing connection to node %s for metadata request" , node_id )
928- return float ('inf' )
929-
930- # connected but can't send more, OR connecting
931- # In either case we just need to wait for a network event
932- # to let us know the selected connection might be usable again.
933- return float ('inf' )
946+ else :
947+ return self .config ['reconnect_backoff_ms' ]
934948
935949 def get_api_versions (self ):
936950 """Return the ApiVersions map, if available.
@@ -973,7 +987,7 @@ def check_version(self, node_id=None, timeout=None, strict=False):
973987 if try_node is None :
974988 self ._lock .release ()
975989 raise Errors .NoBrokersAvailable ()
976- if not self ._maybe_connect (try_node ):
990+ if not self ._init_connect (try_node ):
977991 if try_node == node_id :
978992 raise Errors .NodeNotReadyError ("Connection failed to %s" % node_id )
979993 else :
0 commit comments