From e83fc793a9edbf14b9dd400c32c2c08d0dadf242 Mon Sep 17 00:00:00 2001 From: cyningsun Date: Sat, 30 Aug 2025 22:14:28 +0800 Subject: [PATCH 1/6] async create conn --- async_handoff_integration_test.go | 20 +- internal/pool/bench_test.go | 22 +- internal/pool/buffer_size_test.go | 36 +-- internal/pool/hooks_test.go | 5 +- internal/pool/pool.go | 114 ++++++++- internal/pool/pool_test.go | 388 +++++++++++++++++++++++++++--- options.go | 12 + options_test.go | 74 ++++++ pool_pubsub_bench_test.go | 39 +-- 9 files changed, 614 insertions(+), 96 deletions(-) diff --git a/async_handoff_integration_test.go b/async_handoff_integration_test.go index 7e34bf9d1..9d6c9e52f 100644 --- a/async_handoff_integration_test.go +++ b/async_handoff_integration_test.go @@ -53,8 +53,9 @@ func TestEventDrivenHandoffIntegration(t *testing.T) { Dialer: func(ctx context.Context) (net.Conn, error) { return &mockNetConn{addr: "original:6379"}, nil }, - PoolSize: int32(5), - PoolTimeout: time.Second, + PoolSize: int32(5), + MaxConcurrentDials: 5, + PoolTimeout: time.Second, }) // Add the hook to the pool after creation @@ -153,8 +154,9 @@ func TestEventDrivenHandoffIntegration(t *testing.T) { return &mockNetConn{addr: "original:6379"}, nil }, - PoolSize: int32(10), - PoolTimeout: time.Second, + PoolSize: int32(10), + MaxConcurrentDials: 10, + PoolTimeout: time.Second, }) defer testPool.Close() @@ -225,8 +227,9 @@ func TestEventDrivenHandoffIntegration(t *testing.T) { return &mockNetConn{addr: "original:6379"}, nil }, - PoolSize: int32(3), - PoolTimeout: time.Second, + PoolSize: int32(3), + MaxConcurrentDials: 3, + PoolTimeout: time.Second, }) defer testPool.Close() @@ -288,8 +291,9 @@ func TestEventDrivenHandoffIntegration(t *testing.T) { return &mockNetConn{addr: "original:6379"}, nil }, - PoolSize: int32(2), - PoolTimeout: time.Second, + PoolSize: int32(2), + MaxConcurrentDials: 2, + PoolTimeout: time.Second, }) defer testPool.Close() diff --git a/internal/pool/bench_test.go b/internal/pool/bench_test.go index fc37b8212..5bbd549df 100644 --- a/internal/pool/bench_test.go +++ b/internal/pool/bench_test.go @@ -31,11 +31,12 @@ func BenchmarkPoolGetPut(b *testing.B) { for _, bm := range benchmarks { b.Run(bm.String(), func(b *testing.B) { connPool := pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(bm.poolSize), - PoolTimeout: time.Second, - DialTimeout: 1 * time.Second, - ConnMaxIdleTime: time.Hour, + Dialer: dummyDialer, + PoolSize: int32(bm.poolSize), + MaxConcurrentDials: bm.poolSize, + PoolTimeout: time.Second, + DialTimeout: 1 * time.Second, + ConnMaxIdleTime: time.Hour, }) b.ResetTimer() @@ -75,11 +76,12 @@ func BenchmarkPoolGetRemove(b *testing.B) { for _, bm := range benchmarks { b.Run(bm.String(), func(b *testing.B) { connPool := pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(bm.poolSize), - PoolTimeout: time.Second, - DialTimeout: 1 * time.Second, - ConnMaxIdleTime: time.Hour, + Dialer: dummyDialer, + PoolSize: int32(bm.poolSize), + MaxConcurrentDials: bm.poolSize, + PoolTimeout: time.Second, + DialTimeout: 1 * time.Second, + ConnMaxIdleTime: time.Hour, }) b.ResetTimer() diff --git a/internal/pool/buffer_size_test.go b/internal/pool/buffer_size_test.go index 71223d708..85fc8f529 100644 --- a/internal/pool/buffer_size_test.go +++ b/internal/pool/buffer_size_test.go @@ -25,9 +25,10 @@ var _ = Describe("Buffer Size Configuration", func() { It("should use default buffer sizes when not specified", func() { connPool = pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(1), - PoolTimeout: 1000, + Dialer: dummyDialer, + PoolSize: int32(1), + MaxConcurrentDials: 1, + PoolTimeout: 1000, }) cn, err := connPool.NewConn(ctx) @@ -47,11 +48,12 @@ var _ = Describe("Buffer Size Configuration", func() { customWriteSize := 64 * 1024 // 64KB connPool = pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(1), - PoolTimeout: 1000, - ReadBufferSize: customReadSize, - WriteBufferSize: customWriteSize, + Dialer: dummyDialer, + PoolSize: int32(1), + MaxConcurrentDials: 1, + PoolTimeout: 1000, + ReadBufferSize: customReadSize, + WriteBufferSize: customWriteSize, }) cn, err := connPool.NewConn(ctx) @@ -68,11 +70,12 @@ var _ = Describe("Buffer Size Configuration", func() { It("should handle zero buffer sizes by using defaults", func() { connPool = pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(1), - PoolTimeout: 1000, - ReadBufferSize: 0, // Should use default - WriteBufferSize: 0, // Should use default + Dialer: dummyDialer, + PoolSize: int32(1), + MaxConcurrentDials: 1, + PoolTimeout: 1000, + ReadBufferSize: 0, // Should use default + WriteBufferSize: 0, // Should use default }) cn, err := connPool.NewConn(ctx) @@ -104,9 +107,10 @@ var _ = Describe("Buffer Size Configuration", func() { // Test the scenario where someone creates a pool directly (like in tests) // without setting ReadBufferSize and WriteBufferSize connPool = pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(1), - PoolTimeout: 1000, + Dialer: dummyDialer, + PoolSize: int32(1), + MaxConcurrentDials: 1, + PoolTimeout: 1000, // ReadBufferSize and WriteBufferSize are not set (will be 0) }) diff --git a/internal/pool/hooks_test.go b/internal/pool/hooks_test.go index e6100115c..18ad1ec5a 100644 --- a/internal/pool/hooks_test.go +++ b/internal/pool/hooks_test.go @@ -177,8 +177,9 @@ func TestPoolWithHooks(t *testing.T) { Dialer: func(ctx context.Context) (net.Conn, error) { return &net.TCPConn{}, nil // Mock connection }, - PoolSize: 1, - DialTimeout: time.Second, + PoolSize: 1, + MaxConcurrentDials: 1, + DialTimeout: time.Second, } pool := NewConnPool(opt) diff --git a/internal/pool/pool.go b/internal/pool/pool.go index b2cdbef5e..6158ca18a 100644 --- a/internal/pool/pool.go +++ b/internal/pool/pool.go @@ -91,6 +91,7 @@ type Options struct { PoolFIFO bool PoolSize int32 + MaxConcurrentDials int DialTimeout time.Duration PoolTimeout time.Duration MinIdleConns int32 @@ -113,13 +114,65 @@ type lastDialErrorWrap struct { err error } +type wantConn struct { + mu sync.Mutex // protects ctx, done and sending of the result + ctx context.Context // context for dial, cleared after delivered or canceled + cancelCtx context.CancelFunc + done bool // true after delivered or canceled + result chan wantConnResult // channel to deliver connection or error +} + +func (w *wantConn) tryDeliver(cn *Conn, err error) bool { + w.mu.Lock() + defer w.mu.Unlock() + if w.done { + return false + } + + w.done = true + w.ctx = nil + + w.result <- wantConnResult{cn: cn, err: err} + close(w.result) + + return true +} + +func (w *wantConn) cancel(ctx context.Context, p *ConnPool) { + w.mu.Lock() + var cn *Conn + if w.done { + select { + case result := <-w.result: + cn = result.cn + default: + } + } else { + close(w.result) + } + + w.done = true + w.ctx = nil + w.mu.Unlock() + + if cn != nil { + p.Put(ctx, cn) + } +} + +type wantConnResult struct { + cn *Conn + err error +} + type ConnPool struct { cfg *Options dialErrorsNum uint32 // atomic lastDialError atomic.Value - queue chan struct{} + queue chan struct{} + dialsInProgress chan struct{} connsMu sync.Mutex conns map[uint64]*Conn @@ -145,9 +198,10 @@ func NewConnPool(opt *Options) *ConnPool { p := &ConnPool{ cfg: opt, - queue: make(chan struct{}, opt.PoolSize), - conns: make(map[uint64]*Conn), - idleConns: make([]*Conn, 0, opt.PoolSize), + queue: make(chan struct{}, opt.PoolSize), + conns: make(map[uint64]*Conn), + dialsInProgress: make(chan struct{}, opt.MaxConcurrentDials), + idleConns: make([]*Conn, 0, opt.PoolSize), } // Only create MinIdleConns if explicitly requested (> 0) @@ -473,9 +527,8 @@ func (p *ConnPool) getConn(ctx context.Context) (*Conn, error) { atomic.AddUint32(&p.stats.Misses, 1) - newcn, err := p.newConn(ctx, true) + newcn, err := p.asyncNewConn(ctx) if err != nil { - p.freeTurn() return nil, err } @@ -495,6 +548,55 @@ func (p *ConnPool) getConn(ctx context.Context) (*Conn, error) { return newcn, nil } +func (p *ConnPool) asyncNewConn(ctx context.Context) (*Conn, error) { + // First try to acquire permission to create a connection + select { + case p.dialsInProgress <- struct{}{}: + // Got permission, proceed to create connection + case <-ctx.Done(): + p.freeTurn() + return nil, ctx.Err() + } + + dialCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), p.cfg.DialTimeout) + + w := &wantConn{ + ctx: dialCtx, + cancelCtx: cancel, + result: make(chan wantConnResult, 1), + } + var err error + defer func() { + if err != nil { + w.cancel(ctx, p) + } + }() + + go func(w *wantConn) { + defer w.cancelCtx() + defer func() { <-p.dialsInProgress }() // Release connection creation permission + + cn, cnErr := p.newConn(w.ctx, true) + delivered := w.tryDeliver(cn, cnErr) + if cnErr == nil && delivered { + return + } else if cnErr == nil && !delivered { + p.Put(w.ctx, cn) + } else { // freeTurn after error + p.freeTurn() + } + }(w) + + select { + case <-ctx.Done(): + err = ctx.Err() + return nil, err + case result := <-w.result: + err = result.err + return result.cn, err + } +} + func (p *ConnPool) waitTurn(ctx context.Context) error { select { case <-ctx.Done(): diff --git a/internal/pool/pool_test.go b/internal/pool/pool_test.go index 6a7870b56..3b3a9db24 100644 --- a/internal/pool/pool_test.go +++ b/internal/pool/pool_test.go @@ -3,6 +3,7 @@ package pool_test import ( "context" "errors" + "fmt" "net" "sync" "sync/atomic" @@ -21,11 +22,12 @@ var _ = Describe("ConnPool", func() { BeforeEach(func() { connPool = pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(10), - PoolTimeout: time.Hour, - DialTimeout: 1 * time.Second, - ConnMaxIdleTime: time.Millisecond, + Dialer: dummyDialer, + PoolSize: int32(10), + MaxConcurrentDials: 10, + PoolTimeout: time.Hour, + DialTimeout: 1 * time.Second, + ConnMaxIdleTime: time.Millisecond, }) }) @@ -47,11 +49,12 @@ var _ = Describe("ConnPool", func() { <-closedChan return &net.TCPConn{}, nil }, - PoolSize: int32(10), - PoolTimeout: time.Hour, - DialTimeout: 1 * time.Second, - ConnMaxIdleTime: time.Millisecond, - MinIdleConns: int32(minIdleConns), + PoolSize: int32(10), + MaxConcurrentDials: 10, + PoolTimeout: time.Hour, + DialTimeout: 1 * time.Second, + ConnMaxIdleTime: time.Millisecond, + MinIdleConns: int32(minIdleConns), }) wg.Wait() Expect(connPool.Close()).NotTo(HaveOccurred()) @@ -131,12 +134,13 @@ var _ = Describe("MinIdleConns", func() { newConnPool := func() *pool.ConnPool { connPool := pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(poolSize), - MinIdleConns: int32(minIdleConns), - PoolTimeout: 100 * time.Millisecond, - DialTimeout: 1 * time.Second, - ConnMaxIdleTime: -1, + Dialer: dummyDialer, + PoolSize: int32(poolSize), + MaxConcurrentDials: poolSize, + MinIdleConns: int32(minIdleConns), + PoolTimeout: 100 * time.Millisecond, + DialTimeout: 1 * time.Second, + ConnMaxIdleTime: -1, }) Eventually(func() int { return connPool.Len() @@ -310,11 +314,12 @@ var _ = Describe("race", func() { It("does not happen on Get, Put, and Remove", func() { connPool = pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(10), - PoolTimeout: time.Minute, - DialTimeout: 1 * time.Second, - ConnMaxIdleTime: time.Millisecond, + Dialer: dummyDialer, + PoolSize: int32(10), + MaxConcurrentDials: 10, + PoolTimeout: time.Minute, + DialTimeout: 1 * time.Second, + ConnMaxIdleTime: time.Millisecond, }) perform(C, func(id int) { @@ -341,10 +346,11 @@ var _ = Describe("race", func() { Dialer: func(ctx context.Context) (net.Conn, error) { return &net.TCPConn{}, nil }, - PoolSize: int32(1000), - MinIdleConns: int32(50), - PoolTimeout: 3 * time.Second, - DialTimeout: 1 * time.Second, + PoolSize: int32(1000), + MaxConcurrentDials: 1000, + MinIdleConns: int32(50), + PoolTimeout: 3 * time.Second, + DialTimeout: 1 * time.Second, } p := pool.NewConnPool(opt) @@ -368,8 +374,9 @@ var _ = Describe("race", func() { Dialer: func(ctx context.Context) (net.Conn, error) { panic("test panic") }, - PoolSize: int32(100), - MinIdleConns: int32(30), + PoolSize: int32(100), + MaxConcurrentDials: 100, + MinIdleConns: int32(30), } p := pool.NewConnPool(opt) @@ -386,8 +393,9 @@ var _ = Describe("race", func() { Dialer: func(ctx context.Context) (net.Conn, error) { return &net.TCPConn{}, nil }, - PoolSize: int32(1), - PoolTimeout: 3 * time.Second, + PoolSize: int32(1), + MaxConcurrentDials: 1, + PoolTimeout: 3 * time.Second, } p := pool.NewConnPool(opt) @@ -417,8 +425,9 @@ var _ = Describe("race", func() { return &net.TCPConn{}, nil }, - PoolSize: int32(1), - PoolTimeout: testPoolTimeout, + PoolSize: int32(1), + MaxConcurrentDials: 1, + PoolTimeout: testPoolTimeout, } p := pool.NewConnPool(opt) @@ -452,9 +461,10 @@ func TestDialerRetryConfiguration(t *testing.T) { connPool := pool.NewConnPool(&pool.Options{ Dialer: failingDialer, PoolSize: 1, + MaxConcurrentDials: 1, PoolTimeout: time.Second, DialTimeout: time.Second, - DialerRetries: 3, // Custom retry count + DialerRetries: 3, // Custom retry count DialerRetryTimeout: 10 * time.Millisecond, // Fast retries for testing }) defer connPool.Close() @@ -483,10 +493,11 @@ func TestDialerRetryConfiguration(t *testing.T) { } connPool := pool.NewConnPool(&pool.Options{ - Dialer: failingDialer, - PoolSize: 1, - PoolTimeout: time.Second, - DialTimeout: time.Second, + Dialer: failingDialer, + PoolSize: 1, + MaxConcurrentDials: 1, + PoolTimeout: time.Second, + DialTimeout: time.Second, // DialerRetries and DialerRetryTimeout not set - should use defaults }) defer connPool.Close() @@ -504,6 +515,311 @@ func TestDialerRetryConfiguration(t *testing.T) { }) } +var _ = Describe("asyncNewConn", func() { + ctx := context.Background() + + It("should successfully create connection when pool is exhausted", func() { + testPool := pool.NewConnPool(&pool.Options{ + Dialer: dummyDialer, + PoolSize: 1, + MaxConcurrentDials: 2, + DialTimeout: 1 * time.Second, + PoolTimeout: 2 * time.Second, + }) + defer testPool.Close() + + // Fill the pool + conn1, err := testPool.Get(ctx) + Expect(err).NotTo(HaveOccurred()) + Expect(conn1).NotTo(BeNil()) + + // Get second connection in another goroutine + done := make(chan struct{}) + var conn2 *pool.Conn + var err2 error + + go func() { + defer GinkgoRecover() + conn2, err2 = testPool.Get(ctx) + close(done) + }() + + // Wait a bit to let the second Get start waiting + time.Sleep(100 * time.Millisecond) + + // Release first connection to let second Get acquire Turn + testPool.Put(ctx, conn1) + + // Wait for second Get to complete + <-done + Expect(err2).NotTo(HaveOccurred()) + Expect(conn2).NotTo(BeNil()) + + // Clean up second connection + testPool.Put(ctx, conn2) + }) + + It("should handle context cancellation before acquiring dialsInProgress", func() { + slowDialer := func(ctx context.Context) (net.Conn, error) { + // Simulate slow dialing to let first connection creation occupy dialsInProgress + time.Sleep(200 * time.Millisecond) + return newDummyConn(), nil + } + + testPool := pool.NewConnPool(&pool.Options{ + Dialer: slowDialer, + PoolSize: 2, + MaxConcurrentDials: 1, // Limit to 1 so second request cannot get dialsInProgress permission + DialTimeout: 1 * time.Second, + PoolTimeout: 1 * time.Second, + }) + defer testPool.Close() + + // Start first connection creation, this will occupy dialsInProgress + done1 := make(chan struct{}) + go func() { + defer GinkgoRecover() + conn1, err := testPool.Get(ctx) + if err == nil { + defer testPool.Put(ctx, conn1) + } + close(done1) + }() + + // Wait a bit to ensure first request starts and occupies dialsInProgress + time.Sleep(50 * time.Millisecond) + + // Create a context that will be cancelled quickly + cancelCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) + defer cancel() + + // Second request should timeout while waiting for dialsInProgress + _, err := testPool.Get(cancelCtx) + Expect(err).To(Equal(context.DeadlineExceeded)) + + // Wait for first request to complete + <-done1 + }) + + It("should handle context cancellation while waiting for connection result", func() { + // This test focuses on proper error handling when context is cancelled + // during asyncNewConn execution (not testing connection reuse) + + slowDialer := func(ctx context.Context) (net.Conn, error) { + // Simulate slow dialing + time.Sleep(500 * time.Millisecond) + return newDummyConn(), nil + } + + testPool := pool.NewConnPool(&pool.Options{ + Dialer: slowDialer, + PoolSize: 1, + MaxConcurrentDials: 2, + DialTimeout: 2 * time.Second, + PoolTimeout: 2 * time.Second, + }) + defer testPool.Close() + + // Get first connection to fill the pool + conn1, err := testPool.Get(ctx) + Expect(err).NotTo(HaveOccurred()) + + // Create a context that will be cancelled during connection creation + cancelCtx, cancel := context.WithTimeout(ctx, 200*time.Millisecond) + defer cancel() + + // This request should timeout while waiting for connection creation result + // Testing the error handling path in asyncNewConn select statement + done := make(chan struct{}) + var err2 error + go func() { + defer GinkgoRecover() + _, err2 = testPool.Get(cancelCtx) + close(done) + }() + + <-done + Expect(err2).To(Equal(context.DeadlineExceeded)) + + // Clean up - release the first connection + testPool.Put(ctx, conn1) + }) + + It("should handle dial failures gracefully", func() { + alwaysFailDialer := func(ctx context.Context) (net.Conn, error) { + return nil, fmt.Errorf("dial failed") + } + + testPool := pool.NewConnPool(&pool.Options{ + Dialer: alwaysFailDialer, + PoolSize: 1, + MaxConcurrentDials: 1, + DialTimeout: 1 * time.Second, + PoolTimeout: 1 * time.Second, + }) + defer testPool.Close() + + // This call should fail, testing error handling branch in goroutine + _, err := testPool.Get(ctx) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("dial failed")) + }) + + It("should handle connection creation success with normal delivery", func() { + // This test verifies normal case where connection creation and delivery both succeed + testPool := pool.NewConnPool(&pool.Options{ + Dialer: dummyDialer, + PoolSize: 1, + MaxConcurrentDials: 2, + DialTimeout: 1 * time.Second, + PoolTimeout: 2 * time.Second, + }) + defer testPool.Close() + + // Get first connection + conn1, err := testPool.Get(ctx) + Expect(err).NotTo(HaveOccurred()) + + // Get second connection in another goroutine + done := make(chan struct{}) + var conn2 *pool.Conn + var err2 error + + go func() { + defer GinkgoRecover() + conn2, err2 = testPool.Get(ctx) + close(done) + }() + + // Wait a bit to let second Get start waiting + time.Sleep(100 * time.Millisecond) + + // Release first connection + testPool.Put(ctx, conn1) + + // Wait for second Get to complete + <-done + Expect(err2).NotTo(HaveOccurred()) + Expect(conn2).NotTo(BeNil()) + + // Clean up second connection + testPool.Put(ctx, conn2) + }) + + It("should handle MaxConcurrentDials limit", func() { + testPool := pool.NewConnPool(&pool.Options{ + Dialer: dummyDialer, + PoolSize: 3, + MaxConcurrentDials: 1, // Only allow 1 concurrent dial + DialTimeout: 1 * time.Second, + PoolTimeout: 1 * time.Second, + }) + defer testPool.Close() + + // Get all connections to fill the pool + var conns []*pool.Conn + for i := 0; i < 3; i++ { + conn, err := testPool.Get(ctx) + Expect(err).NotTo(HaveOccurred()) + conns = append(conns, conn) + } + + // Now pool is full, next request needs to create new connection + // But due to MaxConcurrentDials=1, only one concurrent dial is allowed + done := make(chan struct{}) + var err4 error + go func() { + defer GinkgoRecover() + _, err4 = testPool.Get(ctx) + close(done) + }() + + // Release one connection to let the request complete + time.Sleep(100 * time.Millisecond) + testPool.Put(ctx, conns[0]) + + <-done + Expect(err4).NotTo(HaveOccurred()) + + // Clean up remaining connections + for i := 1; i < len(conns); i++ { + testPool.Put(ctx, conns[i]) + } + }) + + It("should reuse connections created in background after request timeout", func() { + // This test focuses on connection reuse mechanism: + // When a request times out but background connection creation succeeds, + // the created connection should be added to pool for future reuse + + slowDialer := func(ctx context.Context) (net.Conn, error) { + // Simulate delay for connection creation + time.Sleep(100 * time.Millisecond) + return newDummyConn(), nil + } + + testPool := pool.NewConnPool(&pool.Options{ + Dialer: slowDialer, + PoolSize: 1, + MaxConcurrentDials: 1, + DialTimeout: 1 * time.Second, + PoolTimeout: 150 * time.Millisecond, // Short timeout for waiting Turn + }) + defer testPool.Close() + + // Fill the pool with one connection + conn1, err := testPool.Get(ctx) + Expect(err).NotTo(HaveOccurred()) + // Don't put it back yet, so pool is full + + // Start a goroutine that will create a new connection but take time + done1 := make(chan struct{}) + go func() { + defer GinkgoRecover() + defer close(done1) + // This will trigger asyncNewConn since pool is full + conn, err := testPool.Get(ctx) + if err == nil { + // Put connection back to pool after creation + time.Sleep(50 * time.Millisecond) + testPool.Put(ctx, conn) + } + }() + + // Wait a bit to let the goroutine start and begin connection creation + time.Sleep(50 * time.Millisecond) + + // Now make a request that should timeout waiting for Turn + start := time.Now() + _, err = testPool.Get(ctx) + duration := time.Since(start) + + Expect(err).To(Equal(pool.ErrPoolTimeout)) + // Should timeout around PoolTimeout + Expect(duration).To(BeNumerically("~", 150*time.Millisecond, 50*time.Millisecond)) + + // Release the first connection to allow the background creation to complete + testPool.Put(ctx, conn1) + + // Wait for background connection creation to complete + <-done1 + time.Sleep(100 * time.Millisecond) + + // CORE TEST: Verify connection reuse mechanism + // The connection created in background should now be available in pool + start = time.Now() + conn3, err := testPool.Get(ctx) + duration = time.Since(start) + + Expect(err).NotTo(HaveOccurred()) + Expect(conn3).NotTo(BeNil()) + // Should be fast since connection is from pool (not newly created) + Expect(duration).To(BeNumerically("<", 50*time.Millisecond)) + + testPool.Put(ctx, conn3) + }) +}) + func init() { logging.Disable() } diff --git a/options.go b/options.go index 0e154ac09..d07a19d6e 100644 --- a/options.go +++ b/options.go @@ -140,6 +140,10 @@ type Options struct { // default: 3 seconds WriteTimeout time.Duration + // MaxConcurrentDials is the maximum number of concurrent connection creation goroutines. + // If 0, defaults to PoolSize/4+1. If negative, unlimited goroutines (not recommended). + MaxConcurrentDials int + // ContextTimeoutEnabled controls whether the client respects context timeouts and deadlines. // See https://redis.uptrace.dev/guide/go-redis-debugging.html#timeouts ContextTimeoutEnabled bool @@ -299,6 +303,11 @@ func (opt *Options) init() { if opt.PoolSize == 0 { opt.PoolSize = 10 * runtime.GOMAXPROCS(0) } + if opt.MaxConcurrentDials <= 0 { + opt.MaxConcurrentDials = opt.PoolSize + } else if opt.MaxConcurrentDials > opt.PoolSize { + opt.MaxConcurrentDials = opt.PoolSize + } if opt.ReadBufferSize == 0 { opt.ReadBufferSize = proto.DefaultBufferSize } @@ -626,6 +635,7 @@ func setupConnParams(u *url.URL, o *Options) (*Options, error) { o.MinIdleConns = q.int("min_idle_conns") o.MaxIdleConns = q.int("max_idle_conns") o.MaxActiveConns = q.int("max_active_conns") + o.MaxConcurrentDials = q.int("max_concurrent_dials") if q.has("conn_max_idle_time") { o.ConnMaxIdleTime = q.duration("conn_max_idle_time") } else { @@ -692,6 +702,7 @@ func newConnPool( }, PoolFIFO: opt.PoolFIFO, PoolSize: poolSize, + MaxConcurrentDials: opt.MaxConcurrentDials, PoolTimeout: opt.PoolTimeout, DialTimeout: opt.DialTimeout, DialerRetries: opt.DialerRetries, @@ -732,6 +743,7 @@ func newPubSubPool(opt *Options, dialer func(ctx context.Context, network, addr return pool.NewPubSubPool(&pool.Options{ PoolFIFO: opt.PoolFIFO, PoolSize: poolSize, + MaxConcurrentDials: opt.MaxConcurrentDials, PoolTimeout: opt.PoolTimeout, DialTimeout: opt.DialTimeout, DialerRetries: opt.DialerRetries, diff --git a/options_test.go b/options_test.go index 8de4986b3..32d75e259 100644 --- a/options_test.go +++ b/options_test.go @@ -67,6 +67,12 @@ func TestParseURL(t *testing.T) { }, { url: "redis://localhost:123/?db=2&protocol=2", // RESP Protocol o: &Options{Addr: "localhost:123", DB: 2, Protocol: 2}, + }, { + url: "redis://localhost:123/?max_concurrent_dials=5", // MaxConcurrentDials parameter + o: &Options{Addr: "localhost:123", MaxConcurrentDials: 5}, + }, { + url: "redis://localhost:123/?max_concurrent_dials=0", // MaxConcurrentDials zero value + o: &Options{Addr: "localhost:123", MaxConcurrentDials: 0}, }, { url: "unix:///tmp/redis.sock", o: &Options{Addr: "/tmp/redis.sock"}, @@ -197,6 +203,9 @@ func comprareOptions(t *testing.T, actual, expected *Options) { if actual.ConnMaxLifetime != expected.ConnMaxLifetime { t.Errorf("ConnMaxLifetime: got %v, expected %v", actual.ConnMaxLifetime, expected.ConnMaxLifetime) } + if actual.MaxConcurrentDials != expected.MaxConcurrentDials { + t.Errorf("MaxConcurrentDials: got %v, expected %v", actual.MaxConcurrentDials, expected.MaxConcurrentDials) + } } // Test ReadTimeout option initialization, including special values -1 and 0. @@ -245,3 +254,68 @@ func TestProtocolOptions(t *testing.T) { } } } + +func TestMaxConcurrentDialsOptions(t *testing.T) { + // Test cases for MaxConcurrentDials initialization logic + testCases := []struct { + name string + poolSize int + maxConcurrentDials int + expectedConcurrentDials int + }{ + // Edge cases and invalid values - negative/zero values set to PoolSize + { + name: "negative value gets set to pool size", + poolSize: 10, + maxConcurrentDials: -1, + expectedConcurrentDials: 10, // negative values are set to PoolSize + }, + // Zero value tests - MaxConcurrentDials should be set to PoolSize + { + name: "zero value with positive pool size", + poolSize: 1, + maxConcurrentDials: 0, + expectedConcurrentDials: 1, // MaxConcurrentDials = PoolSize when 0 + }, + // Explicit positive value tests + { + name: "explicit value within limit", + poolSize: 10, + maxConcurrentDials: 3, + expectedConcurrentDials: 3, // should remain unchanged when < PoolSize + }, + // Capping tests - values exceeding PoolSize should be capped + { + name: "value exceeding pool size", + poolSize: 5, + maxConcurrentDials: 10, + expectedConcurrentDials: 5, // should be capped at PoolSize + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + opts := &Options{ + PoolSize: tc.poolSize, + MaxConcurrentDials: tc.maxConcurrentDials, + } + opts.init() + + if opts.MaxConcurrentDials != tc.expectedConcurrentDials { + t.Errorf("MaxConcurrentDials: got %v, expected %v (PoolSize=%v)", + opts.MaxConcurrentDials, tc.expectedConcurrentDials, opts.PoolSize) + } + + // Ensure MaxConcurrentDials never exceeds PoolSize (for all inputs) + if opts.MaxConcurrentDials > opts.PoolSize { + t.Errorf("MaxConcurrentDials (%v) should not exceed PoolSize (%v)", + opts.MaxConcurrentDials, opts.PoolSize) + } + + // Ensure MaxConcurrentDials is always positive (for all inputs) + if opts.MaxConcurrentDials <= 0 { + t.Errorf("MaxConcurrentDials should be positive, got %v", opts.MaxConcurrentDials) + } + }) + } +} diff --git a/pool_pubsub_bench_test.go b/pool_pubsub_bench_test.go index 0db8ec55f..d7f0f185c 100644 --- a/pool_pubsub_bench_test.go +++ b/pool_pubsub_bench_test.go @@ -70,12 +70,13 @@ func BenchmarkPoolGetPut(b *testing.B) { for _, poolSize := range poolSizes { b.Run(fmt.Sprintf("PoolSize_%d", poolSize), func(b *testing.B) { connPool := pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(poolSize), - PoolTimeout: time.Second, - DialTimeout: time.Second, - ConnMaxIdleTime: time.Hour, - MinIdleConns: int32(0), // Start with no idle connections + Dialer: dummyDialer, + PoolSize: int32(poolSize), + MaxConcurrentDials: poolSize, + PoolTimeout: time.Second, + DialTimeout: time.Second, + ConnMaxIdleTime: time.Hour, + MinIdleConns: int32(0), // Start with no idle connections }) defer connPool.Close() @@ -112,12 +113,13 @@ func BenchmarkPoolGetPutWithMinIdle(b *testing.B) { for _, config := range configs { b.Run(fmt.Sprintf("Pool_%d_MinIdle_%d", config.poolSize, config.minIdleConns), func(b *testing.B) { connPool := pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(config.poolSize), - MinIdleConns: int32(config.minIdleConns), - PoolTimeout: time.Second, - DialTimeout: time.Second, - ConnMaxIdleTime: time.Hour, + Dialer: dummyDialer, + PoolSize: int32(config.poolSize), + MaxConcurrentDials: config.poolSize, + MinIdleConns: int32(config.minIdleConns), + PoolTimeout: time.Second, + DialTimeout: time.Second, + ConnMaxIdleTime: time.Hour, }) defer connPool.Close() @@ -142,12 +144,13 @@ func BenchmarkPoolConcurrentGetPut(b *testing.B) { ctx := context.Background() connPool := pool.NewConnPool(&pool.Options{ - Dialer: dummyDialer, - PoolSize: int32(32), - PoolTimeout: time.Second, - DialTimeout: time.Second, - ConnMaxIdleTime: time.Hour, - MinIdleConns: int32(0), + Dialer: dummyDialer, + PoolSize: int32(32), + MaxConcurrentDials: 32, + PoolTimeout: time.Second, + DialTimeout: time.Second, + ConnMaxIdleTime: time.Hour, + MinIdleConns: int32(0), }) defer connPool.Close() From e05684c5ecb9ceffa7fb41df5952ed66228bc9cc Mon Sep 17 00:00:00 2001 From: "yinhang.sun" Date: Sun, 31 Aug 2025 22:29:19 +0800 Subject: [PATCH 2/6] update default values and testcase --- options.go | 1 - 1 file changed, 1 deletion(-) diff --git a/options.go b/options.go index d07a19d6e..19e8cd4cc 100644 --- a/options.go +++ b/options.go @@ -34,7 +34,6 @@ type Limiter interface { // Options keeps the settings to set up redis connection. type Options struct { - // Network type, either tcp or unix. // // default: is tcp. From ce13fa884667099cc9ad096c5e484ce3ee89e3ef Mon Sep 17 00:00:00 2001 From: cyningsun Date: Thu, 4 Sep 2025 22:39:14 +0800 Subject: [PATCH 3/6] fix comments --- options.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/options.go b/options.go index 19e8cd4cc..7cb994c92 100644 --- a/options.go +++ b/options.go @@ -139,10 +139,6 @@ type Options struct { // default: 3 seconds WriteTimeout time.Duration - // MaxConcurrentDials is the maximum number of concurrent connection creation goroutines. - // If 0, defaults to PoolSize/4+1. If negative, unlimited goroutines (not recommended). - MaxConcurrentDials int - // ContextTimeoutEnabled controls whether the client respects context timeouts and deadlines. // See https://redis.uptrace.dev/guide/go-redis-debugging.html#timeouts ContextTimeoutEnabled bool @@ -179,6 +175,10 @@ type Options struct { // default: 10 * runtime.GOMAXPROCS(0) PoolSize int + // MaxConcurrentDials is the maximum number of concurrent connection creation goroutines. + // If <= 0, defaults to PoolSize. If > PoolSize, it will be capped at PoolSize. + MaxConcurrentDials int + // PoolTimeout is the amount of time client waits for connection if all connections // are busy before returning an error. // From b6ad4fdc562bb9e3389bf9e58bf1d8ac255cf43a Mon Sep 17 00:00:00 2001 From: cyningsun Date: Thu, 4 Sep 2025 23:53:43 +0800 Subject: [PATCH 4/6] fix data race --- internal/pool/pool.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/internal/pool/pool.go b/internal/pool/pool.go index 6158ca18a..b837425c5 100644 --- a/internal/pool/pool.go +++ b/internal/pool/pool.go @@ -122,6 +122,14 @@ type wantConn struct { result chan wantConnResult // channel to deliver connection or error } +// getCtxForDial returns context for dial or nil if connection was delivered or canceled. +func (w *wantConn) getCtxForDial() context.Context { + w.mu.Lock() + defer w.mu.Unlock() + + return w.ctx +} + func (w *wantConn) tryDeliver(cn *Conn, err error) bool { w.mu.Lock() defer w.mu.Unlock() @@ -576,12 +584,13 @@ func (p *ConnPool) asyncNewConn(ctx context.Context) (*Conn, error) { defer w.cancelCtx() defer func() { <-p.dialsInProgress }() // Release connection creation permission - cn, cnErr := p.newConn(w.ctx, true) + dialCtx := w.getCtxForDial() + cn, cnErr := p.newConn(dialCtx, true) delivered := w.tryDeliver(cn, cnErr) if cnErr == nil && delivered { return } else if cnErr == nil && !delivered { - p.Put(w.ctx, cn) + p.Put(dialCtx, cn) } else { // freeTurn after error p.freeTurn() } From 45748886611839828566df6aa97a8e31e94dbf53 Mon Sep 17 00:00:00 2001 From: cyningsun Date: Fri, 5 Sep 2025 00:19:23 +0800 Subject: [PATCH 5/6] remove context.WithoutCancel, which is a function introduced in Go 1.21 --- internal/pool/pool.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pool/pool.go b/internal/pool/pool.go index b837425c5..f5bac47a5 100644 --- a/internal/pool/pool.go +++ b/internal/pool/pool.go @@ -566,7 +566,7 @@ func (p *ConnPool) asyncNewConn(ctx context.Context) (*Conn, error) { return nil, ctx.Err() } - dialCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), p.cfg.DialTimeout) + dialCtx, cancel := context.WithTimeout(context.Background(), p.cfg.DialTimeout) w := &wantConn{ ctx: dialCtx, From 1e50d4fd6b1a2f27ba64b27971ed50ad6b06f355 Mon Sep 17 00:00:00 2001 From: cyningsun Date: Fri, 5 Sep 2025 00:41:07 +0800 Subject: [PATCH 6/6] fix TestDialerRetryConfiguration/DefaultDialerRetries, because tryDial are likely done in async flow --- internal/pool/pool_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/pool/pool_test.go b/internal/pool/pool_test.go index 3b3a9db24..71fd31261 100644 --- a/internal/pool/pool_test.go +++ b/internal/pool/pool_test.go @@ -508,9 +508,13 @@ func TestDialerRetryConfiguration(t *testing.T) { } // Should have attempted 5 times (default DialerRetries = 5) + // There might be 1 additional attempt due to tryDial() recovery mechanism finalAttempts := atomic.LoadInt64(&attempts) - if finalAttempts != 5 { - t.Errorf("Expected 5 dial attempts (default), got %d", finalAttempts) + if finalAttempts < 5 { + t.Errorf("Expected at least 5 dial attempts (default), got %d", finalAttempts) + } + if finalAttempts > 6 { + t.Errorf("Expected around 5 dial attempts, got %d (too many)", finalAttempts) } }) }