@@ -74,7 +74,16 @@ func Run(cmd *cobra.Command, args []string) {
7474 logs .Log .Fatalf ("While evaluating configuration: %v" , err )
7575 }
7676
77- go func () {
77+ group , gctx := errgroup .WithContext (ctx )
78+ defer func () {
79+ // TODO: replace Fatalf log calls with Errorf and return the error
80+ cancel ()
81+ if err := group .Wait (); err != nil {
82+ logs .Log .Fatalf ("failed to wait for controller-runtime component to stop: %v" , err )
83+ }
84+ }()
85+
86+ group .Go (func () error {
7887 server := http .NewServeMux ()
7988
8089 if Flags .Profiling {
@@ -105,21 +114,25 @@ func Run(cmd *cobra.Command, args []string) {
105114
106115 err := http .ListenAndServe (":8081" , server )
107116 if err != nil && ! errors .Is (err , http .ErrServerClosed ) {
108- logs . Log . Fatalf ("failed to run the health check server: %s" , err )
117+ return fmt . Errorf ("failed to run the health check server: %s" , err )
109118 }
110- }()
119+ // The agent must stop if the management server stops
120+ cancel ()
121+ return nil
122+ })
111123
112124 _ , isVenConn := preflightClient .(* client.VenConnClient )
113125 if isVenConn {
114- go func () {
115- err := preflightClient .(manager.Runnable ).Start (ctx )
126+ group . Go ( func () error {
127+ err := preflightClient .(manager.Runnable ).Start (gctx )
116128 if err != nil {
117- logs . Log . Fatalf ("failed to start a controller-runtime component: %v" , err )
129+ return fmt . Errorf ("failed to start a controller-runtime component: %v" , err )
118130 }
119131
120132 // The agent must stop if the controller-runtime component stops.
121133 cancel ()
122- }()
134+ return nil
135+ })
123136 }
124137
125138 // To help users notice issues with the agent, we show the error messages in
@@ -130,15 +143,6 @@ func Run(cmd *cobra.Command, args []string) {
130143 }
131144
132145 dataGatherers := map [string ]datagatherer.DataGatherer {}
133- group , gctx := errgroup .WithContext (ctx )
134-
135- defer func () {
136- // TODO: replace Fatalf log calls with Errorf and return the error
137- cancel ()
138- if err := group .Wait (); err != nil {
139- logs .Log .Fatalf ("failed to wait for controller-runtime component to stop: %v" , err )
140- }
141- }()
142146
143147 // load datagatherer config and boot each one
144148 for _ , dgConfig := range config .DataGatherers {
@@ -160,6 +164,8 @@ func Run(cmd *cobra.Command, args []string) {
160164 if err := newDg .Run (gctx .Done ()); err != nil {
161165 return fmt .Errorf ("failed to start %q data gatherer %q: %v" , kind , dgConfig .Name , err )
162166 }
167+ // The agent must stop if any of the data gatherers stops
168+ cancel ()
163169 return nil
164170 })
165171
@@ -192,15 +198,24 @@ func Run(cmd *cobra.Command, args []string) {
192198
193199 // begin the datagathering loop, periodically sending data to the
194200 // configured output using data in datagatherer caches or refreshing from
195- // APIs each cycle depending on datagatherer implementation
201+ // APIs each cycle depending on datagatherer implementation.
202+ // If any of the goroutines exits (with nil or an error) the main context will
203+ // be cancelled, which will cause this blocking loop to exit
204+ // instead of waiting for the time period.
205+ // TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't
206+ // have to wait for it to finish before exiting the process.
196207 for {
197208 gatherAndOutputData (eventf , config , preflightClient , dataGatherers )
198209
199210 if config .OneShot {
200211 break
201212 }
202213
203- time .Sleep (config .Period )
214+ select {
215+ case <- gctx .Done ():
216+ return
217+ case <- time .After (config .Period ):
218+ }
204219 }
205220}
206221
0 commit comments