diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1dd25c8192..6ca236651e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -108,7 +108,7 @@ Use clang-format to format all source files (\*.h, \*.cc, \*.proto, source files before submitting a pull request: ``` -$ apt-get install clang-format clang-format-6.0 +$ apt-get install clang-format-15 ``` For convenience there is a format.py script in the diff --git a/compose.py b/compose.py index 79e64f0646..0a00883727 100644 --- a/compose.py +++ b/compose.py @@ -130,6 +130,7 @@ def add_requested_repoagents(ddir, dockerfile_name, repoagents): with open(os.path.join(ddir, dockerfile_name), "a") as dfile: dfile.write(df) + def add_requested_caches(ddir, dockerfile_name, caches): df = "# Copying over caches \n" for cache in caches: @@ -143,6 +144,7 @@ def add_requested_caches(ddir, dockerfile_name, caches): with open(os.path.join(ddir, dockerfile_name), "a") as dfile: dfile.write(df) + def end_dockerfile(ddir, dockerfile_name, argmap): # Install additional dependencies df = "" @@ -372,8 +374,7 @@ def create_argmap(images, skip_pull): '--cache', action='append', required=False, - help= - 'Include in the generated Docker image. The flag may ' + help='Include in the generated Docker image. The flag may ' 'be specified multiple times.') parser.add_argument( '--skip-pull', diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/config.py b/deploy/mlflow-triton-plugin/mlflow_triton/config.py index 229a6b35ad..484b026227 100644 --- a/deploy/mlflow-triton-plugin/mlflow_triton/config.py +++ b/deploy/mlflow-triton-plugin/mlflow_triton/config.py @@ -48,13 +48,12 @@ def __init__(self): protocol = "http://" endpoint_url = None if uri.host_name != "" and uri.host_port != "": - endpoint_url = '{}{}:{}'.format( - protocol, uri.host_name, uri.host_port) + endpoint_url = '{}{}:{}'.format(protocol, uri.host_name, + uri.host_port) import boto3 # boto3 handles AWS credentials - self['s3'] = boto3.client( - 's3', endpoint_url=endpoint_url) + self['s3'] = boto3.client('s3', endpoint_url=endpoint_url) self['s3_bucket'] = uri.bucket self['s3_prefix'] = uri.prefix self['triton_model_repo'] = 's3://{}'.format( diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py b/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py index 5009e4a545..0a22ba6c88 100644 --- a/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py +++ b/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py @@ -188,11 +188,12 @@ def list_deployments(self): d['name'], _MLFLOW_META_FILENAME) if 's3' in self.server_config: - meta_dict = ast.literal_eval(self.server_config['s3'].get_object( - Bucket=self.server_config['s3_bucket'], - Key=os.path.join( - self.server_config['s3_prefix'], d['name'], _MLFLOW_META_FILENAME), - )['Body'].read().decode('utf-8')) + meta_dict = ast.literal_eval( + self.server_config['s3'].get_object( + Bucket=self.server_config['s3_bucket'], + Key=os.path.join(self.server_config['s3_prefix'], + d['name'], _MLFLOW_META_FILENAME), + )['Body'].read().decode('utf-8')) elif os.path.isfile(mlflow_meta_path): meta_dict = self._get_mlflow_meta_dict(d['name']) else: @@ -280,12 +281,13 @@ def _generate_mlflow_meta_file(self, name, flavor, model_uri): self.server_config['s3'].put_object( Body=json.dumps(meta_dict, indent=4).encode('utf-8'), Bucket=self.server_config["s3_bucket"], - Key=os.path.join( - self.server_config['s3_prefix'], name, _MLFLOW_META_FILENAME), + Key=os.path.join(self.server_config['s3_prefix'], name, + _MLFLOW_META_FILENAME), ) else: - with 
open(os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME), - "w") as outfile: + with open( + os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME), + "w") as outfile: json.dump(meta_dict, outfile, indent=4) print("Saved", _MLFLOW_META_FILENAME, "to", triton_deployment_dir) @@ -295,11 +297,12 @@ def _get_mlflow_meta_dict(self, name): _MLFLOW_META_FILENAME) if 's3' in self.server_config: - mlflow_meta_dict = ast.literal_eval(self.server_config['s3'].get_object( - Bucket=self.server_config['s3_bucket'], - Key=os.path.join( - self.server_config['s3_prefix'], name, _MLFLOW_META_FILENAME), - )['Body'].read().decode('utf-8')) + mlflow_meta_dict = ast.literal_eval( + self.server_config['s3'].get_object( + Bucket=self.server_config['s3_bucket'], + Key=os.path.join(self.server_config['s3_prefix'], name, + _MLFLOW_META_FILENAME), + )['Body'].read().decode('utf-8')) else: with open(mlflow_meta_path, 'r') as metafile: mlflow_meta_dict = json.load(metafile) @@ -392,7 +395,8 @@ def _copy_files_to_triton_repo(self, artifact_path, name, flavor): s3_path = os.path.join( self.server_config['s3_prefix'], copy_paths[key]['to'].replace( - self.server_config['triton_model_repo'], '').strip('/'), + self.server_config['triton_model_repo'], + '').strip('/'), filename, ) @@ -413,8 +417,8 @@ def _copy_files_to_triton_repo(self, artifact_path, name, flavor): if os.path.isdir(copy_paths[key]['from']): if os.path.isdir(copy_paths[key]['to']): shutil.rmtree(copy_paths[key]['to']) - shutil.copytree( - copy_paths[key]['from'], copy_paths[key]['to']) + shutil.copytree(copy_paths[key]['from'], + copy_paths[key]['to']) else: if not os.path.isdir(copy_paths[key]['to']): os.makedirs(copy_paths[key]['to']) diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/common.h b/docs/examples/jetson/concurrency_and_dynamic_batching/common.h index 4a0a27ac08..b55c8b71c5 100644 --- a/docs/examples/jetson/concurrency_and_dynamic_batching/common.h +++ b/docs/examples/jetson/concurrency_and_dynamic_batching/common.h @@ -27,6 +27,7 @@ #include #include + #include "triton/core/tritonserver.h" #define RETURN_IF_ERR(X) \ diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc b/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc index 0affacb3f1..ce22bdcba9 100644 --- a/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc +++ b/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc @@ -27,24 +27,23 @@ #include #include #include + #include #include #include #include +#include #include #include #include #include -#include "triton/core/tritonserver.h" - #include "common.h" - -#include #include "opencv2/core.hpp" #include "opencv2/highgui.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/opencv.hpp" +#include "triton/core/tritonserver.h" #ifdef TRITON_ENABLE_GPU #include diff --git a/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py b/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py index 8850f0e031..84e43eccf9 100644 --- a/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py +++ b/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py @@ -108,7 +108,7 @@ def execute(self, requests): with self.inflight_thread_count_lck: self.inflight_thread_count += 1 thread1.start() - + logger = pb_utils.Logger logger.log("Execute-Specific Msg!", logger.INFO) logger.log_info("Execute-Info Msg!") diff --git a/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py 
b/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py index d278e07a0e..81bb397115 100644 --- a/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py +++ b/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py @@ -57,15 +57,14 @@ def execute(self, requests): for request in requests: thread = threading.Thread(target=self.response_thread, - args=(request.get_response_sender(), - pb_utils.get_input_tensor_by_name( - request, 'IN').as_numpy())) + args=(request.get_response_sender(), + pb_utils.get_input_tensor_by_name( + request, 'IN').as_numpy())) thread.daemon = True with self.inflight_thread_count_lck: self.inflight_thread_count += 1 thread.start() - return None def response_thread(self, response_sender, in_value): @@ -95,22 +94,22 @@ def response_thread(self, response_sender, in_value): response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) else: - output_tensors = [pb_utils.Tensor('OUT', output0.as_numpy())] + output_tensors = [ + pb_utils.Tensor('OUT', output0.as_numpy()) + ] response = pb_utils.InferenceResponse( output_tensors=output_tensors) response_sender.send(response) response_count += 1 - if in_value != response_count-1: - error_message = ( - "Expected {} responses, got {}".format( - in_value, len(infer_responses)-1)) - response = pb_utils.InferenceResponse( - error=error_message) + if in_value != response_count - 1: + error_message = ("Expected {} responses, got {}".format( + in_value, + len(infer_responses) - 1)) + response = pb_utils.InferenceResponse(error=error_message) response_sender.send( - response, - flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) + response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) else: response_sender.send( flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) diff --git a/qa/L0_backend_python/logging/logging_test.py b/qa/L0_backend_python/logging/logging_test.py index b72d9b2b7a..1070d240a7 100644 --- a/qa/L0_backend_python/logging/logging_test.py +++ b/qa/L0_backend_python/logging/logging_test.py @@ -31,10 +31,10 @@ import numpy as np import test_util as tu - from tritonclient.utils import * import tritonclient.http as httpclient + class LogTest(tu.TestResultCollector): def test_log_output(self): @@ -43,7 +43,7 @@ def test_log_output(self): input_data = np.array([[1.0]], dtype=np.float32) inputs = [ httpclient.InferInput("INPUT0", input_data.shape, - np_to_triton_dtype(input_data.dtype)) + np_to_triton_dtype(input_data.dtype)) ] inputs[0].set_data_from_numpy(input_data) result = client.infer(model_name, inputs) @@ -51,5 +51,6 @@ def test_log_output(self): self.assertIsNotNone(output0) self.assertTrue(np.all(output0 == input_data)) + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py index 49413bce55..ccd4d985b3 100644 --- a/qa/L0_backend_python/python_test.py +++ b/qa/L0_backend_python/python_test.py @@ -182,9 +182,8 @@ def test_async_infer(self): # Make sure the requests ran in parallel. 
stats = client.get_inference_statistics(model_name) - test_cond = (len(stats['model_stats']) - != 1) or (stats['model_stats'][0]['name'] - != model_name) + test_cond = (len(stats['model_stats']) != 1) or ( + stats['model_stats'][0]['name'] != model_name) self.assertFalse( test_cond, "error: expected statistics for {}".format(model_name)) diff --git a/qa/L0_backend_python/python_unittest.py b/qa/L0_backend_python/python_unittest.py index af583fc560..9ff1b30e02 100644 --- a/qa/L0_backend_python/python_unittest.py +++ b/qa/L0_backend_python/python_unittest.py @@ -53,7 +53,7 @@ def _run_unittest(self, model_name): def test_python_unittest(self): model_name = os.environ['MODEL_NAME'] - bls_kind = os.environ.get('BLS_KIND', 'non_decoupled') + bls_kind = os.environ.get('BLS_KIND', 'non_decoupled') if bls_kind == "decoupled": # Skip the shared memory probe for decoupled models for now as diff --git a/qa/L0_cuda_graph/trt_cuda_graph_test.py b/qa/L0_cuda_graph/trt_cuda_graph_test.py index 07b744c0b1..6cb68255ae 100644 --- a/qa/L0_cuda_graph/trt_cuda_graph_test.py +++ b/qa/L0_cuda_graph/trt_cuda_graph_test.py @@ -36,7 +36,7 @@ class TrtCudaGraphTest(tu.TestResultCollector): - MODELNAME= "plan" + MODELNAME = "plan" def setUp(self): self.dtype_ = np.float32 @@ -50,7 +50,8 @@ def _check_infer(self, tensor_shape, batch_size=1): else: full_shape = tensor_shape iu.infer_exact(self, - self.model_name_, full_shape, + self.model_name_, + full_shape, batch_size, self.dtype_, self.dtype_, @@ -140,6 +141,7 @@ def test_range_dynamic_shape(self): def test_nobatch_fixed_shape(self): self._check_infer((16,), 0) + if __name__ == '__main__': if len(sys.argv) > 2: TrtCudaGraphTest.MODELNAME = sys.argv.pop() diff --git a/qa/L0_decoupled/decoupled_test.py b/qa/L0_decoupled/decoupled_test.py index ade5f9f2ec..0ce47e5b80 100644 --- a/qa/L0_decoupled/decoupled_test.py +++ b/qa/L0_decoupled/decoupled_test.py @@ -39,10 +39,13 @@ import tritonclient.http as httpclient from tritonclient.utils import InferenceServerException + class UserData: + def __init__(self): self._response_queue = queue.Queue() + def callback(user_data, result, error): if error: user_data._response_queue.put(error) @@ -73,17 +76,17 @@ def setUp(self): # Client can receive a "triton_final_response" response parameter # from Triton server that indicates when a response is the final response for - # its request. + # its request. # # For non-decoupled models, there is a 1:1 request:response ratio, so every - # response is the final response, and this parameter is unnecessary. - # + # response is the final response, and this parameter is unnecessary. + # # For decoupled models, there is a 1:N request:response ratio, so there may be - # more than one response before receiving the "final" response. + # more than one response before receiving the "final" response. # # However, decoupled models have the unique property in that they can return # a flags-only response to the server to indicate completion, which is not - # returned to the client by default (See TRITONBACKEND_ResponseFactorySendFlags). + # returned to the client by default (See TRITONBACKEND_ResponseFactorySendFlags). # # To forward this flags-only response to the client, users must opt-in to this # behavior by adding the following argument: @@ -92,11 +95,12 @@ def setUp(self): # If the decoupled backend/model always sends the final response flag along # with a non-null response, no opt-in is needed. 
# - # With this behavior, the client can programmatically detect when all responses + # With this behavior, the client can programmatically detect when all responses # for an individual request have been received without knowing the expected # number of responses in advance and without closing the stream. def _stream_infer_with_params(self, request_count, request_delay, _, - delay_data, delay_factor, user_data, result_dict): + delay_data, delay_factor, user_data, + result_dict): with grpcclient.InferenceServerClient(url="localhost:8001", verbose=True) as triton_client: # Establish stream @@ -112,8 +116,7 @@ def _stream_infer_with_params(self, request_count, request_delay, _, outputs=self.requested_outputs_, # Opt-in to receiving flags-only responses from model/backend # to help detect final responses for decoupled models. - enable_empty_final_response=True - ) + enable_empty_final_response=True) # Update delay input in accordance with the scaling factor delay_data = delay_data * delay_factor delay_data = delay_data.astype(np.uint32) @@ -130,18 +133,20 @@ def _stream_infer_with_params(self, request_count, request_delay, _, # Request IDs should generally be provided with each request # to associate decoupled responses with their requests. if not response.id: - raise ValueError("No response id found. Was a request_id provided?") + raise ValueError( + "No response id found. Was a request_id provided?") # Detect final response. Parameters are oneof and we expect bool_param - if response.parameters.get("triton_final_response").bool_param: - completed_requests += 1 + if response.parameters.get( + "triton_final_response").bool_param: + completed_requests += 1 - # Only process non-empty response, ignore if empty (no outputs) + # Only process non-empty response, ignore if empty (no outputs) if response.outputs: - if response.id not in result_dict: - result_dict[response.id] = [] - result_dict[response.id].append((recv_count, data_item)) - recv_count += 1 + if response.id not in result_dict: + result_dict[response.id] = [] + result_dict[response.id].append((recv_count, data_item)) + recv_count += 1 def _stream_infer(self, request_count, request_delay, expected_count, delay_data, delay_factor, user_data, result_dict): @@ -171,7 +176,7 @@ def _stream_infer(self, request_count, request_delay, expected_count, else: this_id = data_item.get_response().id if this_id not in result_dict: - result_dict[this_id] = [] + result_dict[this_id] = [] result_dict[this_id].append((recv_count, data_item)) recv_count += 1 @@ -232,8 +237,9 @@ def _decoupled_infer(self, self.requested_outputs_ = self.outputs_ if validate_fn is None else self.outputs_[ 0:1] - - for infer_helper in [self._stream_infer, self._stream_infer_with_params]: + for infer_helper in [ + self._stream_infer, self._stream_infer_with_params + ]: user_data = UserData() result_dict = {} @@ -254,8 +260,8 @@ def _decoupled_infer(self, this_id = str(i) if repeat_count != 0 and this_id not in result_dict.keys(): self.assertTrue( - False, - "response for request id {} not received".format(this_id)) + False, "response for request id {} not received".format( + this_id)) elif repeat_count == 0 and this_id in result_dict.keys(): self.assertTrue( False, @@ -263,7 +269,8 @@ def _decoupled_infer(self, this_id)) if repeat_count != 0: if validate_fn is None: - self.assertEqual(len(result_dict[this_id]), repeat_count) + self.assertEqual(len(result_dict[this_id]), + repeat_count) expected_data = data_offset result_list = result_dict[this_id] for j in range(len(result_list)): 
@@ -278,7 +285,8 @@ def _decoupled_infer(self, self.assertEqual(this_idx[0], j) expected_data += 1 else: - validate_fn(result_dict[this_id], data_offset, repeat_count) + validate_fn(result_dict[this_id], data_offset, + repeat_count) def test_one_to_none(self): # Test cases where each request generates no response. diff --git a/qa/L0_implicit_state/implicit_state.py b/qa/L0_implicit_state/implicit_state.py index fa71374c47..64993e86b1 100644 --- a/qa/L0_implicit_state/implicit_state.py +++ b/qa/L0_implicit_state/implicit_state.py @@ -61,9 +61,7 @@ def test_no_implicit_state(self): self.assertIn("unable to add state 'undefined_state'", err_str) self.assertIn( "state configuration is missing for model 'no_implicit_state'", - err_str - ) - + err_str) def test_wrong_implicit_state_name(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") @@ -81,10 +79,8 @@ def test_wrong_implicit_state_name(self): sequence_start=True) err_str = str(e.exception).lower() - self.assertIn( - "state 'undefined_state' is not a valid state name", - err_str - ) + self.assertIn("state 'undefined_state' is not a valid state name", + err_str) def test_no_update(self): # Test implicit state without updating any state @@ -134,8 +130,7 @@ def test_request_output_not_allowed(self): sequence_end=True) self.assertIn( "unexpected inference output 'OUTPUT_STATE' for model", - str(e.exception) - ) + str(e.exception)) def test_request_output(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") diff --git a/qa/L0_infer/infer_test.py b/qa/L0_infer/infer_test.py index 7e76fa43ef..1e0e172a13 100644 --- a/qa/L0_infer/infer_test.py +++ b/qa/L0_infer/infer_test.py @@ -59,15 +59,16 @@ class InferTest(tu.TestResultCollector): - def _full_exact(self, - input_dtype, - output0_dtype, - output1_dtype, - output0_raw, - output1_raw, - swap, - # 60 sec is the default value - network_timeout=60.0): + def _full_exact( + self, + input_dtype, + output0_dtype, + output1_dtype, + output0_raw, + output1_raw, + swap, + # 60 sec is the default value + network_timeout=60.0): def _infer_exact_helper(tester, pf, diff --git a/qa/L0_logging/logging_endpoint_test.py b/qa/L0_logging/logging_endpoint_test.py index 73e9476138..2058d941c2 100644 --- a/qa/L0_logging/logging_endpoint_test.py +++ b/qa/L0_logging/logging_endpoint_test.py @@ -74,9 +74,8 @@ def check_server_initial_state(self): "log_format": "default" } triton_client = httpclient.InferenceServerClient("localhost:8000") - self.assertEqual(initial_settings, - triton_client.get_log_settings()) - + self.assertEqual(initial_settings, triton_client.get_log_settings()) + def test_http_get_settings(self): # Log settings will be the same as default settings since # no update has been made. 
@@ -89,8 +88,7 @@ def test_http_get_settings(self): "log_format": "default" } triton_client = httpclient.InferenceServerClient("localhost:8000") - self.assertEqual(initial_settings, - triton_client.get_log_settings(), + self.assertEqual(initial_settings, triton_client.get_log_settings(), "Unexpected initial log settings") def test_grpc_get_settings(self): @@ -121,10 +119,9 @@ def test_grpc_get_settings(self): } }), initial_settings) triton_client = grpcclient.InferenceServerClient("localhost:8001") - self.assertEqual(initial_settings, - triton_client.get_log_settings(), + self.assertEqual(initial_settings, triton_client.get_log_settings(), "Unexpected initial log settings") - + def test_http_update_settings(self): # Update each possible log configuration # field and check that they are reflected @@ -205,8 +202,7 @@ def test_http_update_settings(self): expected_log_settings_6, triton_client.update_log_settings(settings=expected_log_settings_6), "Unexpected updated log settings") - - + def test_grpc_update_settings(self): # Update each possible log configuration # field and check that they are reflected @@ -251,7 +247,7 @@ def test_grpc_update_settings(self): expected_log_settings_1, triton_client.update_log_settings(settings=log_settings_1), "Unexpected updated log settings") - + log_settings_2 = { "log_file": "log_file.log", "log_info": False, @@ -436,11 +432,12 @@ def test_grpc_update_settings(self): }, } }), expected_log_settings_6) - + self.assertEqual( expected_log_settings_6, triton_client.update_log_settings(settings=log_settings_6), "Unexpected updated log settings") - + + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_metrics/metrics_test.py b/qa/L0_metrics/metrics_test.py index 6beb506601..36d732cdfa 100755 --- a/qa/L0_metrics/metrics_test.py +++ b/qa/L0_metrics/metrics_test.py @@ -34,31 +34,25 @@ import test_util as tu INF_COUNTER_PATTERNS = [ - 'nv_inference_request_duration', - 'nv_inference_queue_duration', - 'nv_inference_compute_input_duration', - 'nv_inference_compute_infer_duration', - 'nv_inference_compute_output_duration' + 'nv_inference_request_duration', 'nv_inference_queue_duration', + 'nv_inference_compute_input_duration', + 'nv_inference_compute_infer_duration', + 'nv_inference_compute_output_duration' ] INF_SUMMARY_PATTERNS = [ - 'nv_inference_request_summary', - 'nv_inference_queue_summary', - 'nv_inference_compute_input_summary', - 'nv_inference_compute_infer_summary', - 'nv_inference_compute_output_summary' + 'nv_inference_request_summary', 'nv_inference_queue_summary', + 'nv_inference_compute_input_summary', 'nv_inference_compute_infer_summary', + 'nv_inference_compute_output_summary' ] CACHE_COUNTER_PATTERNS = [ - 'nv_cache_num_hits_per_model', - 'nv_cache_num_misses_per_model', - 'nv_cache_hit_duration_per_model', - 'nv_cache_miss_duration_per_model' -] -CACHE_SUMMARY_PATTERNS = [ - 'nv_cache_hit_summary', - 'nv_cache_miss_summary' + 'nv_cache_num_hits_per_model', 'nv_cache_num_misses_per_model', + 'nv_cache_hit_duration_per_model', 'nv_cache_miss_duration_per_model' ] +CACHE_SUMMARY_PATTERNS = ['nv_cache_hit_summary', 'nv_cache_miss_summary'] + class MetricsTest(tu.TestResultCollector): + def _get_metrics(self): metrics_url = "http://localhost:8002/metrics" r = requests.get(metrics_url) @@ -129,5 +123,6 @@ def test_inf_summaries_exist_with_cache(self): for metric in bad_patterns: self.assertNotIn(metric, metrics) + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_model_namespacing/python_addsub/__init__.py 
b/qa/L0_model_namespacing/python_addsub/__init__.py index 2e122f1cd3..e14880ceba 100644 --- a/qa/L0_model_namespacing/python_addsub/__init__.py +++ b/qa/L0_model_namespacing/python_addsub/__init__.py @@ -78,13 +78,12 @@ def execute(self, requests): """ This function is called on inference request. """ - responses = [] for request in requests: in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - responses.append( - pb_utils.InferenceResponse(self.addsub(in_0, in_1))) + responses.append(pb_utils.InferenceResponse(self.addsub(in_0, + in_1))) return responses def addsub(self, in_0, in_1): @@ -97,7 +96,7 @@ def addsub(self, in_0, in_1): in_0.as_numpy() - in_1.as_numpy()) out_tensor_0 = pb_utils.Tensor("OUTPUT0", - out_0.astype(self.output0_dtype)) + out_0.astype(self.output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", - out_1.astype(self.output1_dtype)) + out_1.astype(self.output1_dtype)) return [out_tensor_0, out_tensor_1] diff --git a/qa/L0_model_namespacing/python_subadd/__init__.py b/qa/L0_model_namespacing/python_subadd/__init__.py index a53bab0da7..6d38542bf0 100644 --- a/qa/L0_model_namespacing/python_subadd/__init__.py +++ b/qa/L0_model_namespacing/python_subadd/__init__.py @@ -77,13 +77,13 @@ def initialize(self, args): def execute(self, requests): """ This function is called on inference request. """ - + responses = [] for request in requests: in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - responses.append( - pb_utils.InferenceResponse(self.subadd(in_0, in_1))) + responses.append(pb_utils.InferenceResponse(self.subadd(in_0, + in_1))) return responses def subadd(self, in_0, in_1): @@ -96,7 +96,7 @@ def subadd(self, in_0, in_1): in_0.as_numpy() + in_1.as_numpy()) out_tensor_0 = pb_utils.Tensor("OUTPUT0", - out_0.astype(self.output0_dtype)) + out_0.astype(self.output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", - out_1.astype(self.output1_dtype)) + out_1.astype(self.output1_dtype)) return [out_tensor_0, out_tensor_1] diff --git a/qa/L0_model_namespacing/test.py b/qa/L0_model_namespacing/test.py index ff38918854..9de6ac749c 100644 --- a/qa/L0_model_namespacing/test.py +++ b/qa/L0_model_namespacing/test.py @@ -43,6 +43,7 @@ # Test utilities # + # Checker to perform inference on given model, expecting model to have # [INPUT0, INPUT1] and produce [OUTPUT0, OUTPUT1] where: # OUTPUT0 = INPUT0 + INPUT1 @@ -56,9 +57,11 @@ def __init__(self, checker_client=None): if checker_client is None: import tritonclient.http as checker_client if "http" in checker_client.__name__: - self.client_ = checker_client.InferenceServerClient("localhost:8000") + self.client_ = checker_client.InferenceServerClient( + "localhost:8000") else: - self.client_ = checker_client.InferenceServerClient("localhost:8001") + self.client_ = checker_client.InferenceServerClient( + "localhost:8001") # Create infer input tensors self.inputs_ = [] @@ -70,37 +73,45 @@ def __init__(self, checker_client=None): self.inputs_[0].set_data_from_numpy(input_data) self.inputs_[1].set_data_from_numpy(input_data) self.expected_outputs_ = { - "add" : (input_data + input_data), - "sub" : (input_data - input_data) + "add": (input_data + input_data), + "sub": (input_data - input_data) } - + def infer(self, model): res = self.client_.infer(model, self.inputs_) - np.testing.assert_allclose(res.as_numpy('OUTPUT0'), self.expected_outputs_["add"]) - np.testing.assert_allclose(res.as_numpy('OUTPUT1'), 
self.expected_outputs_["sub"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT0'), + self.expected_outputs_["add"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT1'), + self.expected_outputs_["sub"]) + # Checker to perform inference on given model, expecting model to have # [INPUT0, INPUT1] and produce [OUTPUT0, OUTPUT1] where: # OUTPUT0 = INPUT0 - INPUT1 # OUTPUT1 = INPUT0 + INPUT1 class SubAddChecker(AddSubChecker): + def infer(self, model): res = self.client_.infer(model, self.inputs_) - np.testing.assert_allclose(res.as_numpy('OUTPUT0'), self.expected_outputs_["sub"]) - np.testing.assert_allclose(res.as_numpy('OUTPUT1'), self.expected_outputs_["add"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT0'), + self.expected_outputs_["sub"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT1'), + self.expected_outputs_["add"]) + # # Test suites and cases # + class ModelNamespacePoll(tu.TestResultCollector): + def setUp(self): self.addsub_ = AddSubChecker() self.subadd_ = SubAddChecker() # For other server interaction self.client_ = httpclient.InferenceServerClient("localhost:8000") - def check_health(self, expect_live=True, expect_ready=True): self.assertEqual(self.client_.is_server_live(), expect_live) self.assertEqual(self.client_.is_server_ready(), expect_ready) @@ -126,16 +137,20 @@ def test_duplication(self): self.check_health() # infer check - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) - + # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -149,16 +164,20 @@ def test_ensemble_duplication(self): self.check_health() # infer - for model in ["composing_addsub",]: + for model in [ + "composing_addsub", + ]: self.addsub_.infer(model) - for model in ["composing_subadd",]: + for model in [ + "composing_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("simple_ensemble") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -172,7 +191,8 @@ def test_dynamic_resolution(self): # same as before the removal. self.assertTrue("NAMESPACE_TESTING_DIRCTORY" in os.environ) td = os.environ["NAMESPACE_TESTING_DIRCTORY"] - composing_before_path = os.path.join(td, "addsub_repo", "composing_model") + composing_before_path = os.path.join(td, "addsub_repo", + "composing_model") composing_after_path = os.path.join(td, "composing_model") self.check_health() @@ -183,27 +203,32 @@ def test_dynamic_resolution(self): # infer for model in ["simple_subadd", "simple_addsub", "composing_model"]: self.subadd_.infer(model) - + # step 2. 
shutil.move(composing_after_path, composing_before_path) time.sleep(5) # infer - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) class ModelNamespaceExplicit(tu.TestResultCollector): + def setUp(self): self.addsub_ = AddSubChecker() self.subadd_ = SubAddChecker() @@ -241,16 +266,20 @@ def test_duplication(self): self.client_.load_model(model) # infer - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -265,18 +294,22 @@ def test_ensemble_duplication(self): # load ensembles, cascadingly load composing model for model in ["simple_ensemble"]: self.client_.load_model(model) - + # infer - for model in ["composing_addsub",]: + for model in [ + "composing_addsub", + ]: self.addsub_.infer(model) - for model in ["composing_subadd",]: + for model in [ + "composing_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("simple_ensemble") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -290,7 +323,8 @@ def test_dynamic_resolution(self): # same as before the removal. self.assertTrue("NAMESPACE_TESTING_DIRCTORY" in os.environ) td = os.environ["NAMESPACE_TESTING_DIRCTORY"] - composing_before_path = os.path.join(td, "addsub_repo", "composing_model") + composing_before_path = os.path.join(td, "addsub_repo", + "composing_model") composing_after_path = os.path.join(td, "composing_model") self.check_health() @@ -303,25 +337,31 @@ def test_dynamic_resolution(self): # infer for model in ["simple_subadd", "simple_addsub", "composing_model"]: self.subadd_.infer(model) - + # step 2. 
shutil.move(composing_after_path, composing_before_path) # Explicitly load one of the ensembles, which should still trigger cascading # (re-)load - for model in ["simple_addsub", ]: + for model in [ + "simple_addsub", + ]: self.client_.load_model(model) # infer - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) diff --git a/qa/L0_model_queue/model_queue_test.py b/qa/L0_model_queue/model_queue_test.py index 5fef013321..e0875205ff 100644 --- a/qa/L0_model_queue/model_queue_test.py +++ b/qa/L0_model_queue/model_queue_test.py @@ -412,7 +412,8 @@ def test_max_priority_levels(self): kwargs=trial)) threads.append( threading.Thread(target=self.check_response, - args=(1, dtype, shapes, MAX_UINT32_PLUS_1, 0, (15000, 10000)), + args=(1, dtype, shapes, MAX_UINT32_PLUS_1, 0, + (15000, 10000)), kwargs=trial)) threads.append( threading.Thread(target=self.check_response, diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index b3fa2a62be..39f5bfc8d4 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -63,14 +63,18 @@ def __infer(self, batching=False): def __concurrent_infer(self, concurrency=4, batching=False): pool = concurrent.futures.ThreadPoolExecutor() stop = [False] + def repeat_infer(): while not stop[0]: self.__infer(batching) + infer_threads = [pool.submit(repeat_infer) for i in range(concurrency)] + def stop_infer(): stop[0] = True [t.result() for t in infer_threads] pool.shutdown() + return stop_infer def __check_count(self, kind, expected_count, poll=False): @@ -382,10 +386,12 @@ def test_instance_resource_increase(self): # possibly not updated to the larger resource requirement. 
infer_count = 8 infer_complete = [False for i in range(infer_count)] + def infer(): for i in range(infer_count): self.__infer() infer_complete[i] = True + with concurrent.futures.ThreadPoolExecutor() as pool: infer_thread = pool.submit(infer) time.sleep(infer_count / 2) # each infer should take < 0.5 seconds diff --git a/qa/L0_sagemaker/sagemaker_multi_model_test.py b/qa/L0_sagemaker/sagemaker_multi_model_test.py index 42847f1014..06cd48edd7 100644 --- a/qa/L0_sagemaker/sagemaker_multi_model_test.py +++ b/qa/L0_sagemaker/sagemaker_multi_model_test.py @@ -282,7 +282,10 @@ def test_sm_5_model_unload(self): def test_sm_6_ensemble_model(self): # Load ensemble model request_body = {"model_name": self.model3_name, "url": self.model3_url} - headers = {"Content-Type": "application/json", "X-Amzn-SageMaker-Target-Model": f"{self.model3_name}"} + headers = { + "Content-Type": "application/json", + "X-Amzn-SageMaker-Target-Model": f"{self.model3_name}" + } r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers) @@ -326,7 +329,6 @@ def test_sm_6_ensemble_model(self): self.assertEqual( r.status_code, 200, "Expected status code 200, received {}".format(r.status_code)) - if __name__ == "__main__": diff --git a/qa/L0_sdk/grpc_test.cc b/qa/L0_sdk/grpc_test.cc index 09fe5bbc51..3f45e4ae25 100644 --- a/qa/L0_sdk/grpc_test.cc +++ b/qa/L0_sdk/grpc_test.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + #include "grpc_client.h" namespace tc = triton::client; diff --git a/qa/L0_sdk/http_test.cc b/qa/L0_sdk/http_test.cc index 2c8e231fb2..0b2a4da597 100644 --- a/qa/L0_sdk/http_test.cc +++ b/qa/L0_sdk/http_test.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + #include "http_client.h" namespace tc = triton::client; diff --git a/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py b/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py index 8c4b590ddc..3f2eeeaa40 100644 --- a/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py +++ b/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py @@ -33,10 +33,12 @@ import test_util as tu import tritonclient.http as client + class TrtDataDependentShapeTest(tu.TestResultCollector): + def setUp(self): - self.triton_client = client.InferenceServerClient( - "localhost:8000", verbose=True) + self.triton_client = client.InferenceServerClient("localhost:8000", + verbose=True) def test_fixed(self): model_name = "plan_nobatch_nonzero_fixed" @@ -47,19 +49,17 @@ def test_fixed(self): inputs.append(client.InferInput('INPUT', [4, 4], "INT32")) inputs[-1].set_data_from_numpy(input_np) - results = self.triton_client.infer(model_name=model_name, - inputs=inputs) + results = self.triton_client.infer(model_name=model_name, inputs=inputs) # Validate the results by comparing with precomputed values. 
output_np = results.as_numpy('OUTPUT') self.assertTrue( np.array_equal(output_np, expected_output_np), - "OUTPUT expected: {}, got {}".format(expected_output_np, - output_np)) + "OUTPUT expected: {}, got {}".format(expected_output_np, output_np)) def test_dynamic(self): model_name = "plan_nobatch_nonzero_dynamic" input_data = [] - for i in range(20*16): + for i in range(20 * 16): input_data.append(i if (i % 2) == 0 else 0) input_np = np.array(input_data, dtype=np.int32).reshape((20, 16)) expected_output_np = np.nonzero(input_np) @@ -68,14 +68,12 @@ def test_dynamic(self): inputs.append(client.InferInput('INPUT', [20, 16], "INT32")) inputs[-1].set_data_from_numpy(input_np) - results = self.triton_client.infer(model_name=model_name, - inputs=inputs) + results = self.triton_client.infer(model_name=model_name, inputs=inputs) # Validate the results by comparing with precomputed values. output_np = results.as_numpy('OUTPUT') self.assertTrue( np.array_equal(output_np, expected_output_np), - "OUTPUT expected: {}, got {}".format(expected_output_np, - output_np)) + "OUTPUT expected: {}, got {}".format(expected_output_np, output_np)) if __name__ == '__main__': diff --git a/qa/L0_trt_error_propagation/trt_error_propagation_test.py b/qa/L0_trt_error_propagation/trt_error_propagation_test.py index 300f8b9705..69c7ecaa28 100644 --- a/qa/L0_trt_error_propagation/trt_error_propagation_test.py +++ b/qa/L0_trt_error_propagation/trt_error_propagation_test.py @@ -47,9 +47,8 @@ def test_invalid_trt_model(self): "Internal Error " ] for expected_msg_part in expected_msg_parts: - self.assertIn( - expected_msg_part, err_msg, - "Cannot find an expected part of error message") + self.assertIn(expected_msg_part, err_msg, + "Cannot find an expected part of error message") _, err_msg = err_msg.split(expected_msg_part) def test_invalid_trt_model_autocomplete(self): diff --git a/qa/common/check_copyright.py b/qa/common/check_copyright.py index b1d2d9105c..f5d84995e0 100755 --- a/qa/common/check_copyright.py +++ b/qa/common/check_copyright.py @@ -32,8 +32,8 @@ import pathlib FLAGS = None -SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png', 'log', 'preprocessed', - 'jmx', 'gz', 'json', 'pdf', 'so', 'onnx', 'svg') +SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png', 'log', 'preprocessed', 'jmx', 'gz', + 'json', 'pdf', 'so', 'onnx', 'svg') REPO_PATH_FROM_THIS_FILE = '../..' 
SKIP_PATHS = ( 'build', 'deploy/gke-marketplace-app/.gitignore', diff --git a/qa/common/gen_qa_reshape_models.py b/qa/common/gen_qa_reshape_models.py index b881410961..b6ff48003e 100644 --- a/qa/common/gen_qa_reshape_models.py +++ b/qa/common/gen_qa_reshape_models.py @@ -182,14 +182,11 @@ def create_tf_modelfile(create_savedmodel, models_dir, model_version, max_batch, tf.identity(tin, name=output_name) else: if max_batch == 0: - tf.reshape(tin, - output_shapes[io_num], - name=output_name) + tf.reshape(tin, output_shapes[io_num], name=output_name) else: tf.reshape(tin, [ -1, - ] + output_shapes[io_num], - name=output_name) + ] + output_shapes[io_num], name=output_name) # Use model name based on input/output count and non-batching variant if create_savedmodel: diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index 48336237f8..adf02684cc 100644 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -30,6 +30,7 @@ import tensorrt as trt import test_util as tu + def np_to_model_dtype(np_dtype): if np_dtype == bool: return "TYPE_BOOL" @@ -69,6 +70,7 @@ def np_to_trt_dtype(np_dtype): return trt.float32 return None + # The 'nonzero' model that we use for data dependent shape naturally does # not support batching, because the layer output is not trivially separable # based on the request batch size. @@ -117,7 +119,7 @@ def create_data_dependent_modelfile(models_dir, config = builder.create_builder_config() config.add_optimization_profile(profile) config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20) - + # serialized model engine_bytes = builder.build_serialized_network(network, config) @@ -130,8 +132,9 @@ def create_data_dependent_modelfile(models_dir, with open(model_version_dir + "/model.plan", "wb") as f: f.write(engine_bytes) + def create_data_dependent_modelconfig(models_dir, - model_name, + model_name, input_shape, input_dtype=np.int32): config_dir = models_dir + "/" + model_name @@ -153,9 +156,8 @@ def create_data_dependent_modelconfig(models_dir, dims: [ {} ] }} ] -'''.format(model_name, - np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape), - np_to_model_dtype(np.int32), +'''.format(model_name, np_to_model_dtype(input_dtype), + tu.shape_to_dims_str(input_shape), np_to_model_dtype(np.int32), tu.shape_to_dims_str((len(input_shape), -1))) try: @@ -176,10 +178,13 @@ def create_data_dependent_modelconfig(models_dir, FLAGS, unparsed = parser.parse_known_args() # Fixed input shape - create_data_dependent_modelfile(FLAGS.models_dir, "plan_nobatch_nonzero_fixed", (4, 4)) - create_data_dependent_modelconfig(FLAGS.models_dir, "plan_nobatch_nonzero_fixed", (4, 4)) + create_data_dependent_modelfile(FLAGS.models_dir, + "plan_nobatch_nonzero_fixed", (4, 4)) + create_data_dependent_modelconfig(FLAGS.models_dir, + "plan_nobatch_nonzero_fixed", (4, 4)) # Dynamic input shape - create_data_dependent_modelfile(FLAGS.models_dir, "plan_nobatch_nonzero_dynamic", (-1, -1)) - create_data_dependent_modelconfig(FLAGS.models_dir, "plan_nobatch_nonzero_dynamic", (-1, -1)) - + create_data_dependent_modelfile(FLAGS.models_dir, + "plan_nobatch_nonzero_dynamic", (-1, -1)) + create_data_dependent_modelconfig(FLAGS.models_dir, + "plan_nobatch_nonzero_dynamic", (-1, -1)) diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py index 86e2621b45..7520af80ee 100644 --- a/qa/common/gen_qa_trt_plugin_models.py +++ b/qa/common/gen_qa_trt_plugin_models.py @@ -37,6 +37,7 
@@ trt.init_libnvinfer_plugins(TRT_LOGGER, '') PLUGIN_CREATORS = trt.get_plugin_registry().plugin_creator_list + def np_to_model_dtype(np_dtype): if np_dtype == bool: return "TYPE_BOOL" diff --git a/qa/common/infer_test.py b/qa/common/infer_test.py index 3976d3cfe4..cd954f250e 100644 --- a/qa/common/infer_test.py +++ b/qa/common/infer_test.py @@ -37,9 +37,10 @@ np_dtype_string = np.dtype(object) # Allow caller to setup specific set of backends to test -DEFAULT_BACKENDS="graphdef savedmodel plan onnx libtorch" +DEFAULT_BACKENDS = "graphdef savedmodel plan onnx libtorch" TEST_BACKENDS = os.environ.get("BACKENDS", DEFAULT_BACKENDS).split() + class InferTest(tu.TestResultCollector): def _full_exact(self, input_dtype, output0_dtype, output1_dtype, @@ -80,7 +81,6 @@ def _infer_exact_helper(tester, use_streaming=use_streaming, correlation_id=correlation_id) - input_size = 16 if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype, @@ -107,7 +107,8 @@ def _infer_exact_helper(tester, else: shape = (input_size,) _infer_exact_helper(self, - 'plan', shape, + 'plan', + shape, 8, input_dtype, output0_dtype, diff --git a/qa/python_models/bls/model.py b/qa/python_models/bls/model.py index 995d3b2b14..cf7946e1ec 100644 --- a/qa/python_models/bls/model.py +++ b/qa/python_models/bls/model.py @@ -69,6 +69,7 @@ def bls_add_sub(_=None): return True + def bls_square(_=None): input0_np = np.random.randint(16, size=1, dtype=np.int32) input0 = pb_utils.Tensor('IN', input0_np) @@ -85,7 +86,8 @@ def bls_square(_=None): return False if len(infer_response.output_tensors()) > 0: - output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUT') + output0 = pb_utils.get_output_tensor_by_name( + infer_response, 'OUT') if output0 is None: return False @@ -96,11 +98,12 @@ def bls_square(_=None): response_count += 1 - if not np.all(input0.as_numpy() == response_count-1): + if not np.all(input0.as_numpy() == response_count - 1): return False return True + def bls_libtorch(model_name, result_device): shape = [16] input0_np = np.random.rand(*shape).astype(np.float32) @@ -116,11 +119,11 @@ def bls_libtorch(model_name, result_device): pb_utils.TRITONSERVER_MEMORY_GPU, 0) infer_request = pb_utils.InferenceRequest( - model_name=model_name, - model_version=1, - inputs=[input0, input1], - requested_output_names=['OUTPUT__0', 'OUTPUT__1'], - preferred_memory=preferred_memory) + model_name=model_name, + model_version=1, + inputs=[input0, input1], + requested_output_names=['OUTPUT__0', 'OUTPUT__1'], + preferred_memory=preferred_memory) infer_response = infer_request.exec() if infer_response.has_error(): @@ -146,8 +149,10 @@ def bls_libtorch(model_name, result_device): else: if output0.is_cpu() or output1.is_cpu(): return False - output0 = from_dlpack(output0.to_dlpack()).to('cpu').cpu().detach().numpy() - output1 = from_dlpack(output1.to_dlpack()).to('cpu').cpu().detach().numpy() + output0 = from_dlpack( + output0.to_dlpack()).to('cpu').cpu().detach().numpy() + output1 = from_dlpack( + output1.to_dlpack()).to('cpu').cpu().detach().numpy() if not np.all(output0 == expected_output_0): return False @@ -156,6 +161,7 @@ def bls_libtorch(model_name, result_device): return True + class PBBLSTest(unittest.TestCase): def setUp(self): @@ -605,13 +611,11 @@ def test_timeout(self): # Expect timeout error self.assertTrue(infer_response.has_error()) - self.assertIn( - "Request timeout expired", - infer_response.error().message()) + self.assertIn("Request timeout expired", + infer_response.error().message()) 
self.assertTrue(len(infer_response.output_tensors()) == 0) - def _test_response_iterator_square(self, - expected_output_cnt, + def _test_response_iterator_square(self, expected_output_cnt, expected_output_value, response_iterator): response_count = 0 @@ -620,7 +624,8 @@ def _test_response_iterator_square(self, for infer_response in response_iterator: self.assertFalse(infer_response.has_error()) if len(infer_response.output_tensors()) > 0: - output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUT') + output0 = pb_utils.get_output_tensor_by_name( + infer_response, 'OUT') self.assertIsNotNone(output0) self.assertEqual(expected_output_value, output0.as_numpy()) @@ -671,8 +676,8 @@ def test_response_iterator(self): response_count = 0 for infer_response in infer_responses: self.assertFalse(infer_response.has_error()) - output0 = pb_utils.get_output_tensor_by_name(infer_response, - 'OUT') + output0 = pb_utils.get_output_tensor_by_name( + infer_response, 'OUT') self.assertIsNotNone(output0) self.assertEqual(response_value, output0.as_numpy()) diff --git a/qa/python_models/bls_async/model.py b/qa/python_models/bls_async/model.py index 109f1df644..4158c82e9d 100644 --- a/qa/python_models/bls_async/model.py +++ b/qa/python_models/bls_async/model.py @@ -97,7 +97,6 @@ def verify_square_results(input0, infer_responses): flush=True) return False - if len(infer_response.output_tensors()) > 0: output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUT') @@ -118,8 +117,9 @@ def verify_square_results(input0, infer_responses): response_count += 1 - if not np.all(input0 == response_count-1): - print('Expected {} responses, got {}'.format(input0, response_count-1)) + if not np.all(input0 == response_count - 1): + print('Expected {} responses, got {}'.format(input0, + response_count - 1)) return False return True diff --git a/qa/python_models/bls_finalize_error/model.py b/qa/python_models/bls_finalize_error/model.py index 50baf4d9a8..f3db1d6bbe 100644 --- a/qa/python_models/bls_finalize_error/model.py +++ b/qa/python_models/bls_finalize_error/model.py @@ -27,7 +27,9 @@ import triton_python_backend_utils as pb_utils import numpy as np + class TritonPythonModel: + def initialize(self, args): pass @@ -38,7 +40,8 @@ def finalize(self): print('Cleaning up...') input0_np = np.random.randint(3, size=1, dtype=np.int32) input0 = pb_utils.Tensor('IN', input0_np) - infer_request = pb_utils.InferenceRequest(model_name='square_int32', - inputs=[input0], - requested_output_names=['OUT']) + infer_request = pb_utils.InferenceRequest( + model_name='square_int32', + inputs=[input0], + requested_output_names=['OUT']) infer_responses = infer_request.exec(decoupled=True) diff --git a/qa/python_models/bls_init_error/model.py b/qa/python_models/bls_init_error/model.py index f79b144bcb..f95ce4eff8 100644 --- a/qa/python_models/bls_init_error/model.py +++ b/qa/python_models/bls_init_error/model.py @@ -27,13 +27,16 @@ import triton_python_backend_utils as pb_utils import numpy as np + class TritonPythonModel: + def initialize(self, args): input0_np = np.random.randint(3, size=1, dtype=np.int32) input0 = pb_utils.Tensor('IN', input0_np) - infer_request = pb_utils.InferenceRequest(model_name='square_int32', - inputs=[input0], - requested_output_names=['OUT']) + infer_request = pb_utils.InferenceRequest( + model_name='square_int32', + inputs=[input0], + requested_output_names=['OUT']) infer_responses = infer_request.exec(decoupled=True) def execute(self, requests): diff --git a/qa/python_models/bls_undefined/model.py 
b/qa/python_models/bls_undefined/model.py index ca66c77b13..4b52c6e54f 100644 --- a/qa/python_models/bls_undefined/model.py +++ b/qa/python_models/bls_undefined/model.py @@ -26,10 +26,9 @@ class TritonPythonModel: + def execute(self, requests): undefined_variable def finalize(self): print('Cleaning up...') - - diff --git a/qa/python_models/dlpack_test/model.py b/qa/python_models/dlpack_test/model.py index a850afc1ce..2beab4af7c 100644 --- a/qa/python_models/dlpack_test/model.py +++ b/qa/python_models/dlpack_test/model.py @@ -246,7 +246,7 @@ def test_cuda_multi_gpu(self): self.assertTrue(cp.cuda.Stream(null=True).done) cupy_tensor_dlpack = cp.from_dlpack(pb_tensor) - with cp.cuda.Device(1): + with cp.cuda.Device(1): self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output)) self.assertFalse(pb_tensor.is_cpu()) @@ -279,8 +279,8 @@ def test_cuda_blocking_stream_multi_gpu(self): # all compute work self.assertTrue(blocking_stream.done) cupy_tensor_dlpack = cp.from_dlpack(pb_tensor) - - with cp.cuda.Device(1): + + with cp.cuda.Device(1): self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output)) self.assertFalse(pb_tensor.is_cpu()) @@ -317,8 +317,8 @@ def test_cuda_non_blocking_stream_multi_gpu(self): # all compute work self.assertTrue(non_blocking_stream.done) cupy_tensor_dlpack = cp.from_dlpack(pb_tensor) - - with cp.cuda.Device(2): + + with cp.cuda.Device(2): self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output)) self.assertFalse(pb_tensor.is_cpu()) diff --git a/qa/python_models/execute_return_error/model.py b/qa/python_models/execute_return_error/model.py index 85196c15d8..29367d4a0d 100644 --- a/qa/python_models/execute_return_error/model.py +++ b/qa/python_models/execute_return_error/model.py @@ -25,7 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - class TritonPythonModel: def initialize(self, args): diff --git a/qa/python_models/ground_truth/model.py b/qa/python_models/ground_truth/model.py index 1abe39e5a0..ee04c3a073 100644 --- a/qa/python_models/ground_truth/model.py +++ b/qa/python_models/ground_truth/model.py @@ -35,7 +35,7 @@ def execute(self, requests): Mock Model that uses the input data to determine how long to wait before returning identity data """ - assert(len(requests) == 1) + assert (len(requests) == 1) delay = 0 request = requests[0] responses = [] diff --git a/qa/python_models/identity_fp32_logging/model.py b/qa/python_models/identity_fp32_logging/model.py index 93aa0df998..9bc24ce488 100644 --- a/qa/python_models/identity_fp32_logging/model.py +++ b/qa/python_models/identity_fp32_logging/model.py @@ -36,7 +36,7 @@ def initialize(self, args): logger.log_warn("Initialize-Warning Msg!") logger.log_error("Initialize-Error Msg!") logger.log_verbose("Initialize-Verbose Msg!") - + def execute(self, requests): """ Identity model in Python backend. 
diff --git a/qa/python_models/identity_fp32_timeout/model.py b/qa/python_models/identity_fp32_timeout/model.py index 5e12aa87d8..7235e33d83 100644 --- a/qa/python_models/identity_fp32_timeout/model.py +++ b/qa/python_models/identity_fp32_timeout/model.py @@ -27,6 +27,7 @@ import triton_python_backend_utils as pb_utils import time + class TritonPythonModel: def execute(self, requests): diff --git a/qa/python_models/init_args/model.py b/qa/python_models/init_args/model.py index a3044a3457..0b6b16800e 100644 --- a/qa/python_models/init_args/model.py +++ b/qa/python_models/init_args/model.py @@ -28,6 +28,7 @@ import numpy as np import triton_python_backend_utils as pb_utils + def check_init_args(args): expected_args = { 'model_name': @@ -39,7 +40,8 @@ def check_init_args(args): 'model_instance_device_id': '0', 'model_repository': - os.getenv("TRITON_DIR", "/opt/tironserver") + '/qa/L0_backend_python/models/init_args', + os.getenv("TRITON_DIR", "/opt/tironserver") + + '/qa/L0_backend_python/models/init_args', 'model_version': '1' } diff --git a/qa/python_models/model_init_del/util.py b/qa/python_models/model_init_del/util.py index 6b77dde806..10b9df724a 100644 --- a/qa/python_models/model_init_del/util.py +++ b/qa/python_models/model_init_del/util.py @@ -33,6 +33,7 @@ # Helper functions for reading/writing state to disk # + def __get_number(filename): full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename) try: @@ -43,6 +44,7 @@ def __get_number(filename): txt = "0" return int(txt) + def __store_number(filename, number): full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename) txt = str(number) @@ -50,6 +52,7 @@ def __store_number(filename, number): fcntl.lockf(f, fcntl.LOCK_EX) f.write(txt) + def __inc_number(filename): full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename) try: @@ -66,49 +69,60 @@ def __inc_number(filename): __store_number(filename, number) return number + # # Functions for communicating initialize and finalize count between the model # and test # + def __get_count_filename(kind): if kind != "initialize" and kind != "finalize": raise KeyError("Invalid count kind: " + str(kind)) filename = __model_name + "_" + kind + "_count.txt" return filename + def get_count(kind): return __get_number(__get_count_filename(kind)) + def inc_count(kind): return __inc_number(__get_count_filename(kind)) + def reset_count(kind): count = 0 __store_number(__get_count_filename(kind), count) return count + # # Functions for communicating various delays (in seconds) to the model # + def __get_delay_filename(kind): if kind != "initialize" and kind != "infer": raise KeyError("Invalid delay kind: " + str(kind)) filename = __model_name + "_" + kind + "_delay.txt" return filename + def get_delay(kind): return __get_number(__get_delay_filename(kind)) + def set_delay(kind, delay): __store_number(__get_delay_filename(kind), delay) return delay + # # Functions for modifying the model # + def update_instance_group(instance_group_str): full_path = os.path.join(os.path.dirname(__file__), "config.pbtxt") with open(full_path, mode="r+", encoding="utf-8", errors="strict") as f: @@ -122,11 +136,13 @@ def update_instance_group(instance_group_str): f.write(txt) return txt + def update_model_file(): full_path = os.path.join(os.path.dirname(__file__), "1", "model.py") with open(full_path, mode="a", encoding="utf-8", errors="strict") as f: f.write("\n# dummy model file update\n") + def enable_batching(): full_path = os.path.join(os.path.dirname(__file__), "config.pbtxt") with open(full_path, 
mode="r+", encoding="utf-8", errors="strict") as f: @@ -137,6 +153,7 @@ def enable_batching(): f.write(txt) return txt + def disable_batching(): full_path = os.path.join(os.path.dirname(__file__), "config.pbtxt") with open(full_path, mode="r+", encoding="utf-8", errors="strict") as f: diff --git a/src/classification.cc b/src/classification.cc index d8dab03817..2d8cd26b9e 100644 --- a/src/classification.cc +++ b/src/classification.cc @@ -28,6 +28,7 @@ #include #include + #include "common.h" namespace triton { namespace server { diff --git a/src/classification.h b/src/classification.h index 27c8ba1ef6..9264baa2b0 100644 --- a/src/classification.h +++ b/src/classification.h @@ -27,6 +27,7 @@ #include #include + #include "triton/core/tritonserver.h" namespace triton { namespace server { diff --git a/src/command_line_parser.cc b/src/command_line_parser.cc index e5ae96bd93..9b3470b454 100644 --- a/src/command_line_parser.cc +++ b/src/command_line_parser.cc @@ -345,9 +345,9 @@ TritonParser::SetupOptions() "finish. After the timeout expires the server exits even if inferences " "are still in flight."}); - model_repo_options_.push_back({OPTION_MODEL_REPOSITORY, "model-store", - Option::ArgStr, - "Equivalent to --model-repository."}); + model_repo_options_.push_back( + {OPTION_MODEL_REPOSITORY, "model-store", Option::ArgStr, + "Equivalent to --model-repository."}); model_repo_options_.push_back( {OPTION_MODEL_REPOSITORY, "model-repository", Option::ArgStr, "Path to model repository directory. It may be specified multiple times " @@ -407,14 +407,16 @@ TritonParser::SetupOptions() "same name can be served if they are in different namespace."}); #if defined(TRITON_ENABLE_HTTP) - http_options_.push_back({OPTION_ALLOW_HTTP, "allow-http", Option::ArgBool, - "Allow the server to listen for HTTP requests."}); + http_options_.push_back( + {OPTION_ALLOW_HTTP, "allow-http", Option::ArgBool, + "Allow the server to listen for HTTP requests."}); http_options_.push_back( {OPTION_HTTP_ADDRESS, "http-address", Option::ArgStr, "The address for the http server to bind to. Default is 0.0.0.0"}); - http_options_.push_back({OPTION_HTTP_PORT, "http-port", Option::ArgInt, - "The port for the server to listen on for HTTP " - "requests. Default is 8000."}); + http_options_.push_back( + {OPTION_HTTP_PORT, "http-port", Option::ArgInt, + "The port for the server to listen on for HTTP " + "requests. Default is 8000."}); http_options_.push_back( {OPTION_REUSE_HTTP_PORT, "reuse-http-port", Option::ArgBool, "Allow multiple servers to listen on the same HTTP port when every " @@ -426,19 +428,22 @@ TritonParser::SetupOptions() Option::ArgStr, "The regular expression pattern that will be used for forwarding HTTP " "headers as inference request parameters."}); - http_options_.push_back({OPTION_HTTP_THREAD_COUNT, "http-thread-count", - Option::ArgInt, - "Number of threads handling HTTP requests."}); + http_options_.push_back( + {OPTION_HTTP_THREAD_COUNT, "http-thread-count", Option::ArgInt, + "Number of threads handling HTTP requests."}); #endif // TRITON_ENABLE_HTTP #if defined(TRITON_ENABLE_GRPC) - grpc_options_.push_back({OPTION_ALLOW_GRPC, "allow-grpc", Option::ArgBool, - "Allow the server to listen for GRPC requests."}); - grpc_options_.push_back({OPTION_GRPC_ADDRESS, "grpc-address", Option::ArgStr, - "The address for the grpc server to binds to. Default is 0.0.0.0"}); - grpc_options_.push_back({OPTION_GRPC_PORT, "grpc-port", Option::ArgInt, - "The port for the server to listen on for GRPC " - "requests. 
Default is 8001."}); + grpc_options_.push_back( + {OPTION_ALLOW_GRPC, "allow-grpc", Option::ArgBool, + "Allow the server to listen for GRPC requests."}); + grpc_options_.push_back( + {OPTION_GRPC_ADDRESS, "grpc-address", Option::ArgStr, + "The address for the grpc server to binds to. Default is 0.0.0.0"}); + grpc_options_.push_back( + {OPTION_GRPC_PORT, "grpc-port", Option::ArgInt, + "The port for the server to listen on for GRPC " + "requests. Default is 8001."}); grpc_options_.push_back( {OPTION_REUSE_GRPC_PORT, "reuse-grpc-port", Option::ArgBool, "Allow multiple servers to listen on the same GRPC port when every " @@ -536,13 +541,15 @@ TritonParser::SetupOptions() {OPTION_LOG_VERBOSE, "log-verbose", Option::ArgInt, "Set verbose logging level. Zero (0) disables verbose logging and " "values >= 1 enable verbose logging."}); - logging_options_.push_back({OPTION_LOG_INFO, "log-info", Option::ArgBool, - "Enable/disable info-level logging."}); - logging_options_.push_back({OPTION_LOG_WARNING, "log-warning", - Option::ArgBool, - "Enable/disable warning-level logging."}); - logging_options_.push_back({OPTION_LOG_ERROR, "log-error", Option::ArgBool, - "Enable/disable error-level logging."}); + logging_options_.push_back( + {OPTION_LOG_INFO, "log-info", Option::ArgBool, + "Enable/disable info-level logging."}); + logging_options_.push_back( + {OPTION_LOG_WARNING, "log-warning", Option::ArgBool, + "Enable/disable warning-level logging."}); + logging_options_.push_back( + {OPTION_LOG_ERROR, "log-error", Option::ArgBool, + "Enable/disable error-level logging."}); logging_options_.push_back( {OPTION_LOG_FORMAT, "log-format", Option::ArgStr, "Set the logging format. Options are \"default\" and \"ISO8601\". " diff --git a/src/command_line_parser.h b/src/command_line_parser.h index 0c57c6a345..ace5b6f232 100644 --- a/src/command_line_parser.h +++ b/src/command_line_parser.h @@ -34,6 +34,7 @@ #include #include #include + #include "triton/common/logging.h" #include "triton/core/tritonserver.h" #ifdef TRITON_ENABLE_GRPC diff --git a/src/common.cc b/src/common.cc index fe7de36948..83fe3c6c25 100644 --- a/src/common.cc +++ b/src/common.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" + #include #include diff --git a/src/common.h b/src/common.h index 4551d5cb3a..b7b4f845ac 100644 --- a/src/common.h +++ b/src/common.h @@ -29,6 +29,7 @@ #include #include #include + #include "triton/core/tritonserver.h" namespace triton { namespace server { @@ -95,17 +96,17 @@ const std::vector TRITON_RESERVED_REQUEST_PARAMS{ } \ } while (false) -#define THROW_IF_ERR(EX_TYPE, X, MSG) \ - do { \ - TRITONSERVER_Error* err__ = (X); \ - if (err__ != nullptr) { \ +#define THROW_IF_ERR(EX_TYPE, X, MSG) \ + do { \ + TRITONSERVER_Error* err__ = (X); \ + if (err__ != nullptr) { \ auto ex__ = (EX_TYPE)( \ std::string("error: ") + (MSG) + ": " + \ TRITONSERVER_ErrorCodeString(err__) + " - " + \ - TRITONSERVER_ErrorMessage(err__)); \ - TRITONSERVER_ErrorDelete(err__); \ - throw ex__; \ - } \ + TRITONSERVER_ErrorMessage(err__)); \ + TRITONSERVER_ErrorDelete(err__); \ + throw ex__; \ + } \ } while (false) #define IGNORE_ERR(X) \ diff --git a/src/data_compressor.h b/src/data_compressor.h index d8eafb9662..e417558901 100644 --- a/src/data_compressor.h +++ b/src/data_compressor.h @@ -25,6 +25,9 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 #pragma once
+#include
+#include
+
 #include
 #include
 #include
@@ -32,8 +35,6 @@
 #include
 #include
-#include
-#include
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/grpc/grpc_server.cc b/src/grpc/grpc_server.cc
index a5b340efc2..717766a366 100644
--- a/src/grpc/grpc_server.cc
+++ b/src/grpc/grpc_server.cc
@@ -28,6 +28,7 @@
 #include
 #include
+
 #include
 #include
 #include
@@ -38,6 +39,7 @@
 #include
 #include
 #include
+
 #include "../classification.h"
 #include "../common.h"
 #include "grpc++/grpc++.h"
@@ -301,8 +303,8 @@ class CommonHandler : public HandlerBase {
   static std::pair empty_restricted_key_;
 };
 
-std::pair CommonHandler::empty_restricted_key_{"",
-                                                         ""};
+std::pair CommonHandler::empty_restricted_key_{
+    "", ""};
 
 CommonHandler::CommonHandler(
     const std::string& name,
@@ -2315,16 +2317,19 @@ Server::Server(
         std::to_string(keepalive_options.keepalive_time_ms_)};
     table_printer.InsertRow(row);
 
-    row = {"keepalive_timeout_ms",
-           std::to_string(keepalive_options.keepalive_timeout_ms_)};
+    row = {
+        "keepalive_timeout_ms",
+        std::to_string(keepalive_options.keepalive_timeout_ms_)};
     table_printer.InsertRow(row);
 
-    row = {"keepalive_permit_without_calls",
-           std::to_string(keepalive_options.keepalive_permit_without_calls_)};
+    row = {
+        "keepalive_permit_without_calls",
+        std::to_string(keepalive_options.keepalive_permit_without_calls_)};
     table_printer.InsertRow(row);
 
-    row = {"http2_max_pings_without_data",
-           std::to_string(keepalive_options.http2_max_pings_without_data_)};
+    row = {
+        "http2_max_pings_without_data",
+        std::to_string(keepalive_options.http2_max_pings_without_data_)};
     table_printer.InsertRow(row);
 
     row = {
@@ -2333,8 +2338,9 @@ Server::Server(
             keepalive_options.http2_min_recv_ping_interval_without_data_ms_)};
     table_printer.InsertRow(row);
 
-    row = {"http2_max_ping_strikes",
-           std::to_string(keepalive_options.http2_max_ping_strikes_)};
+    row = {
+        "http2_max_ping_strikes",
+        std::to_string(keepalive_options.http2_max_ping_strikes_)};
     table_printer.InsertRow(row);
     LOG_VERBOSE(1) << table_printer.PrintTable();
   }
diff --git a/src/grpc/grpc_server.h b/src/grpc/grpc_server.h
index b2c932a614..4bbb54594f 100644
--- a/src/grpc/grpc_server.h
+++ b/src/grpc/grpc_server.h
@@ -26,7 +26,9 @@
 #pragma once
 
 #include
+
 #include
+
 #include "../shared_memory_manager.h"
 #include "../tracer.h"
 #include "grpc_handler.h"
diff --git a/src/grpc/grpc_utils.h b/src/grpc/grpc_utils.h
index 58e02d5cbb..1f4ad8003e 100644
--- a/src/grpc/grpc_utils.h
+++ b/src/grpc/grpc_utils.h
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+
 #include "../classification.h"
 #include "../common.h"
 #include "../shared_memory_manager.h"
diff --git a/src/grpc/infer_handler.h b/src/grpc/infer_handler.h
index 0b87549b3e..b2ce3f13e2 100644
--- a/src/grpc/infer_handler.h
+++ b/src/grpc/infer_handler.h
@@ -28,10 +28,12 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
+
 #include "../tracer.h"
 #include "grpc_handler.h"
 #include "grpc_service.grpc.pb.h"
diff --git a/src/grpc/stream_infer_handler.cc b/src/grpc/stream_infer_handler.cc
index 03b4f209a9..268c6aafa0 100644
--- a/src/grpc/stream_infer_handler.cc
+++ b/src/grpc/stream_infer_handler.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "stream_infer_handler.h"
+
 #include
 
 namespace triton { namespace server { namespace grpc {
diff --git a/src/http_server.cc b/src/http_server.cc
index 4748d1990c..3493e620be 100644
--- a/src/http_server.cc
+++ b/src/http_server.cc
@@ -32,10 +32,12 @@
 #include
 #include
+
 #include
 #include
 #include
 #include
+
 #include "classification.h"
 
 #define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
@@ -1344,7 +1346,7 @@ HTTPAPIServer::HandleRepositoryControl(
   };
   std::unique_ptr<
       std::vector, decltype(param_deleter)>
-      params(new std::vector(), param_deleter);
+      params(new std::vector(), param_deleter);
   // local variables to store the decoded file content, the data must
   // be valid until TRITONSERVER_ServerLoadModelWithParameters returns.
   std::list> binary_files;
diff --git a/src/http_server.h b/src/http_server.h
index 46982d76cb..44b013fd84 100644
--- a/src/http_server.h
+++ b/src/http_server.h
@@ -27,12 +27,14 @@
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
 #include
+
 #include "common.h"
 #include "data_compressor.h"
 #include "shared_memory_manager.h"
diff --git a/src/memory_alloc.cc b/src/memory_alloc.cc
index 4b0ad6f6ec..64f61510e9 100644
--- a/src/memory_alloc.cc
+++ b/src/memory_alloc.cc
@@ -28,12 +28,14 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/multi_server.cc b/src/multi_server.cc
index cc89000f28..d575931b58 100644
--- a/src/multi_server.cc
+++ b/src/multi_server.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
@@ -35,6 +36,7 @@
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/sagemaker_server.h b/src/sagemaker_server.h
index 2ed041d101..45c6b22044 100644
--- a/src/sagemaker_server.h
+++ b/src/sagemaker_server.h
@@ -78,7 +78,8 @@ class SagemakerAPIServer : public HTTPAPIServer {
         model_path_regex_(
             R"((\/opt\/ml\/models\/[0-9A-Za-z._]+)\/(model)\/?([0-9A-Za-z._]+)?)"),
         platform_ensemble_regex_(R"(platform:(\s)*\"ensemble\")"),
-        ping_mode_(GetEnvironmentVariableOrDefault("SAGEMAKER_TRITON_PING_MODE", "ready")),
+        ping_mode_(GetEnvironmentVariableOrDefault(
+            "SAGEMAKER_TRITON_PING_MODE", "ready")),
         model_name_(GetEnvironmentVariableOrDefault(
             "SAGEMAKER_TRITON_DEFAULT_MODEL_NAME",
             "unspecified_SAGEMAKER_TRITON_DEFAULT_MODEL_NAME")),
diff --git a/src/shared_memory_manager.cc b/src/shared_memory_manager.cc
index 9b4ce8fc29..d4d99b1889 100644
--- a/src/shared_memory_manager.cc
+++ b/src/shared_memory_manager.cc
@@ -121,6 +121,7 @@ SharedMemoryManager::UnregisterHelper(
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/common/logging.h"
diff --git a/src/shared_memory_manager.h b/src/shared_memory_manager.h
index 8b39c3115b..b282f77bc7 100644
--- a/src/shared_memory_manager.h
+++ b/src/shared_memory_manager.h
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+
 #include "triton/core/tritonserver.h"
 
 #define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
diff --git a/src/simple.cc b/src/simple.cc
index b0673620d7..5a6bd3b04b 100644
--- a/src/simple.cc
+++ b/src/simple.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
@@ -35,6 +36,7 @@
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/test/caffe2plan.cc b/src/test/caffe2plan.cc
index 7bda39c2eb..301129f10a 100644
--- a/src/test/caffe2plan.cc
+++ b/src/test/caffe2plan.cc
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
@@ -415,9 +416,9 @@ main(int argc, char** argv)
   if (!CaffeToPlan(
           output_filename, prototxt_filename, model_filename, output_names,
-          (use_fp16) ? nvinfer1::DataType::kHALF
-                     : (use_int8) ? nvinfer1::DataType::kINT8
-                                  : nvinfer1::DataType::kFLOAT,
+          (use_fp16)   ? nvinfer1::DataType::kHALF
+          : (use_int8) ? nvinfer1::DataType::kINT8
+                       : nvinfer1::DataType::kFLOAT,
           calibration_filename, max_batch_size, max_workspace_size)) {
     std::cerr << "Failed to create PLAN file" << std::endl;
     return 1;
diff --git a/src/test/data_compressor_test.cc b/src/test/data_compressor_test.cc
index e1b46cb641..292c8c544a 100644
--- a/src/test/data_compressor_test.cc
+++ b/src/test/data_compressor_test.cc
@@ -33,6 +33,7 @@
 #endif
 
 #include
+
 #include
 #include
 #include
@@ -43,6 +44,7 @@
 #include
 #include
 #include
+
 #include "data_compressor.h"
 
 namespace ni = triton::server;
@@ -140,8 +142,8 @@ class DataCompressorTest : public ::testing::Test {
       : raw_data_length_(0), deflate_compressed_length_(0),
         gzip_compressed_length_(0)
   {
-    std::vector files{"raw_data", "deflate_compressed_data",
-                      "gzip_compressed_data"};
+    std::vector files{
+        "raw_data", "deflate_compressed_data", "gzip_compressed_data"};
     for (const auto& file : files) {
       std::fstream fs(file);
       // get length of file
diff --git a/src/test/distributed_addsub/src/distributed_addsub.cc b/src/test/distributed_addsub/src/distributed_addsub.cc
index 4949ca4444..4444e6a735 100644
--- a/src/test/distributed_addsub/src/distributed_addsub.cc
+++ b/src/test/distributed_addsub/src/distributed_addsub.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
@@ -661,10 +662,14 @@ TRITONBACKEND_ModelInstanceExecute(
     uint64_t input_1_byte_size = input_byte_size;
     GUARDED_RESPOND_IF_ERROR(
         responses, r,
-        ReadInputTensor(request, "INPUT0", input_0.data(), reinterpret_cast(&input_0_byte_size)));
+        ReadInputTensor(
+            request, "INPUT0", input_0.data(),
+            reinterpret_cast(&input_0_byte_size)));
     GUARDED_RESPOND_IF_ERROR(
         responses, r,
-        ReadInputTensor(request, "INPUT1", input_1.data(), reinterpret_cast(&input_1_byte_size)));
+        ReadInputTensor(
+            request, "INPUT1", input_1.data(),
+            reinterpret_cast(&input_1_byte_size)));
     if (responses[r] == nullptr) {
       LOG_MESSAGE(
           TRITONSERVER_LOG_ERROR,
diff --git a/src/test/dyna_sequence/src/dyna_sequence.cc b/src/test/dyna_sequence/src/dyna_sequence.cc
index b78df20142..91f83db7c9 100644
--- a/src/test/dyna_sequence/src/dyna_sequence.cc
+++ b/src/test/dyna_sequence/src/dyna_sequence.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
diff --git a/src/test/implicit_state/src/implicit_state.cc b/src/test/implicit_state/src/implicit_state.cc
index 773715afcd..74c3142dea 100644
--- a/src/test/implicit_state/src/implicit_state.cc
+++ b/src/test/implicit_state/src/implicit_state.cc
@@ -26,6 +26,7 @@
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
diff --git a/src/test/query_backend/src/query.cc b/src/test/query_backend/src/query.cc
index dcbabe6c0b..8cc2fd4a06 100644
--- a/src/test/query_backend/src/query.cc
+++ b/src/test/query_backend/src/query.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
@@ -104,8 +105,8 @@ TRITONBACKEND_ModelInstanceExecute(
   } else {
     names = {"OUTPUT0", "OUTPUT1"};
   }
-  std::vector types{TRITONSERVER_MEMORY_CPU_PINNED,
-                    TRITONSERVER_MEMORY_CPU_PINNED};
+  std::vector types{
+      TRITONSERVER_MEMORY_CPU_PINNED, TRITONSERVER_MEMORY_CPU_PINNED};
   std::vector type_ids{1, 1};
   for (size_t i = 0; i < names.size(); ++i) {
     auto err = TRITONBACKEND_RequestOutputBufferProperties(
diff --git a/src/test/repoagent/relocation_repoagent/src/relocation.cc b/src/test/repoagent/relocation_repoagent/src/relocation.cc
index 8ad25a4ad5..1db8c35c97 100644
--- a/src/test/repoagent/relocation_repoagent/src/relocation.cc
+++ b/src/test/repoagent/relocation_repoagent/src/relocation.cc
@@ -24,15 +24,15 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "triton/core/tritonrepoagent.h"
-#include "triton/core/tritonserver.h"
-
 #include
 #include
 #include
 #include
 #include
 
+#include "triton/core/tritonrepoagent.h"
+#include "triton/core/tritonserver.h"
+
 //
 // Relocation Repository Agent that is for test only.
 //
diff --git a/src/test/sequence/src/sequence.cc b/src/test/sequence/src/sequence.cc
index c599ca46f3..44896d2974 100644
--- a/src/test/sequence/src/sequence.cc
+++ b/src/test/sequence/src/sequence.cc
@@ -26,6 +26,7 @@
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
diff --git a/src/tracer.cc b/src/tracer.cc
index cd442e83b9..26750fc3b5 100644
--- a/src/tracer.cc
+++ b/src/tracer.cc
@@ -27,7 +27,9 @@
 #include "tracer.h"
 
 #include
+
 #include
+
 #include "common.h"
 #include "triton/common/logging.h"
 #ifdef TRITON_ENABLE_GPU
@@ -342,8 +344,7 @@ TraceManager::Trace::CaptureTimestamp(
     if (trace_span_ == nullptr) {
       InitSpan(otel_timestamp);
     }
-    trace_span_->AddEvent(
-        name, otel_timestamp);
+    trace_span_->AddEvent(name, otel_timestamp);
 #else
   LOG_ERROR << "Unsupported trace mode: "
             << TraceManager::InferenceTraceModeString(setting_->mode_);
@@ -369,10 +370,10 @@ TraceManager::Trace::InitTracer(
     }
   }
   exporter_ = otlp::OtlpHttpExporterFactory::Create(opts);
-  processor_ = otel_trace_sdk::SimpleSpanProcessorFactory::Create(
-      std::move(exporter_));
-  provider_ = otel_trace_sdk::TracerProviderFactory::Create(
-      std::move(processor_));
+  processor_ =
+      otel_trace_sdk::SimpleSpanProcessorFactory::Create(std::move(exporter_));
+  provider_ =
+      otel_trace_sdk::TracerProviderFactory::Create(std::move(processor_));
 }
 
 void
diff --git a/src/tracer.h b/src/tracer.h
index 88736c536b..c310921f14 100644
--- a/src/tracer.h
+++ b/src/tracer.h
@@ -157,7 +157,7 @@ class TraceManager {
 
   uint64_t trace_id_;
 
-#if !defined (_WIN32) && defined (TRITON_ENABLE_TRACING)
+#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
   // OpenTelemetry SDK relies on system's clock for event timestamps.
   // Triton Tracing records timestamps using steady_clock. This is a
   // monotonic clock, i.e. time is always moving forward. It is not related
diff --git a/src/triton_signal.h b/src/triton_signal.h
index 870df7ed43..d5aefbf0bf 100644
--- a/src/triton_signal.h
+++ b/src/triton_signal.h
@@ -27,6 +27,7 @@
 #include
 #include
+
 #include "triton/core/tritonserver.h"
 
 namespace triton { namespace server {
diff --git a/src/vertex_ai_server.cc b/src/vertex_ai_server.cc
index 95613c36dd..f14143f5b6 100644
--- a/src/vertex_ai_server.cc
+++ b/src/vertex_ai_server.cc
@@ -26,6 +26,7 @@
 #include "vertex_ai_server.h"
 
 #include
+
 #include "common.h"
 
 namespace triton { namespace server {