diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1dd25c8192..6ca236651e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -108,7 +108,7 @@ Use clang-format to format all source files (\*.h, \*.cc, \*.proto, source files before submitting a pull request: ``` -$ apt-get install clang-format clang-format-6.0 +$ apt-get install clang-format-15 ``` For convenience there is a format.py script in the diff --git a/compose.py b/compose.py index 79e64f0646..0a00883727 100644 --- a/compose.py +++ b/compose.py @@ -130,6 +130,7 @@ def add_requested_repoagents(ddir, dockerfile_name, repoagents): with open(os.path.join(ddir, dockerfile_name), "a") as dfile: dfile.write(df) + def add_requested_caches(ddir, dockerfile_name, caches): df = "# Copying over caches \n" for cache in caches: @@ -143,6 +144,7 @@ def add_requested_caches(ddir, dockerfile_name, caches): with open(os.path.join(ddir, dockerfile_name), "a") as dfile: dfile.write(df) + def end_dockerfile(ddir, dockerfile_name, argmap): # Install additional dependencies df = "" @@ -372,8 +374,7 @@ def create_argmap(images, skip_pull): '--cache', action='append', required=False, - help= - 'Include in the generated Docker image. The flag may ' + help='Include in the generated Docker image. The flag may ' 'be specified multiple times.') parser.add_argument( '--skip-pull', diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/config.py b/deploy/mlflow-triton-plugin/mlflow_triton/config.py index 229a6b35ad..484b026227 100644 --- a/deploy/mlflow-triton-plugin/mlflow_triton/config.py +++ b/deploy/mlflow-triton-plugin/mlflow_triton/config.py @@ -48,13 +48,12 @@ def __init__(self): protocol = "http://" endpoint_url = None if uri.host_name != "" and uri.host_port != "": - endpoint_url = '{}{}:{}'.format( - protocol, uri.host_name, uri.host_port) + endpoint_url = '{}{}:{}'.format(protocol, uri.host_name, + uri.host_port) import boto3 # boto3 handles AWS credentials - self['s3'] = boto3.client( - 's3', endpoint_url=endpoint_url) + self['s3'] = boto3.client('s3', endpoint_url=endpoint_url) self['s3_bucket'] = uri.bucket self['s3_prefix'] = uri.prefix self['triton_model_repo'] = 's3://{}'.format( diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py b/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py index 5009e4a545..0a22ba6c88 100644 --- a/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py +++ b/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py @@ -188,11 +188,12 @@ def list_deployments(self): d['name'], _MLFLOW_META_FILENAME) if 's3' in self.server_config: - meta_dict = ast.literal_eval(self.server_config['s3'].get_object( - Bucket=self.server_config['s3_bucket'], - Key=os.path.join( - self.server_config['s3_prefix'], d['name'], _MLFLOW_META_FILENAME), - )['Body'].read().decode('utf-8')) + meta_dict = ast.literal_eval( + self.server_config['s3'].get_object( + Bucket=self.server_config['s3_bucket'], + Key=os.path.join(self.server_config['s3_prefix'], + d['name'], _MLFLOW_META_FILENAME), + )['Body'].read().decode('utf-8')) elif os.path.isfile(mlflow_meta_path): meta_dict = self._get_mlflow_meta_dict(d['name']) else: @@ -280,12 +281,13 @@ def _generate_mlflow_meta_file(self, name, flavor, model_uri): self.server_config['s3'].put_object( Body=json.dumps(meta_dict, indent=4).encode('utf-8'), Bucket=self.server_config["s3_bucket"], - Key=os.path.join( - self.server_config['s3_prefix'], name, _MLFLOW_META_FILENAME), + Key=os.path.join(self.server_config['s3_prefix'], name, + _MLFLOW_META_FILENAME), ) else: - with 
open(os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME), - "w") as outfile: + with open( + os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME), + "w") as outfile: json.dump(meta_dict, outfile, indent=4) print("Saved", _MLFLOW_META_FILENAME, "to", triton_deployment_dir) @@ -295,11 +297,12 @@ def _get_mlflow_meta_dict(self, name): _MLFLOW_META_FILENAME) if 's3' in self.server_config: - mlflow_meta_dict = ast.literal_eval(self.server_config['s3'].get_object( - Bucket=self.server_config['s3_bucket'], - Key=os.path.join( - self.server_config['s3_prefix'], name, _MLFLOW_META_FILENAME), - )['Body'].read().decode('utf-8')) + mlflow_meta_dict = ast.literal_eval( + self.server_config['s3'].get_object( + Bucket=self.server_config['s3_bucket'], + Key=os.path.join(self.server_config['s3_prefix'], name, + _MLFLOW_META_FILENAME), + )['Body'].read().decode('utf-8')) else: with open(mlflow_meta_path, 'r') as metafile: mlflow_meta_dict = json.load(metafile) @@ -392,7 +395,8 @@ def _copy_files_to_triton_repo(self, artifact_path, name, flavor): s3_path = os.path.join( self.server_config['s3_prefix'], copy_paths[key]['to'].replace( - self.server_config['triton_model_repo'], '').strip('/'), + self.server_config['triton_model_repo'], + '').strip('/'), filename, ) @@ -413,8 +417,8 @@ def _copy_files_to_triton_repo(self, artifact_path, name, flavor): if os.path.isdir(copy_paths[key]['from']): if os.path.isdir(copy_paths[key]['to']): shutil.rmtree(copy_paths[key]['to']) - shutil.copytree( - copy_paths[key]['from'], copy_paths[key]['to']) + shutil.copytree(copy_paths[key]['from'], + copy_paths[key]['to']) else: if not os.path.isdir(copy_paths[key]['to']): os.makedirs(copy_paths[key]['to']) diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/common.h b/docs/examples/jetson/concurrency_and_dynamic_batching/common.h index 4a0a27ac08..b55c8b71c5 100644 --- a/docs/examples/jetson/concurrency_and_dynamic_batching/common.h +++ b/docs/examples/jetson/concurrency_and_dynamic_batching/common.h @@ -27,6 +27,7 @@ #include #include + #include "triton/core/tritonserver.h" #define RETURN_IF_ERR(X) \ diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc b/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc index 0affacb3f1..ce22bdcba9 100644 --- a/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc +++ b/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc @@ -27,24 +27,23 @@ #include #include #include + #include #include #include #include +#include #include #include #include #include -#include "triton/core/tritonserver.h" - #include "common.h" - -#include #include "opencv2/core.hpp" #include "opencv2/highgui.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/opencv.hpp" +#include "triton/core/tritonserver.h" #ifdef TRITON_ENABLE_GPU #include diff --git a/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py b/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py index 8850f0e031..84e43eccf9 100644 --- a/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py +++ b/qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py @@ -108,7 +108,7 @@ def execute(self, requests): with self.inflight_thread_count_lck: self.inflight_thread_count += 1 thread1.start() - + logger = pb_utils.Logger logger.log("Execute-Specific Msg!", logger.INFO) logger.log_info("Execute-Info Msg!") diff --git a/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py 
b/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py index d278e07a0e..81bb397115 100644 --- a/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py +++ b/qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py @@ -57,15 +57,14 @@ def execute(self, requests): for request in requests: thread = threading.Thread(target=self.response_thread, - args=(request.get_response_sender(), - pb_utils.get_input_tensor_by_name( - request, 'IN').as_numpy())) + args=(request.get_response_sender(), + pb_utils.get_input_tensor_by_name( + request, 'IN').as_numpy())) thread.daemon = True with self.inflight_thread_count_lck: self.inflight_thread_count += 1 thread.start() - return None def response_thread(self, response_sender, in_value): @@ -95,22 +94,22 @@ def response_thread(self, response_sender, in_value): response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) else: - output_tensors = [pb_utils.Tensor('OUT', output0.as_numpy())] + output_tensors = [ + pb_utils.Tensor('OUT', output0.as_numpy()) + ] response = pb_utils.InferenceResponse( output_tensors=output_tensors) response_sender.send(response) response_count += 1 - if in_value != response_count-1: - error_message = ( - "Expected {} responses, got {}".format( - in_value, len(infer_responses)-1)) - response = pb_utils.InferenceResponse( - error=error_message) + if in_value != response_count - 1: + error_message = ("Expected {} responses, got {}".format( + in_value, + len(infer_responses) - 1)) + response = pb_utils.InferenceResponse(error=error_message) response_sender.send( - response, - flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) + response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) else: response_sender.send( flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) diff --git a/qa/L0_backend_python/logging/logging_test.py b/qa/L0_backend_python/logging/logging_test.py index b72d9b2b7a..1070d240a7 100644 --- a/qa/L0_backend_python/logging/logging_test.py +++ b/qa/L0_backend_python/logging/logging_test.py @@ -31,10 +31,10 @@ import numpy as np import test_util as tu - from tritonclient.utils import * import tritonclient.http as httpclient + class LogTest(tu.TestResultCollector): def test_log_output(self): @@ -43,7 +43,7 @@ def test_log_output(self): input_data = np.array([[1.0]], dtype=np.float32) inputs = [ httpclient.InferInput("INPUT0", input_data.shape, - np_to_triton_dtype(input_data.dtype)) + np_to_triton_dtype(input_data.dtype)) ] inputs[0].set_data_from_numpy(input_data) result = client.infer(model_name, inputs) @@ -51,5 +51,6 @@ def test_log_output(self): self.assertIsNotNone(output0) self.assertTrue(np.all(output0 == input_data)) + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_backend_python/python_test.py b/qa/L0_backend_python/python_test.py index 49413bce55..ccd4d985b3 100644 --- a/qa/L0_backend_python/python_test.py +++ b/qa/L0_backend_python/python_test.py @@ -182,9 +182,8 @@ def test_async_infer(self): # Make sure the requests ran in parallel. 
stats = client.get_inference_statistics(model_name) - test_cond = (len(stats['model_stats']) - != 1) or (stats['model_stats'][0]['name'] - != model_name) + test_cond = (len(stats['model_stats']) != 1) or ( + stats['model_stats'][0]['name'] != model_name) self.assertFalse( test_cond, "error: expected statistics for {}".format(model_name)) diff --git a/qa/L0_backend_python/python_unittest.py b/qa/L0_backend_python/python_unittest.py index af583fc560..9ff1b30e02 100644 --- a/qa/L0_backend_python/python_unittest.py +++ b/qa/L0_backend_python/python_unittest.py @@ -53,7 +53,7 @@ def _run_unittest(self, model_name): def test_python_unittest(self): model_name = os.environ['MODEL_NAME'] - bls_kind = os.environ.get('BLS_KIND', 'non_decoupled') + bls_kind = os.environ.get('BLS_KIND', 'non_decoupled') if bls_kind == "decoupled": # Skip the shared memory probe for decoupled models for now as diff --git a/qa/L0_cuda_graph/trt_cuda_graph_test.py b/qa/L0_cuda_graph/trt_cuda_graph_test.py index 07b744c0b1..6cb68255ae 100644 --- a/qa/L0_cuda_graph/trt_cuda_graph_test.py +++ b/qa/L0_cuda_graph/trt_cuda_graph_test.py @@ -36,7 +36,7 @@ class TrtCudaGraphTest(tu.TestResultCollector): - MODELNAME= "plan" + MODELNAME = "plan" def setUp(self): self.dtype_ = np.float32 @@ -50,7 +50,8 @@ def _check_infer(self, tensor_shape, batch_size=1): else: full_shape = tensor_shape iu.infer_exact(self, - self.model_name_, full_shape, + self.model_name_, + full_shape, batch_size, self.dtype_, self.dtype_, @@ -140,6 +141,7 @@ def test_range_dynamic_shape(self): def test_nobatch_fixed_shape(self): self._check_infer((16,), 0) + if __name__ == '__main__': if len(sys.argv) > 2: TrtCudaGraphTest.MODELNAME = sys.argv.pop() diff --git a/qa/L0_decoupled/decoupled_test.py b/qa/L0_decoupled/decoupled_test.py index ade5f9f2ec..0ce47e5b80 100644 --- a/qa/L0_decoupled/decoupled_test.py +++ b/qa/L0_decoupled/decoupled_test.py @@ -39,10 +39,13 @@ import tritonclient.http as httpclient from tritonclient.utils import InferenceServerException + class UserData: + def __init__(self): self._response_queue = queue.Queue() + def callback(user_data, result, error): if error: user_data._response_queue.put(error) @@ -73,17 +76,17 @@ def setUp(self): # Client can receive a "triton_final_response" response parameter # from Triton server that indicates when a response is the final response for - # its request. + # its request. # # For non-decoupled models, there is a 1:1 request:response ratio, so every - # response is the final response, and this parameter is unnecessary. - # + # response is the final response, and this parameter is unnecessary. + # # For decoupled models, there is a 1:N request:response ratio, so there may be - # more than one response before receiving the "final" response. + # more than one response before receiving the "final" response. # # However, decoupled models have the unique property in that they can return # a flags-only response to the server to indicate completion, which is not - # returned to the client by default (See TRITONBACKEND_ResponseFactorySendFlags). + # returned to the client by default (See TRITONBACKEND_ResponseFactorySendFlags). # # To forward this flags-only response to the client, users must opt-in to this # behavior by adding the following argument: @@ -92,11 +95,12 @@ def setUp(self): # If the decoupled backend/model always sends the final response flag along # with a non-null response, no opt-in is needed. 
# - # With this behavior, the client can programmatically detect when all responses + # With this behavior, the client can programmatically detect when all responses # for an individual request have been received without knowing the expected # number of responses in advance and without closing the stream. def _stream_infer_with_params(self, request_count, request_delay, _, - delay_data, delay_factor, user_data, result_dict): + delay_data, delay_factor, user_data, + result_dict): with grpcclient.InferenceServerClient(url="localhost:8001", verbose=True) as triton_client: # Establish stream @@ -112,8 +116,7 @@ def _stream_infer_with_params(self, request_count, request_delay, _, outputs=self.requested_outputs_, # Opt-in to receiving flags-only responses from model/backend # to help detect final responses for decoupled models. - enable_empty_final_response=True - ) + enable_empty_final_response=True) # Update delay input in accordance with the scaling factor delay_data = delay_data * delay_factor delay_data = delay_data.astype(np.uint32) @@ -130,18 +133,20 @@ def _stream_infer_with_params(self, request_count, request_delay, _, # Request IDs should generally be provided with each request # to associate decoupled responses with their requests. if not response.id: - raise ValueError("No response id found. Was a request_id provided?") + raise ValueError( + "No response id found. Was a request_id provided?") # Detect final response. Parameters are oneof and we expect bool_param - if response.parameters.get("triton_final_response").bool_param: - completed_requests += 1 + if response.parameters.get( + "triton_final_response").bool_param: + completed_requests += 1 - # Only process non-empty response, ignore if empty (no outputs) + # Only process non-empty response, ignore if empty (no outputs) if response.outputs: - if response.id not in result_dict: - result_dict[response.id] = [] - result_dict[response.id].append((recv_count, data_item)) - recv_count += 1 + if response.id not in result_dict: + result_dict[response.id] = [] + result_dict[response.id].append((recv_count, data_item)) + recv_count += 1 def _stream_infer(self, request_count, request_delay, expected_count, delay_data, delay_factor, user_data, result_dict): @@ -171,7 +176,7 @@ def _stream_infer(self, request_count, request_delay, expected_count, else: this_id = data_item.get_response().id if this_id not in result_dict: - result_dict[this_id] = [] + result_dict[this_id] = [] result_dict[this_id].append((recv_count, data_item)) recv_count += 1 @@ -232,8 +237,9 @@ def _decoupled_infer(self, self.requested_outputs_ = self.outputs_ if validate_fn is None else self.outputs_[ 0:1] - - for infer_helper in [self._stream_infer, self._stream_infer_with_params]: + for infer_helper in [ + self._stream_infer, self._stream_infer_with_params + ]: user_data = UserData() result_dict = {} @@ -254,8 +260,8 @@ def _decoupled_infer(self, this_id = str(i) if repeat_count != 0 and this_id not in result_dict.keys(): self.assertTrue( - False, - "response for request id {} not received".format(this_id)) + False, "response for request id {} not received".format( + this_id)) elif repeat_count == 0 and this_id in result_dict.keys(): self.assertTrue( False, @@ -263,7 +269,8 @@ def _decoupled_infer(self, this_id)) if repeat_count != 0: if validate_fn is None: - self.assertEqual(len(result_dict[this_id]), repeat_count) + self.assertEqual(len(result_dict[this_id]), + repeat_count) expected_data = data_offset result_list = result_dict[this_id] for j in range(len(result_list)): 
@@ -278,7 +285,8 @@ def _decoupled_infer(self, self.assertEqual(this_idx[0], j) expected_data += 1 else: - validate_fn(result_dict[this_id], data_offset, repeat_count) + validate_fn(result_dict[this_id], data_offset, + repeat_count) def test_one_to_none(self): # Test cases where each request generates no response. diff --git a/qa/L0_implicit_state/implicit_state.py b/qa/L0_implicit_state/implicit_state.py index fa71374c47..64993e86b1 100644 --- a/qa/L0_implicit_state/implicit_state.py +++ b/qa/L0_implicit_state/implicit_state.py @@ -61,9 +61,7 @@ def test_no_implicit_state(self): self.assertIn("unable to add state 'undefined_state'", err_str) self.assertIn( "state configuration is missing for model 'no_implicit_state'", - err_str - ) - + err_str) def test_wrong_implicit_state_name(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") @@ -81,10 +79,8 @@ def test_wrong_implicit_state_name(self): sequence_start=True) err_str = str(e.exception).lower() - self.assertIn( - "state 'undefined_state' is not a valid state name", - err_str - ) + self.assertIn("state 'undefined_state' is not a valid state name", + err_str) def test_no_update(self): # Test implicit state without updating any state @@ -134,8 +130,7 @@ def test_request_output_not_allowed(self): sequence_end=True) self.assertIn( "unexpected inference output 'OUTPUT_STATE' for model", - str(e.exception) - ) + str(e.exception)) def test_request_output(self): triton_client = tritonhttpclient.InferenceServerClient("localhost:8000") diff --git a/qa/L0_infer/infer_test.py b/qa/L0_infer/infer_test.py index 7e76fa43ef..1e0e172a13 100644 --- a/qa/L0_infer/infer_test.py +++ b/qa/L0_infer/infer_test.py @@ -59,15 +59,16 @@ class InferTest(tu.TestResultCollector): - def _full_exact(self, - input_dtype, - output0_dtype, - output1_dtype, - output0_raw, - output1_raw, - swap, - # 60 sec is the default value - network_timeout=60.0): + def _full_exact( + self, + input_dtype, + output0_dtype, + output1_dtype, + output0_raw, + output1_raw, + swap, + # 60 sec is the default value + network_timeout=60.0): def _infer_exact_helper(tester, pf, diff --git a/qa/L0_logging/logging_endpoint_test.py b/qa/L0_logging/logging_endpoint_test.py index 73e9476138..2058d941c2 100644 --- a/qa/L0_logging/logging_endpoint_test.py +++ b/qa/L0_logging/logging_endpoint_test.py @@ -74,9 +74,8 @@ def check_server_initial_state(self): "log_format": "default" } triton_client = httpclient.InferenceServerClient("localhost:8000") - self.assertEqual(initial_settings, - triton_client.get_log_settings()) - + self.assertEqual(initial_settings, triton_client.get_log_settings()) + def test_http_get_settings(self): # Log settings will be the same as default settings since # no update has been made. 
@@ -89,8 +88,7 @@ def test_http_get_settings(self): "log_format": "default" } triton_client = httpclient.InferenceServerClient("localhost:8000") - self.assertEqual(initial_settings, - triton_client.get_log_settings(), + self.assertEqual(initial_settings, triton_client.get_log_settings(), "Unexpected initial log settings") def test_grpc_get_settings(self): @@ -121,10 +119,9 @@ def test_grpc_get_settings(self): } }), initial_settings) triton_client = grpcclient.InferenceServerClient("localhost:8001") - self.assertEqual(initial_settings, - triton_client.get_log_settings(), + self.assertEqual(initial_settings, triton_client.get_log_settings(), "Unexpected initial log settings") - + def test_http_update_settings(self): # Update each possible log configuration # field and check that they are reflected @@ -205,8 +202,7 @@ def test_http_update_settings(self): expected_log_settings_6, triton_client.update_log_settings(settings=expected_log_settings_6), "Unexpected updated log settings") - - + def test_grpc_update_settings(self): # Update each possible log configuration # field and check that they are reflected @@ -251,7 +247,7 @@ def test_grpc_update_settings(self): expected_log_settings_1, triton_client.update_log_settings(settings=log_settings_1), "Unexpected updated log settings") - + log_settings_2 = { "log_file": "log_file.log", "log_info": False, @@ -436,11 +432,12 @@ def test_grpc_update_settings(self): }, } }), expected_log_settings_6) - + self.assertEqual( expected_log_settings_6, triton_client.update_log_settings(settings=log_settings_6), "Unexpected updated log settings") - + + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_metrics/metrics_test.py b/qa/L0_metrics/metrics_test.py index 6beb506601..36d732cdfa 100755 --- a/qa/L0_metrics/metrics_test.py +++ b/qa/L0_metrics/metrics_test.py @@ -34,31 +34,25 @@ import test_util as tu INF_COUNTER_PATTERNS = [ - 'nv_inference_request_duration', - 'nv_inference_queue_duration', - 'nv_inference_compute_input_duration', - 'nv_inference_compute_infer_duration', - 'nv_inference_compute_output_duration' + 'nv_inference_request_duration', 'nv_inference_queue_duration', + 'nv_inference_compute_input_duration', + 'nv_inference_compute_infer_duration', + 'nv_inference_compute_output_duration' ] INF_SUMMARY_PATTERNS = [ - 'nv_inference_request_summary', - 'nv_inference_queue_summary', - 'nv_inference_compute_input_summary', - 'nv_inference_compute_infer_summary', - 'nv_inference_compute_output_summary' + 'nv_inference_request_summary', 'nv_inference_queue_summary', + 'nv_inference_compute_input_summary', 'nv_inference_compute_infer_summary', + 'nv_inference_compute_output_summary' ] CACHE_COUNTER_PATTERNS = [ - 'nv_cache_num_hits_per_model', - 'nv_cache_num_misses_per_model', - 'nv_cache_hit_duration_per_model', - 'nv_cache_miss_duration_per_model' -] -CACHE_SUMMARY_PATTERNS = [ - 'nv_cache_hit_summary', - 'nv_cache_miss_summary' + 'nv_cache_num_hits_per_model', 'nv_cache_num_misses_per_model', + 'nv_cache_hit_duration_per_model', 'nv_cache_miss_duration_per_model' ] +CACHE_SUMMARY_PATTERNS = ['nv_cache_hit_summary', 'nv_cache_miss_summary'] + class MetricsTest(tu.TestResultCollector): + def _get_metrics(self): metrics_url = "http://localhost:8002/metrics" r = requests.get(metrics_url) @@ -129,5 +123,6 @@ def test_inf_summaries_exist_with_cache(self): for metric in bad_patterns: self.assertNotIn(metric, metrics) + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_model_namespacing/python_addsub/__init__.py 
b/qa/L0_model_namespacing/python_addsub/__init__.py index 2e122f1cd3..e14880ceba 100644 --- a/qa/L0_model_namespacing/python_addsub/__init__.py +++ b/qa/L0_model_namespacing/python_addsub/__init__.py @@ -78,13 +78,12 @@ def execute(self, requests): """ This function is called on inference request. """ - responses = [] for request in requests: in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - responses.append( - pb_utils.InferenceResponse(self.addsub(in_0, in_1))) + responses.append(pb_utils.InferenceResponse(self.addsub(in_0, + in_1))) return responses def addsub(self, in_0, in_1): @@ -97,7 +96,7 @@ def addsub(self, in_0, in_1): in_0.as_numpy() - in_1.as_numpy()) out_tensor_0 = pb_utils.Tensor("OUTPUT0", - out_0.astype(self.output0_dtype)) + out_0.astype(self.output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", - out_1.astype(self.output1_dtype)) + out_1.astype(self.output1_dtype)) return [out_tensor_0, out_tensor_1] diff --git a/qa/L0_model_namespacing/python_subadd/__init__.py b/qa/L0_model_namespacing/python_subadd/__init__.py index a53bab0da7..6d38542bf0 100644 --- a/qa/L0_model_namespacing/python_subadd/__init__.py +++ b/qa/L0_model_namespacing/python_subadd/__init__.py @@ -77,13 +77,13 @@ def initialize(self, args): def execute(self, requests): """ This function is called on inference request. """ - + responses = [] for request in requests: in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - responses.append( - pb_utils.InferenceResponse(self.subadd(in_0, in_1))) + responses.append(pb_utils.InferenceResponse(self.subadd(in_0, + in_1))) return responses def subadd(self, in_0, in_1): @@ -96,7 +96,7 @@ def subadd(self, in_0, in_1): in_0.as_numpy() + in_1.as_numpy()) out_tensor_0 = pb_utils.Tensor("OUTPUT0", - out_0.astype(self.output0_dtype)) + out_0.astype(self.output0_dtype)) out_tensor_1 = pb_utils.Tensor("OUTPUT1", - out_1.astype(self.output1_dtype)) + out_1.astype(self.output1_dtype)) return [out_tensor_0, out_tensor_1] diff --git a/qa/L0_model_namespacing/test.py b/qa/L0_model_namespacing/test.py index ff38918854..9de6ac749c 100644 --- a/qa/L0_model_namespacing/test.py +++ b/qa/L0_model_namespacing/test.py @@ -43,6 +43,7 @@ # Test utilities # + # Checker to perform inference on given model, expecting model to have # [INPUT0, INPUT1] and produce [OUTPUT0, OUTPUT1] where: # OUTPUT0 = INPUT0 + INPUT1 @@ -56,9 +57,11 @@ def __init__(self, checker_client=None): if checker_client is None: import tritonclient.http as checker_client if "http" in checker_client.__name__: - self.client_ = checker_client.InferenceServerClient("localhost:8000") + self.client_ = checker_client.InferenceServerClient( + "localhost:8000") else: - self.client_ = checker_client.InferenceServerClient("localhost:8001") + self.client_ = checker_client.InferenceServerClient( + "localhost:8001") # Create infer input tensors self.inputs_ = [] @@ -70,37 +73,45 @@ def __init__(self, checker_client=None): self.inputs_[0].set_data_from_numpy(input_data) self.inputs_[1].set_data_from_numpy(input_data) self.expected_outputs_ = { - "add" : (input_data + input_data), - "sub" : (input_data - input_data) + "add": (input_data + input_data), + "sub": (input_data - input_data) } - + def infer(self, model): res = self.client_.infer(model, self.inputs_) - np.testing.assert_allclose(res.as_numpy('OUTPUT0'), self.expected_outputs_["add"]) - np.testing.assert_allclose(res.as_numpy('OUTPUT1'), 
self.expected_outputs_["sub"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT0'), + self.expected_outputs_["add"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT1'), + self.expected_outputs_["sub"]) + # Checker to perform inference on given model, expecting model to have # [INPUT0, INPUT1] and produce [OUTPUT0, OUTPUT1] where: # OUTPUT0 = INPUT0 - INPUT1 # OUTPUT1 = INPUT0 + INPUT1 class SubAddChecker(AddSubChecker): + def infer(self, model): res = self.client_.infer(model, self.inputs_) - np.testing.assert_allclose(res.as_numpy('OUTPUT0'), self.expected_outputs_["sub"]) - np.testing.assert_allclose(res.as_numpy('OUTPUT1'), self.expected_outputs_["add"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT0'), + self.expected_outputs_["sub"]) + np.testing.assert_allclose(res.as_numpy('OUTPUT1'), + self.expected_outputs_["add"]) + # # Test suites and cases # + class ModelNamespacePoll(tu.TestResultCollector): + def setUp(self): self.addsub_ = AddSubChecker() self.subadd_ = SubAddChecker() # For other server interaction self.client_ = httpclient.InferenceServerClient("localhost:8000") - def check_health(self, expect_live=True, expect_ready=True): self.assertEqual(self.client_.is_server_live(), expect_live) self.assertEqual(self.client_.is_server_ready(), expect_ready) @@ -126,16 +137,20 @@ def test_duplication(self): self.check_health() # infer check - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) - + # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -149,16 +164,20 @@ def test_ensemble_duplication(self): self.check_health() # infer - for model in ["composing_addsub",]: + for model in [ + "composing_addsub", + ]: self.addsub_.infer(model) - for model in ["composing_subadd",]: + for model in [ + "composing_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("simple_ensemble") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -172,7 +191,8 @@ def test_dynamic_resolution(self): # same as before the removal. self.assertTrue("NAMESPACE_TESTING_DIRCTORY" in os.environ) td = os.environ["NAMESPACE_TESTING_DIRCTORY"] - composing_before_path = os.path.join(td, "addsub_repo", "composing_model") + composing_before_path = os.path.join(td, "addsub_repo", + "composing_model") composing_after_path = os.path.join(td, "composing_model") self.check_health() @@ -183,27 +203,32 @@ def test_dynamic_resolution(self): # infer for model in ["simple_subadd", "simple_addsub", "composing_model"]: self.subadd_.infer(model) - + # step 2. 
shutil.move(composing_after_path, composing_before_path) time.sleep(5) # infer - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) class ModelNamespaceExplicit(tu.TestResultCollector): + def setUp(self): self.addsub_ = AddSubChecker() self.subadd_ = SubAddChecker() @@ -241,16 +266,20 @@ def test_duplication(self): self.client_.load_model(model) # infer - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -265,18 +294,22 @@ def test_ensemble_duplication(self): # load ensembles, cascadingly load composing model for model in ["simple_ensemble"]: self.client_.load_model(model) - + # infer - for model in ["composing_addsub",]: + for model in [ + "composing_addsub", + ]: self.addsub_.infer(model) - for model in ["composing_subadd",]: + for model in [ + "composing_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("simple_ensemble") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) @@ -290,7 +323,8 @@ def test_dynamic_resolution(self): # same as before the removal. self.assertTrue("NAMESPACE_TESTING_DIRCTORY" in os.environ) td = os.environ["NAMESPACE_TESTING_DIRCTORY"] - composing_before_path = os.path.join(td, "addsub_repo", "composing_model") + composing_before_path = os.path.join(td, "addsub_repo", + "composing_model") composing_after_path = os.path.join(td, "composing_model") self.check_health() @@ -303,25 +337,31 @@ def test_dynamic_resolution(self): # infer for model in ["simple_subadd", "simple_addsub", "composing_model"]: self.subadd_.infer(model) - + # step 2. 
shutil.move(composing_after_path, composing_before_path) # Explicitly load one of the ensembles, which should still trigger cascading # (re-)load - for model in ["simple_addsub", ]: + for model in [ + "simple_addsub", + ]: self.client_.load_model(model) # infer - for model in ["simple_addsub",]: + for model in [ + "simple_addsub", + ]: self.addsub_.infer(model) - for model in ["simple_subadd",]: + for model in [ + "simple_subadd", + ]: self.subadd_.infer(model) # error check try: self.addsub_.infer("composing_model") - self.assertTrue(False, - "expected error for inferring ambiguous named model") + self.assertTrue( + False, "expected error for inferring ambiguous named model") except InferenceServerException as ex: self.assertIn("ambiguity", ex.message()) diff --git a/qa/L0_model_queue/model_queue_test.py b/qa/L0_model_queue/model_queue_test.py index 5fef013321..e0875205ff 100644 --- a/qa/L0_model_queue/model_queue_test.py +++ b/qa/L0_model_queue/model_queue_test.py @@ -412,7 +412,8 @@ def test_max_priority_levels(self): kwargs=trial)) threads.append( threading.Thread(target=self.check_response, - args=(1, dtype, shapes, MAX_UINT32_PLUS_1, 0, (15000, 10000)), + args=(1, dtype, shapes, MAX_UINT32_PLUS_1, 0, + (15000, 10000)), kwargs=trial)) threads.append( threading.Thread(target=self.check_response, diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index b3fa2a62be..39f5bfc8d4 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -63,14 +63,18 @@ def __infer(self, batching=False): def __concurrent_infer(self, concurrency=4, batching=False): pool = concurrent.futures.ThreadPoolExecutor() stop = [False] + def repeat_infer(): while not stop[0]: self.__infer(batching) + infer_threads = [pool.submit(repeat_infer) for i in range(concurrency)] + def stop_infer(): stop[0] = True [t.result() for t in infer_threads] pool.shutdown() + return stop_infer def __check_count(self, kind, expected_count, poll=False): @@ -382,10 +386,12 @@ def test_instance_resource_increase(self): # possibly not updated to the larger resource requirement. 
infer_count = 8 infer_complete = [False for i in range(infer_count)] + def infer(): for i in range(infer_count): self.__infer() infer_complete[i] = True + with concurrent.futures.ThreadPoolExecutor() as pool: infer_thread = pool.submit(infer) time.sleep(infer_count / 2) # each infer should take < 0.5 seconds diff --git a/qa/L0_sagemaker/sagemaker_multi_model_test.py b/qa/L0_sagemaker/sagemaker_multi_model_test.py index 42847f1014..06cd48edd7 100644 --- a/qa/L0_sagemaker/sagemaker_multi_model_test.py +++ b/qa/L0_sagemaker/sagemaker_multi_model_test.py @@ -282,7 +282,10 @@ def test_sm_5_model_unload(self): def test_sm_6_ensemble_model(self): # Load ensemble model request_body = {"model_name": self.model3_name, "url": self.model3_url} - headers = {"Content-Type": "application/json", "X-Amzn-SageMaker-Target-Model": f"{self.model3_name}"} + headers = { + "Content-Type": "application/json", + "X-Amzn-SageMaker-Target-Model": f"{self.model3_name}" + } r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers) @@ -326,7 +329,6 @@ def test_sm_6_ensemble_model(self): self.assertEqual( r.status_code, 200, "Expected status code 200, received {}".format(r.status_code)) - if __name__ == "__main__": diff --git a/qa/L0_sdk/grpc_test.cc b/qa/L0_sdk/grpc_test.cc index 09fe5bbc51..3f45e4ae25 100644 --- a/qa/L0_sdk/grpc_test.cc +++ b/qa/L0_sdk/grpc_test.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + #include "grpc_client.h" namespace tc = triton::client; diff --git a/qa/L0_sdk/http_test.cc b/qa/L0_sdk/http_test.cc index 2c8e231fb2..0b2a4da597 100644 --- a/qa/L0_sdk/http_test.cc +++ b/qa/L0_sdk/http_test.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + #include "http_client.h" namespace tc = triton::client; diff --git a/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py b/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py index 8c4b590ddc..3f2eeeaa40 100644 --- a/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py +++ b/qa/L0_trt_data_dependent_shape/trt_data_dependent_shape_test.py @@ -33,10 +33,12 @@ import test_util as tu import tritonclient.http as client + class TrtDataDependentShapeTest(tu.TestResultCollector): + def setUp(self): - self.triton_client = client.InferenceServerClient( - "localhost:8000", verbose=True) + self.triton_client = client.InferenceServerClient("localhost:8000", + verbose=True) def test_fixed(self): model_name = "plan_nobatch_nonzero_fixed" @@ -47,19 +49,17 @@ def test_fixed(self): inputs.append(client.InferInput('INPUT', [4, 4], "INT32")) inputs[-1].set_data_from_numpy(input_np) - results = self.triton_client.infer(model_name=model_name, - inputs=inputs) + results = self.triton_client.infer(model_name=model_name, inputs=inputs) # Validate the results by comparing with precomputed values. 
output_np = results.as_numpy('OUTPUT') self.assertTrue( np.array_equal(output_np, expected_output_np), - "OUTPUT expected: {}, got {}".format(expected_output_np, - output_np)) + "OUTPUT expected: {}, got {}".format(expected_output_np, output_np)) def test_dynamic(self): model_name = "plan_nobatch_nonzero_dynamic" input_data = [] - for i in range(20*16): + for i in range(20 * 16): input_data.append(i if (i % 2) == 0 else 0) input_np = np.array(input_data, dtype=np.int32).reshape((20, 16)) expected_output_np = np.nonzero(input_np) @@ -68,14 +68,12 @@ def test_dynamic(self): inputs.append(client.InferInput('INPUT', [20, 16], "INT32")) inputs[-1].set_data_from_numpy(input_np) - results = self.triton_client.infer(model_name=model_name, - inputs=inputs) + results = self.triton_client.infer(model_name=model_name, inputs=inputs) # Validate the results by comparing with precomputed values. output_np = results.as_numpy('OUTPUT') self.assertTrue( np.array_equal(output_np, expected_output_np), - "OUTPUT expected: {}, got {}".format(expected_output_np, - output_np)) + "OUTPUT expected: {}, got {}".format(expected_output_np, output_np)) if __name__ == '__main__': diff --git a/qa/L0_trt_error_propagation/trt_error_propagation_test.py b/qa/L0_trt_error_propagation/trt_error_propagation_test.py index 300f8b9705..69c7ecaa28 100644 --- a/qa/L0_trt_error_propagation/trt_error_propagation_test.py +++ b/qa/L0_trt_error_propagation/trt_error_propagation_test.py @@ -47,9 +47,8 @@ def test_invalid_trt_model(self): "Internal Error " ] for expected_msg_part in expected_msg_parts: - self.assertIn( - expected_msg_part, err_msg, - "Cannot find an expected part of error message") + self.assertIn(expected_msg_part, err_msg, + "Cannot find an expected part of error message") _, err_msg = err_msg.split(expected_msg_part) def test_invalid_trt_model_autocomplete(self): diff --git a/qa/common/check_copyright.py b/qa/common/check_copyright.py index b1d2d9105c..f5d84995e0 100755 --- a/qa/common/check_copyright.py +++ b/qa/common/check_copyright.py @@ -32,8 +32,8 @@ import pathlib FLAGS = None -SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png', 'log', 'preprocessed', - 'jmx', 'gz', 'json', 'pdf', 'so', 'onnx', 'svg') +SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png', 'log', 'preprocessed', 'jmx', 'gz', + 'json', 'pdf', 'so', 'onnx', 'svg') REPO_PATH_FROM_THIS_FILE = '../..' 
SKIP_PATHS = ( 'build', 'deploy/gke-marketplace-app/.gitignore', diff --git a/qa/common/gen_qa_reshape_models.py b/qa/common/gen_qa_reshape_models.py index b881410961..b6ff48003e 100644 --- a/qa/common/gen_qa_reshape_models.py +++ b/qa/common/gen_qa_reshape_models.py @@ -182,14 +182,11 @@ def create_tf_modelfile(create_savedmodel, models_dir, model_version, max_batch, tf.identity(tin, name=output_name) else: if max_batch == 0: - tf.reshape(tin, - output_shapes[io_num], - name=output_name) + tf.reshape(tin, output_shapes[io_num], name=output_name) else: tf.reshape(tin, [ -1, - ] + output_shapes[io_num], - name=output_name) + ] + output_shapes[io_num], name=output_name) # Use model name based on input/output count and non-batching variant if create_savedmodel: diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index 48336237f8..adf02684cc 100644 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -30,6 +30,7 @@ import tensorrt as trt import test_util as tu + def np_to_model_dtype(np_dtype): if np_dtype == bool: return "TYPE_BOOL" @@ -69,6 +70,7 @@ def np_to_trt_dtype(np_dtype): return trt.float32 return None + # The 'nonzero' model that we use for data dependent shape naturally does # not support batching, because the layer output is not trivially separable # based on the request batch size. @@ -117,7 +119,7 @@ def create_data_dependent_modelfile(models_dir, config = builder.create_builder_config() config.add_optimization_profile(profile) config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20) - + # serialized model engine_bytes = builder.build_serialized_network(network, config) @@ -130,8 +132,9 @@ def create_data_dependent_modelfile(models_dir, with open(model_version_dir + "/model.plan", "wb") as f: f.write(engine_bytes) + def create_data_dependent_modelconfig(models_dir, - model_name, + model_name, input_shape, input_dtype=np.int32): config_dir = models_dir + "/" + model_name @@ -153,9 +156,8 @@ def create_data_dependent_modelconfig(models_dir, dims: [ {} ] }} ] -'''.format(model_name, - np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape), - np_to_model_dtype(np.int32), +'''.format(model_name, np_to_model_dtype(input_dtype), + tu.shape_to_dims_str(input_shape), np_to_model_dtype(np.int32), tu.shape_to_dims_str((len(input_shape), -1))) try: @@ -176,10 +178,13 @@ def create_data_dependent_modelconfig(models_dir, FLAGS, unparsed = parser.parse_known_args() # Fixed input shape - create_data_dependent_modelfile(FLAGS.models_dir, "plan_nobatch_nonzero_fixed", (4, 4)) - create_data_dependent_modelconfig(FLAGS.models_dir, "plan_nobatch_nonzero_fixed", (4, 4)) + create_data_dependent_modelfile(FLAGS.models_dir, + "plan_nobatch_nonzero_fixed", (4, 4)) + create_data_dependent_modelconfig(FLAGS.models_dir, + "plan_nobatch_nonzero_fixed", (4, 4)) # Dynamic input shape - create_data_dependent_modelfile(FLAGS.models_dir, "plan_nobatch_nonzero_dynamic", (-1, -1)) - create_data_dependent_modelconfig(FLAGS.models_dir, "plan_nobatch_nonzero_dynamic", (-1, -1)) - + create_data_dependent_modelfile(FLAGS.models_dir, + "plan_nobatch_nonzero_dynamic", (-1, -1)) + create_data_dependent_modelconfig(FLAGS.models_dir, + "plan_nobatch_nonzero_dynamic", (-1, -1)) diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py index 86e2621b45..7520af80ee 100644 --- a/qa/common/gen_qa_trt_plugin_models.py +++ b/qa/common/gen_qa_trt_plugin_models.py @@ -37,6 +37,7 
@@ trt.init_libnvinfer_plugins(TRT_LOGGER, '') PLUGIN_CREATORS = trt.get_plugin_registry().plugin_creator_list + def np_to_model_dtype(np_dtype): if np_dtype == bool: return "TYPE_BOOL" diff --git a/qa/common/infer_test.py b/qa/common/infer_test.py index 3976d3cfe4..cd954f250e 100644 --- a/qa/common/infer_test.py +++ b/qa/common/infer_test.py @@ -37,9 +37,10 @@ np_dtype_string = np.dtype(object) # Allow caller to setup specific set of backends to test -DEFAULT_BACKENDS="graphdef savedmodel plan onnx libtorch" +DEFAULT_BACKENDS = "graphdef savedmodel plan onnx libtorch" TEST_BACKENDS = os.environ.get("BACKENDS", DEFAULT_BACKENDS).split() + class InferTest(tu.TestResultCollector): def _full_exact(self, input_dtype, output0_dtype, output1_dtype, @@ -80,7 +81,6 @@ def _infer_exact_helper(tester, use_streaming=use_streaming, correlation_id=correlation_id) - input_size = 16 if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype, @@ -107,7 +107,8 @@ def _infer_exact_helper(tester, else: shape = (input_size,) _infer_exact_helper(self, - 'plan', shape, + 'plan', + shape, 8, input_dtype, output0_dtype, diff --git a/qa/python_models/bls/model.py b/qa/python_models/bls/model.py index 995d3b2b14..cf7946e1ec 100644 --- a/qa/python_models/bls/model.py +++ b/qa/python_models/bls/model.py @@ -69,6 +69,7 @@ def bls_add_sub(_=None): return True + def bls_square(_=None): input0_np = np.random.randint(16, size=1, dtype=np.int32) input0 = pb_utils.Tensor('IN', input0_np) @@ -85,7 +86,8 @@ def bls_square(_=None): return False if len(infer_response.output_tensors()) > 0: - output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUT') + output0 = pb_utils.get_output_tensor_by_name( + infer_response, 'OUT') if output0 is None: return False @@ -96,11 +98,12 @@ def bls_square(_=None): response_count += 1 - if not np.all(input0.as_numpy() == response_count-1): + if not np.all(input0.as_numpy() == response_count - 1): return False return True + def bls_libtorch(model_name, result_device): shape = [16] input0_np = np.random.rand(*shape).astype(np.float32) @@ -116,11 +119,11 @@ def bls_libtorch(model_name, result_device): pb_utils.TRITONSERVER_MEMORY_GPU, 0) infer_request = pb_utils.InferenceRequest( - model_name=model_name, - model_version=1, - inputs=[input0, input1], - requested_output_names=['OUTPUT__0', 'OUTPUT__1'], - preferred_memory=preferred_memory) + model_name=model_name, + model_version=1, + inputs=[input0, input1], + requested_output_names=['OUTPUT__0', 'OUTPUT__1'], + preferred_memory=preferred_memory) infer_response = infer_request.exec() if infer_response.has_error(): @@ -146,8 +149,10 @@ def bls_libtorch(model_name, result_device): else: if output0.is_cpu() or output1.is_cpu(): return False - output0 = from_dlpack(output0.to_dlpack()).to('cpu').cpu().detach().numpy() - output1 = from_dlpack(output1.to_dlpack()).to('cpu').cpu().detach().numpy() + output0 = from_dlpack( + output0.to_dlpack()).to('cpu').cpu().detach().numpy() + output1 = from_dlpack( + output1.to_dlpack()).to('cpu').cpu().detach().numpy() if not np.all(output0 == expected_output_0): return False @@ -156,6 +161,7 @@ def bls_libtorch(model_name, result_device): return True + class PBBLSTest(unittest.TestCase): def setUp(self): @@ -605,13 +611,11 @@ def test_timeout(self): # Expect timeout error self.assertTrue(infer_response.has_error()) - self.assertIn( - "Request timeout expired", - infer_response.error().message()) + self.assertIn("Request timeout expired", + infer_response.error().message()) 
self.assertTrue(len(infer_response.output_tensors()) == 0) - def _test_response_iterator_square(self, - expected_output_cnt, + def _test_response_iterator_square(self, expected_output_cnt, expected_output_value, response_iterator): response_count = 0 @@ -620,7 +624,8 @@ def _test_response_iterator_square(self, for infer_response in response_iterator: self.assertFalse(infer_response.has_error()) if len(infer_response.output_tensors()) > 0: - output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUT') + output0 = pb_utils.get_output_tensor_by_name( + infer_response, 'OUT') self.assertIsNotNone(output0) self.assertEqual(expected_output_value, output0.as_numpy()) @@ -671,8 +676,8 @@ def test_response_iterator(self): response_count = 0 for infer_response in infer_responses: self.assertFalse(infer_response.has_error()) - output0 = pb_utils.get_output_tensor_by_name(infer_response, - 'OUT') + output0 = pb_utils.get_output_tensor_by_name( + infer_response, 'OUT') self.assertIsNotNone(output0) self.assertEqual(response_value, output0.as_numpy()) diff --git a/qa/python_models/bls_async/model.py b/qa/python_models/bls_async/model.py index 109f1df644..4158c82e9d 100644 --- a/qa/python_models/bls_async/model.py +++ b/qa/python_models/bls_async/model.py @@ -97,7 +97,6 @@ def verify_square_results(input0, infer_responses): flush=True) return False - if len(infer_response.output_tensors()) > 0: output0 = pb_utils.get_output_tensor_by_name(infer_response, 'OUT') @@ -118,8 +117,9 @@ def verify_square_results(input0, infer_responses): response_count += 1 - if not np.all(input0 == response_count-1): - print('Expected {} responses, got {}'.format(input0, response_count-1)) + if not np.all(input0 == response_count - 1): + print('Expected {} responses, got {}'.format(input0, + response_count - 1)) return False return True diff --git a/qa/python_models/bls_finalize_error/model.py b/qa/python_models/bls_finalize_error/model.py index 50baf4d9a8..f3db1d6bbe 100644 --- a/qa/python_models/bls_finalize_error/model.py +++ b/qa/python_models/bls_finalize_error/model.py @@ -27,7 +27,9 @@ import triton_python_backend_utils as pb_utils import numpy as np + class TritonPythonModel: + def initialize(self, args): pass @@ -38,7 +40,8 @@ def finalize(self): print('Cleaning up...') input0_np = np.random.randint(3, size=1, dtype=np.int32) input0 = pb_utils.Tensor('IN', input0_np) - infer_request = pb_utils.InferenceRequest(model_name='square_int32', - inputs=[input0], - requested_output_names=['OUT']) + infer_request = pb_utils.InferenceRequest( + model_name='square_int32', + inputs=[input0], + requested_output_names=['OUT']) infer_responses = infer_request.exec(decoupled=True) diff --git a/qa/python_models/bls_init_error/model.py b/qa/python_models/bls_init_error/model.py index f79b144bcb..f95ce4eff8 100644 --- a/qa/python_models/bls_init_error/model.py +++ b/qa/python_models/bls_init_error/model.py @@ -27,13 +27,16 @@ import triton_python_backend_utils as pb_utils import numpy as np + class TritonPythonModel: + def initialize(self, args): input0_np = np.random.randint(3, size=1, dtype=np.int32) input0 = pb_utils.Tensor('IN', input0_np) - infer_request = pb_utils.InferenceRequest(model_name='square_int32', - inputs=[input0], - requested_output_names=['OUT']) + infer_request = pb_utils.InferenceRequest( + model_name='square_int32', + inputs=[input0], + requested_output_names=['OUT']) infer_responses = infer_request.exec(decoupled=True) def execute(self, requests): diff --git a/qa/python_models/bls_undefined/model.py 
b/qa/python_models/bls_undefined/model.py index ca66c77b13..4b52c6e54f 100644 --- a/qa/python_models/bls_undefined/model.py +++ b/qa/python_models/bls_undefined/model.py @@ -26,10 +26,9 @@ class TritonPythonModel: + def execute(self, requests): undefined_variable def finalize(self): print('Cleaning up...') - - diff --git a/qa/python_models/dlpack_test/model.py b/qa/python_models/dlpack_test/model.py index a850afc1ce..2beab4af7c 100644 --- a/qa/python_models/dlpack_test/model.py +++ b/qa/python_models/dlpack_test/model.py @@ -246,7 +246,7 @@ def test_cuda_multi_gpu(self): self.assertTrue(cp.cuda.Stream(null=True).done) cupy_tensor_dlpack = cp.from_dlpack(pb_tensor) - with cp.cuda.Device(1): + with cp.cuda.Device(1): self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output)) self.assertFalse(pb_tensor.is_cpu()) @@ -279,8 +279,8 @@ def test_cuda_blocking_stream_multi_gpu(self): # all compute work self.assertTrue(blocking_stream.done) cupy_tensor_dlpack = cp.from_dlpack(pb_tensor) - - with cp.cuda.Device(1): + + with cp.cuda.Device(1): self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output)) self.assertFalse(pb_tensor.is_cpu()) @@ -317,8 +317,8 @@ def test_cuda_non_blocking_stream_multi_gpu(self): # all compute work self.assertTrue(non_blocking_stream.done) cupy_tensor_dlpack = cp.from_dlpack(pb_tensor) - - with cp.cuda.Device(2): + + with cp.cuda.Device(2): self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output)) self.assertFalse(pb_tensor.is_cpu()) diff --git a/qa/python_models/execute_return_error/model.py b/qa/python_models/execute_return_error/model.py index 85196c15d8..29367d4a0d 100644 --- a/qa/python_models/execute_return_error/model.py +++ b/qa/python_models/execute_return_error/model.py @@ -25,7 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - class TritonPythonModel: def initialize(self, args): diff --git a/qa/python_models/ground_truth/model.py b/qa/python_models/ground_truth/model.py index 1abe39e5a0..ee04c3a073 100644 --- a/qa/python_models/ground_truth/model.py +++ b/qa/python_models/ground_truth/model.py @@ -35,7 +35,7 @@ def execute(self, requests): Mock Model that uses the input data to determine how long to wait before returning identity data """ - assert(len(requests) == 1) + assert (len(requests) == 1) delay = 0 request = requests[0] responses = [] diff --git a/qa/python_models/identity_fp32_logging/model.py b/qa/python_models/identity_fp32_logging/model.py index 93aa0df998..9bc24ce488 100644 --- a/qa/python_models/identity_fp32_logging/model.py +++ b/qa/python_models/identity_fp32_logging/model.py @@ -36,7 +36,7 @@ def initialize(self, args): logger.log_warn("Initialize-Warning Msg!") logger.log_error("Initialize-Error Msg!") logger.log_verbose("Initialize-Verbose Msg!") - + def execute(self, requests): """ Identity model in Python backend. 
diff --git a/qa/python_models/identity_fp32_timeout/model.py b/qa/python_models/identity_fp32_timeout/model.py index 5e12aa87d8..7235e33d83 100644 --- a/qa/python_models/identity_fp32_timeout/model.py +++ b/qa/python_models/identity_fp32_timeout/model.py @@ -27,6 +27,7 @@ import triton_python_backend_utils as pb_utils import time + class TritonPythonModel: def execute(self, requests): diff --git a/qa/python_models/init_args/model.py b/qa/python_models/init_args/model.py index a3044a3457..0b6b16800e 100644 --- a/qa/python_models/init_args/model.py +++ b/qa/python_models/init_args/model.py @@ -28,6 +28,7 @@ import numpy as np import triton_python_backend_utils as pb_utils + def check_init_args(args): expected_args = { 'model_name': @@ -39,7 +40,8 @@ def check_init_args(args): 'model_instance_device_id': '0', 'model_repository': - os.getenv("TRITON_DIR", "/opt/tironserver") + '/qa/L0_backend_python/models/init_args', + os.getenv("TRITON_DIR", "/opt/tironserver") + + '/qa/L0_backend_python/models/init_args', 'model_version': '1' } diff --git a/qa/python_models/model_init_del/util.py b/qa/python_models/model_init_del/util.py index 6b77dde806..10b9df724a 100644 --- a/qa/python_models/model_init_del/util.py +++ b/qa/python_models/model_init_del/util.py @@ -33,6 +33,7 @@ # Helper functions for reading/writing state to disk # + def __get_number(filename): full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename) try: @@ -43,6 +44,7 @@ def __get_number(filename): txt = "0" return int(txt) + def __store_number(filename, number): full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename) txt = str(number) @@ -50,6 +52,7 @@ def __store_number(filename, number): fcntl.lockf(f, fcntl.LOCK_EX) f.write(txt) + def __inc_number(filename): full_path = os.path.join(os.environ["MODEL_LOG_DIR"], filename) try: @@ -66,49 +69,60 @@ def __inc_number(filename): __store_number(filename, number) return number + # # Functions for communicating initialize and finalize count between the model # and test # + def __get_count_filename(kind): if kind != "initialize" and kind != "finalize": raise KeyError("Invalid count kind: " + str(kind)) filename = __model_name + "_" + kind + "_count.txt" return filename + def get_count(kind): return __get_number(__get_count_filename(kind)) + def inc_count(kind): return __inc_number(__get_count_filename(kind)) + def reset_count(kind): count = 0 __store_number(__get_count_filename(kind), count) return count + # # Functions for communicating various delays (in seconds) to the model # + def __get_delay_filename(kind): if kind != "initialize" and kind != "infer": raise KeyError("Invalid delay kind: " + str(kind)) filename = __model_name + "_" + kind + "_delay.txt" return filename + def get_delay(kind): return __get_number(__get_delay_filename(kind)) + def set_delay(kind, delay): __store_number(__get_delay_filename(kind), delay) return delay + # # Functions for modifying the model # + def update_instance_group(instance_group_str): full_path = os.path.join(os.path.dirname(__file__), "config.pbtxt") with open(full_path, mode="r+", encoding="utf-8", errors="strict") as f: @@ -122,11 +136,13 @@ def update_instance_group(instance_group_str): f.write(txt) return txt + def update_model_file(): full_path = os.path.join(os.path.dirname(__file__), "1", "model.py") with open(full_path, mode="a", encoding="utf-8", errors="strict") as f: f.write("\n# dummy model file update\n") + def enable_batching(): full_path = os.path.join(os.path.dirname(__file__), "config.pbtxt") with open(full_path, 
mode="r+", encoding="utf-8", errors="strict") as f: @@ -137,6 +153,7 @@ def enable_batching(): f.write(txt) return txt + def disable_batching(): full_path = os.path.join(os.path.dirname(__file__), "config.pbtxt") with open(full_path, mode="r+", encoding="utf-8", errors="strict") as f: diff --git a/src/classification.cc b/src/classification.cc index d8dab03817..2d8cd26b9e 100644 --- a/src/classification.cc +++ b/src/classification.cc @@ -28,6 +28,7 @@ #include #include + #include "common.h" namespace triton { namespace server { diff --git a/src/classification.h b/src/classification.h index 27c8ba1ef6..9264baa2b0 100644 --- a/src/classification.h +++ b/src/classification.h @@ -27,6 +27,7 @@ #include #include + #include "triton/core/tritonserver.h" namespace triton { namespace server { diff --git a/src/command_line_parser.cc b/src/command_line_parser.cc index e5ae96bd93..9b3470b454 100644 --- a/src/command_line_parser.cc +++ b/src/command_line_parser.cc @@ -345,9 +345,9 @@ TritonParser::SetupOptions() "finish. After the timeout expires the server exits even if inferences " "are still in flight."}); - model_repo_options_.push_back({OPTION_MODEL_REPOSITORY, "model-store", - Option::ArgStr, - "Equivalent to --model-repository."}); + model_repo_options_.push_back( + {OPTION_MODEL_REPOSITORY, "model-store", Option::ArgStr, + "Equivalent to --model-repository."}); model_repo_options_.push_back( {OPTION_MODEL_REPOSITORY, "model-repository", Option::ArgStr, "Path to model repository directory. It may be specified multiple times " @@ -407,14 +407,16 @@ TritonParser::SetupOptions() "same name can be served if they are in different namespace."}); #if defined(TRITON_ENABLE_HTTP) - http_options_.push_back({OPTION_ALLOW_HTTP, "allow-http", Option::ArgBool, - "Allow the server to listen for HTTP requests."}); + http_options_.push_back( + {OPTION_ALLOW_HTTP, "allow-http", Option::ArgBool, + "Allow the server to listen for HTTP requests."}); http_options_.push_back( {OPTION_HTTP_ADDRESS, "http-address", Option::ArgStr, "The address for the http server to bind to. Default is 0.0.0.0"}); - http_options_.push_back({OPTION_HTTP_PORT, "http-port", Option::ArgInt, - "The port for the server to listen on for HTTP " - "requests. Default is 8000."}); + http_options_.push_back( + {OPTION_HTTP_PORT, "http-port", Option::ArgInt, + "The port for the server to listen on for HTTP " + "requests. Default is 8000."}); http_options_.push_back( {OPTION_REUSE_HTTP_PORT, "reuse-http-port", Option::ArgBool, "Allow multiple servers to listen on the same HTTP port when every " @@ -426,19 +428,22 @@ TritonParser::SetupOptions() Option::ArgStr, "The regular expression pattern that will be used for forwarding HTTP " "headers as inference request parameters."}); - http_options_.push_back({OPTION_HTTP_THREAD_COUNT, "http-thread-count", - Option::ArgInt, - "Number of threads handling HTTP requests."}); + http_options_.push_back( + {OPTION_HTTP_THREAD_COUNT, "http-thread-count", Option::ArgInt, + "Number of threads handling HTTP requests."}); #endif // TRITON_ENABLE_HTTP #if defined(TRITON_ENABLE_GRPC) - grpc_options_.push_back({OPTION_ALLOW_GRPC, "allow-grpc", Option::ArgBool, - "Allow the server to listen for GRPC requests."}); - grpc_options_.push_back({OPTION_GRPC_ADDRESS, "grpc-address", Option::ArgStr, - "The address for the grpc server to binds to. Default is 0.0.0.0"}); - grpc_options_.push_back({OPTION_GRPC_PORT, "grpc-port", Option::ArgInt, - "The port for the server to listen on for GRPC " - "requests. 
Default is 8001."}); + grpc_options_.push_back( + {OPTION_ALLOW_GRPC, "allow-grpc", Option::ArgBool, + "Allow the server to listen for GRPC requests."}); + grpc_options_.push_back( + {OPTION_GRPC_ADDRESS, "grpc-address", Option::ArgStr, + "The address for the grpc server to binds to. Default is 0.0.0.0"}); + grpc_options_.push_back( + {OPTION_GRPC_PORT, "grpc-port", Option::ArgInt, + "The port for the server to listen on for GRPC " + "requests. Default is 8001."}); grpc_options_.push_back( {OPTION_REUSE_GRPC_PORT, "reuse-grpc-port", Option::ArgBool, "Allow multiple servers to listen on the same GRPC port when every " @@ -536,13 +541,15 @@ TritonParser::SetupOptions() {OPTION_LOG_VERBOSE, "log-verbose", Option::ArgInt, "Set verbose logging level. Zero (0) disables verbose logging and " "values >= 1 enable verbose logging."}); - logging_options_.push_back({OPTION_LOG_INFO, "log-info", Option::ArgBool, - "Enable/disable info-level logging."}); - logging_options_.push_back({OPTION_LOG_WARNING, "log-warning", - Option::ArgBool, - "Enable/disable warning-level logging."}); - logging_options_.push_back({OPTION_LOG_ERROR, "log-error", Option::ArgBool, - "Enable/disable error-level logging."}); + logging_options_.push_back( + {OPTION_LOG_INFO, "log-info", Option::ArgBool, + "Enable/disable info-level logging."}); + logging_options_.push_back( + {OPTION_LOG_WARNING, "log-warning", Option::ArgBool, + "Enable/disable warning-level logging."}); + logging_options_.push_back( + {OPTION_LOG_ERROR, "log-error", Option::ArgBool, + "Enable/disable error-level logging."}); logging_options_.push_back( {OPTION_LOG_FORMAT, "log-format", Option::ArgStr, "Set the logging format. Options are \"default\" and \"ISO8601\". " diff --git a/src/command_line_parser.h b/src/command_line_parser.h index 0c57c6a345..ace5b6f232 100644 --- a/src/command_line_parser.h +++ b/src/command_line_parser.h @@ -34,6 +34,7 @@ #include #include #include + #include "triton/common/logging.h" #include "triton/core/tritonserver.h" #ifdef TRITON_ENABLE_GRPC diff --git a/src/common.cc b/src/common.cc index fe7de36948..83fe3c6c25 100644 --- a/src/common.cc +++ b/src/common.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" + #include #include diff --git a/src/common.h b/src/common.h index 4551d5cb3a..b7b4f845ac 100644 --- a/src/common.h +++ b/src/common.h @@ -29,6 +29,7 @@ #include #include #include + #include "triton/core/tritonserver.h" namespace triton { namespace server { @@ -95,17 +96,17 @@ const std::vector TRITON_RESERVED_REQUEST_PARAMS{ } \ } while (false) -#define THROW_IF_ERR(EX_TYPE, X, MSG) \ - do { \ - TRITONSERVER_Error* err__ = (X); \ - if (err__ != nullptr) { \ +#define THROW_IF_ERR(EX_TYPE, X, MSG) \ + do { \ + TRITONSERVER_Error* err__ = (X); \ + if (err__ != nullptr) { \ auto ex__ = (EX_TYPE)( \ std::string("error: ") + (MSG) + ": " + \ TRITONSERVER_ErrorCodeString(err__) + " - " + \ - TRITONSERVER_ErrorMessage(err__)); \ - TRITONSERVER_ErrorDelete(err__); \ - throw ex__; \ - } \ + TRITONSERVER_ErrorMessage(err__)); \ + TRITONSERVER_ErrorDelete(err__); \ + throw ex__; \ + } \ } while (false) #define IGNORE_ERR(X) \ diff --git a/src/data_compressor.h b/src/data_compressor.h index d8eafb9662..e417558901 100644 --- a/src/data_compressor.h +++ b/src/data_compressor.h @@ -25,6 +25,9 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 #pragma once
+#include
+#include
+
 #include
 #include
 #include
@@ -32,8 +35,6 @@
 #include
 #include
-#include
-#include
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/grpc/grpc_server.cc b/src/grpc/grpc_server.cc
index a5b340efc2..717766a366 100644
--- a/src/grpc/grpc_server.cc
+++ b/src/grpc/grpc_server.cc
@@ -28,6 +28,7 @@
 #include
 #include
+
 #include
 #include
 #include
@@ -38,6 +39,7 @@
 #include
 #include
 #include
+
 #include "../classification.h"
 #include "../common.h"
 #include "grpc++/grpc++.h"
@@ -301,8 +303,8 @@ class CommonHandler : public HandlerBase {
   static std::pair empty_restricted_key_;
 };
 
-std::pair CommonHandler::empty_restricted_key_{"",
-                                                         ""};
+std::pair CommonHandler::empty_restricted_key_{
+    "", ""};
 
 CommonHandler::CommonHandler(
     const std::string& name,
@@ -2315,16 +2317,19 @@ Server::Server(
         std::to_string(keepalive_options.keepalive_time_ms_)};
     table_printer.InsertRow(row);
 
-    row = {"keepalive_timeout_ms",
-           std::to_string(keepalive_options.keepalive_timeout_ms_)};
+    row = {
+        "keepalive_timeout_ms",
+        std::to_string(keepalive_options.keepalive_timeout_ms_)};
     table_printer.InsertRow(row);
 
-    row = {"keepalive_permit_without_calls",
-           std::to_string(keepalive_options.keepalive_permit_without_calls_)};
+    row = {
+        "keepalive_permit_without_calls",
+        std::to_string(keepalive_options.keepalive_permit_without_calls_)};
     table_printer.InsertRow(row);
 
-    row = {"http2_max_pings_without_data",
-           std::to_string(keepalive_options.http2_max_pings_without_data_)};
+    row = {
+        "http2_max_pings_without_data",
+        std::to_string(keepalive_options.http2_max_pings_without_data_)};
     table_printer.InsertRow(row);
 
     row = {
@@ -2333,8 +2338,9 @@ Server::Server(
             keepalive_options.http2_min_recv_ping_interval_without_data_ms_)};
     table_printer.InsertRow(row);
 
-    row = {"http2_max_ping_strikes",
-           std::to_string(keepalive_options.http2_max_ping_strikes_)};
+    row = {
+        "http2_max_ping_strikes",
+        std::to_string(keepalive_options.http2_max_ping_strikes_)};
     table_printer.InsertRow(row);
     LOG_VERBOSE(1) << table_printer.PrintTable();
   }
diff --git a/src/grpc/grpc_server.h b/src/grpc/grpc_server.h
index b2c932a614..4bbb54594f 100644
--- a/src/grpc/grpc_server.h
+++ b/src/grpc/grpc_server.h
@@ -26,7 +26,9 @@
 #pragma once
 
 #include
+
 #include
+
 #include "../shared_memory_manager.h"
 #include "../tracer.h"
 #include "grpc_handler.h"
diff --git a/src/grpc/grpc_utils.h b/src/grpc/grpc_utils.h
index 58e02d5cbb..1f4ad8003e 100644
--- a/src/grpc/grpc_utils.h
+++ b/src/grpc/grpc_utils.h
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+
 #include "../classification.h"
 #include "../common.h"
 #include "../shared_memory_manager.h"
diff --git a/src/grpc/infer_handler.h b/src/grpc/infer_handler.h
index 0b87549b3e..b2ce3f13e2 100644
--- a/src/grpc/infer_handler.h
+++ b/src/grpc/infer_handler.h
@@ -28,10 +28,12 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
+
 #include "../tracer.h"
 #include "grpc_handler.h"
 #include "grpc_service.grpc.pb.h"
diff --git a/src/grpc/stream_infer_handler.cc b/src/grpc/stream_infer_handler.cc
index 03b4f209a9..268c6aafa0 100644
--- a/src/grpc/stream_infer_handler.cc
+++ b/src/grpc/stream_infer_handler.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "stream_infer_handler.h"
+
 #include
 
 namespace triton { namespace server { namespace grpc {
diff --git a/src/http_server.cc b/src/http_server.cc
index 4748d1990c..3493e620be 100644
--- a/src/http_server.cc
+++ b/src/http_server.cc
@@ -32,10 +32,12 @@
 #include
 #include
+
 #include
 #include
 #include
 #include
+
 #include "classification.h"
 
 #define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
@@ -1344,7 +1346,7 @@ HTTPAPIServer::HandleRepositoryControl(
   };
   std::unique_ptr<
       std::vector, decltype(param_deleter)>
-      params(new std::vector(), param_deleter);
+      params(new std::vector(), param_deleter);
   // local variables to store the decoded file content, the data must
   // be valid until TRITONSERVER_ServerLoadModelWithParameters returns.
   std::list> binary_files;
diff --git a/src/http_server.h b/src/http_server.h
index 46982d76cb..44b013fd84 100644
--- a/src/http_server.h
+++ b/src/http_server.h
@@ -27,12 +27,14 @@
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
 #include
+
 #include "common.h"
 #include "data_compressor.h"
 #include "shared_memory_manager.h"
diff --git a/src/memory_alloc.cc b/src/memory_alloc.cc
index 4b0ad6f6ec..64f61510e9 100644
--- a/src/memory_alloc.cc
+++ b/src/memory_alloc.cc
@@ -28,12 +28,14 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/multi_server.cc b/src/multi_server.cc
index cc89000f28..d575931b58 100644
--- a/src/multi_server.cc
+++ b/src/multi_server.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
@@ -35,6 +36,7 @@
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/sagemaker_server.h b/src/sagemaker_server.h
index 2ed041d101..45c6b22044 100644
--- a/src/sagemaker_server.h
+++ b/src/sagemaker_server.h
@@ -78,7 +78,8 @@ class SagemakerAPIServer : public HTTPAPIServer {
         model_path_regex_(
             R"((\/opt\/ml\/models\/[0-9A-Za-z._]+)\/(model)\/?([0-9A-Za-z._]+)?)"),
         platform_ensemble_regex_(R"(platform:(\s)*\"ensemble\")"),
-        ping_mode_(GetEnvironmentVariableOrDefault("SAGEMAKER_TRITON_PING_MODE", "ready")),
+        ping_mode_(GetEnvironmentVariableOrDefault(
+            "SAGEMAKER_TRITON_PING_MODE", "ready")),
         model_name_(GetEnvironmentVariableOrDefault(
             "SAGEMAKER_TRITON_DEFAULT_MODEL_NAME",
             "unspecified_SAGEMAKER_TRITON_DEFAULT_MODEL_NAME")),
diff --git a/src/shared_memory_manager.cc b/src/shared_memory_manager.cc
index 9b4ce8fc29..d4d99b1889 100644
--- a/src/shared_memory_manager.cc
+++ b/src/shared_memory_manager.cc
@@ -121,6 +121,7 @@ SharedMemoryManager::UnregisterHelper(
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/common/logging.h"
diff --git a/src/shared_memory_manager.h b/src/shared_memory_manager.h
index 8b39c3115b..b282f77bc7 100644
--- a/src/shared_memory_manager.h
+++ b/src/shared_memory_manager.h
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+
 #include "triton/core/tritonserver.h"
 
 #define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
diff --git a/src/simple.cc b/src/simple.cc
index b0673620d7..5a6bd3b04b 100644
--- a/src/simple.cc
+++ b/src/simple.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
@@ -35,6 +36,7 @@
 #include
 #include
 #include
+
 #include "common.h"
 #include "triton/core/tritonserver.h"
diff --git a/src/test/caffe2plan.cc b/src/test/caffe2plan.cc
index 7bda39c2eb..301129f10a 100644
--- a/src/test/caffe2plan.cc
+++ b/src/test/caffe2plan.cc
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
@@ -415,9 +416,9 @@ main(int argc, char** argv)
   if (!CaffeToPlan(
           output_filename, prototxt_filename, model_filename, output_names,
-          (use_fp16) ? nvinfer1::DataType::kHALF
-                     : (use_int8) ? nvinfer1::DataType::kINT8
-                                  : nvinfer1::DataType::kFLOAT,
+          (use_fp16)   ? nvinfer1::DataType::kHALF
+          : (use_int8) ? nvinfer1::DataType::kINT8
+                       : nvinfer1::DataType::kFLOAT,
           calibration_filename, max_batch_size, max_workspace_size)) {
     std::cerr << "Failed to create PLAN file" << std::endl;
     return 1;
diff --git a/src/test/data_compressor_test.cc b/src/test/data_compressor_test.cc
index e1b46cb641..292c8c544a 100644
--- a/src/test/data_compressor_test.cc
+++ b/src/test/data_compressor_test.cc
@@ -33,6 +33,7 @@
 #endif
 
 #include
+
 #include
 #include
 #include
@@ -43,6 +44,7 @@
 #include
 #include
 #include
+
 #include "data_compressor.h"
 
 namespace ni = triton::server;
@@ -140,8 +142,8 @@ class DataCompressorTest : public ::testing::Test {
       : raw_data_length_(0), deflate_compressed_length_(0),
         gzip_compressed_length_(0)
   {
-    std::vector files{"raw_data", "deflate_compressed_data",
-                      "gzip_compressed_data"};
+    std::vector files{
+        "raw_data", "deflate_compressed_data", "gzip_compressed_data"};
     for (const auto& file : files) {
       std::fstream fs(file);
       // get length of file
diff --git a/src/test/distributed_addsub/src/distributed_addsub.cc b/src/test/distributed_addsub/src/distributed_addsub.cc
index 4949ca4444..4444e6a735 100644
--- a/src/test/distributed_addsub/src/distributed_addsub.cc
+++ b/src/test/distributed_addsub/src/distributed_addsub.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
@@ -661,10 +662,14 @@ TRITONBACKEND_ModelInstanceExecute(
     uint64_t input_1_byte_size = input_byte_size;
     GUARDED_RESPOND_IF_ERROR(
         responses, r,
-        ReadInputTensor(request, "INPUT0", input_0.data(), reinterpret_cast(&input_0_byte_size)));
+        ReadInputTensor(
+            request, "INPUT0", input_0.data(),
+            reinterpret_cast(&input_0_byte_size)));
     GUARDED_RESPOND_IF_ERROR(
         responses, r,
-        ReadInputTensor(request, "INPUT1", input_1.data(), reinterpret_cast(&input_1_byte_size)));
+        ReadInputTensor(
+            request, "INPUT1", input_1.data(),
+            reinterpret_cast(&input_1_byte_size)));
     if (responses[r] == nullptr) {
       LOG_MESSAGE(
           TRITONSERVER_LOG_ERROR,
diff --git a/src/test/dyna_sequence/src/dyna_sequence.cc b/src/test/dyna_sequence/src/dyna_sequence.cc
index b78df20142..91f83db7c9 100644
--- a/src/test/dyna_sequence/src/dyna_sequence.cc
+++ b/src/test/dyna_sequence/src/dyna_sequence.cc
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
diff --git a/src/test/implicit_state/src/implicit_state.cc b/src/test/implicit_state/src/implicit_state.cc
index 773715afcd..74c3142dea 100644
--- a/src/test/implicit_state/src/implicit_state.cc
+++ b/src/test/implicit_state/src/implicit_state.cc
@@ -26,6 +26,7 @@
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
diff --git a/src/test/query_backend/src/query.cc b/src/test/query_backend/src/query.cc
index dcbabe6c0b..8cc2fd4a06 100644
--- a/src/test/query_backend/src/query.cc
+++ b/src/test/query_backend/src/query.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
@@ -104,8 +105,8 @@ TRITONBACKEND_ModelInstanceExecute(
   } else {
     names = {"OUTPUT0", "OUTPUT1"};
   }
-  std::vector types{TRITONSERVER_MEMORY_CPU_PINNED,
-                    TRITONSERVER_MEMORY_CPU_PINNED};
+  std::vector types{
+      TRITONSERVER_MEMORY_CPU_PINNED, TRITONSERVER_MEMORY_CPU_PINNED};
   std::vector type_ids{1, 1};
   for (size_t i = 0; i < names.size(); ++i) {
     auto err = TRITONBACKEND_RequestOutputBufferProperties(
diff --git a/src/test/repoagent/relocation_repoagent/src/relocation.cc b/src/test/repoagent/relocation_repoagent/src/relocation.cc
index 8ad25a4ad5..1db8c35c97 100644
--- a/src/test/repoagent/relocation_repoagent/src/relocation.cc
+++ b/src/test/repoagent/relocation_repoagent/src/relocation.cc
@@ -24,15 +24,15 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include "triton/core/tritonrepoagent.h"
-#include "triton/core/tritonserver.h"
-
 #include
 #include
 #include
 #include
 #include
 
+#include "triton/core/tritonrepoagent.h"
+#include "triton/core/tritonserver.h"
+
 //
 // Relocation Repository Agent that is for test only.
 //
diff --git a/src/test/sequence/src/sequence.cc b/src/test/sequence/src/sequence.cc
index c599ca46f3..44896d2974 100644
--- a/src/test/sequence/src/sequence.cc
+++ b/src/test/sequence/src/sequence.cc
@@ -26,6 +26,7 @@
 #include
 #include
+
 #include "triton/backend/backend_common.h"
 #include "triton/backend/backend_model.h"
 #include "triton/backend/backend_model_instance.h"
diff --git a/src/tracer.cc b/src/tracer.cc
index cd442e83b9..26750fc3b5 100644
--- a/src/tracer.cc
+++ b/src/tracer.cc
@@ -27,7 +27,9 @@
 #include "tracer.h"
 
 #include
+
 #include
+
 #include "common.h"
 #include "triton/common/logging.h"
 #ifdef TRITON_ENABLE_GPU
@@ -342,8 +344,7 @@ TraceManager::Trace::CaptureTimestamp(
     if (trace_span_ == nullptr) {
       InitSpan(otel_timestamp);
     }
-    trace_span_->AddEvent(
-        name, otel_timestamp);
+    trace_span_->AddEvent(name, otel_timestamp);
 #else
   LOG_ERROR << "Unsupported trace mode: "
             << TraceManager::InferenceTraceModeString(setting_->mode_);
@@ -369,10 +370,10 @@ TraceManager::Trace::InitTracer(
     }
   }
   exporter_ = otlp::OtlpHttpExporterFactory::Create(opts);
-  processor_ = otel_trace_sdk::SimpleSpanProcessorFactory::Create(
-      std::move(exporter_));
-  provider_ = otel_trace_sdk::TracerProviderFactory::Create(
-      std::move(processor_));
+  processor_ =
+      otel_trace_sdk::SimpleSpanProcessorFactory::Create(std::move(exporter_));
+  provider_ =
+      otel_trace_sdk::TracerProviderFactory::Create(std::move(processor_));
 }
 
 void
diff --git a/src/tracer.h b/src/tracer.h
index 88736c536b..c310921f14 100644
--- a/src/tracer.h
+++ b/src/tracer.h
@@ -157,7 +157,7 @@ class TraceManager {
 
   uint64_t trace_id_;
 
-#if !defined (_WIN32) && defined (TRITON_ENABLE_TRACING)
+#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
   // OpenTelemetry SDK relies on system's clock for event timestamps.
   // Triton Tracing records timestamps using steady_clock. This is a
   // monotonic clock, i.e. time is always moving forward. It is not related
diff --git a/src/triton_signal.h b/src/triton_signal.h
index 870df7ed43..d5aefbf0bf 100644
--- a/src/triton_signal.h
+++ b/src/triton_signal.h
@@ -27,6 +27,7 @@
 #include
 #include
+
 #include "triton/core/tritonserver.h"
 
 namespace triton { namespace server {
diff --git a/src/vertex_ai_server.cc b/src/vertex_ai_server.cc
index 95613c36dd..f14143f5b6 100644
--- a/src/vertex_ai_server.cc
+++ b/src/vertex_ai_server.cc
@@ -26,6 +26,7 @@
 #include "vertex_ai_server.h"
 
 #include
+
 #include "common.h"
 
 namespace triton { namespace server {