From d9074344e8ca6fa700eedc03df9561cbf35ec697 Mon Sep 17 00:00:00 2001 From: leonardozcm Date: Thu, 8 Sep 2022 10:18:27 +0800 Subject: [PATCH 1/2] cpu --- .gitignore | 5 ++ configs/Kinetics/C2D_8x8_R50.yaml | 9 ++- install_requirements.sh | 10 +++ setup.py | 2 +- slowfast/datasets/decoder.py | 118 ++++++++++++++++++++---------- slowfast/utils/misc.py | 2 + tools/train_net.py | 5 +- 7 files changed, 107 insertions(+), 44 deletions(-) create mode 100644 .gitignore create mode 100644 install_requirements.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..6054951c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.pyc +*.log +build/* +slowfast.egg-info/* +*.zip diff --git a/configs/Kinetics/C2D_8x8_R50.yaml b/configs/Kinetics/C2D_8x8_R50.yaml index b0363d421..7668f4fb9 100644 --- a/configs/Kinetics/C2D_8x8_R50.yaml +++ b/configs/Kinetics/C2D_8x8_R50.yaml @@ -1,7 +1,7 @@ TRAIN: ENABLE: True DATASET: kinetics - BATCH_SIZE: 64 + BATCH_SIZE: 16 EVAL_PERIOD: 10 CHECKPOINT_PERIOD: 1 AUTO_RESUME: True @@ -12,6 +12,8 @@ DATA: TRAIN_CROP_SIZE: 224 TEST_CROP_SIZE: 256 INPUT_CHANNEL_NUM: [3] + PATH_TO_DATA_DIR: dataset/tiny-kinetics-400/data + DECODING_BACKEND: pyav RESNET: ZERO_INIT_FINAL_BN: True WIDTH_PER_GROUP: 64 @@ -43,13 +45,14 @@ MODEL: LOSS_FUNC: cross_entropy DROPOUT_RATE: 0.5 TEST: - ENABLE: True + ENABLE: False DATASET: kinetics BATCH_SIZE: 64 DATA_LOADER: NUM_WORKERS: 8 PIN_MEMORY: True -NUM_GPUS: 8 +NUM_GPUS: 0 NUM_SHARDS: 1 +DIST_BACKEND: "gloo" RNG_SEED: 0 OUTPUT_DIR: . diff --git a/install_requirements.sh b/install_requirements.sh new file mode 100644 index 000000000..058e28f39 --- /dev/null +++ b/install_requirements.sh @@ -0,0 +1,10 @@ +pip install 'git+https://github.com/facebookresearch/fvcore' +pip install simplejson +pip install av +pip install -U iopath +pip install psutil +pip install opencv-python +pip install tensorboard +pip install moviepy +pip install pytorchvideo +pip install -e detectron2_repo \ No newline at end of file diff --git a/setup.py b/setup.py index 185c7d64f..0853b3895 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ "opencv-python", "pandas", "torchvision>=0.4.2", - "PIL", + "Pillow", "sklearn", "tensorboard", "fairscale", diff --git a/slowfast/datasets/decoder.py b/slowfast/datasets/decoder.py index 7b7aff4ae..0c9f1f920 100644 --- a/slowfast/datasets/decoder.py +++ b/slowfast/datasets/decoder.py @@ -3,8 +3,9 @@ import logging import math -import numpy as np import random + +import numpy as np import torch import torchvision.io as io @@ -84,7 +85,7 @@ def get_multiple_start_end_idx( num_clips_uniform, min_delta=0, max_delta=math.inf, - use_offset=False, + use_offset=False ): """ Sample a clip of size clip_size from a video of size video_size and @@ -114,7 +115,7 @@ def sample_clips( min_delta=0, max_delta=math.inf, num_retries=100, - use_offset=False, + use_offset=False ): se_inds = np.empty((0, 2)) dt = np.empty((0)) @@ -125,15 +126,13 @@ def sample_clips( if clip_idx == -1: # Random temporal sampling. start_idx = random.uniform(0, max_start) - else: # Uniformly sample the clip with the given index. + else: # Uniformly sample the clip with the given index. if use_offset: if num_clips_uniform == 1: # Take the center clip if num_clips is 1. start_idx = math.floor(max_start / 2) else: - start_idx = clip_idx * math.floor( - max_start / (num_clips_uniform - 1) - ) + start_idx = clip_idx * math.floor(max_start / (num_clips_uniform - 1)) else: start_idx = max_start * clip_idx / num_clips_uniform @@ -304,7 +303,10 @@ def torchvision_decode( decode_all_video = False # try selective decoding clip_sizes = [ - np.maximum(1.0, sampling_rate[i] * num_frames[i] / target_fps * fps) + np.maximum( + 1.0, + sampling_rate[i] * num_frames[i] / target_fps * fps + ) for i in range(len(sampling_rate)) ] start_end_delta_time = get_multiple_start_end_idx( @@ -381,6 +383,10 @@ def pyav_decode( num_clips_uniform=10, target_fps=30, use_offset=False, + modalities=("visual",), + max_spatial_scale=0, + min_delta=-math.inf, + max_delta=math.inf, ): """ Convert the video from its original fps to the target_fps. If the video @@ -388,7 +394,6 @@ def pyav_decode( the perform temporal selective decoding and sample a clip from the video with the PyAV decoder. If the video does not support selective decoding, decode the entire video. - Args: container (container): pyav container. sampling_rate (int): frame sampling rate (interval between two sampled @@ -418,38 +423,69 @@ def pyav_decode( # If failed to fetch the decoding information, decode the entire video. decode_all_video = True video_start_pts, video_end_pts = 0, math.inf + start_end_delta_time = None + + frames = None + if container.streams.video: + video_frames, max_pts = pyav_decode_stream( + container, + video_start_pts, + video_end_pts, + container.streams.video[0], + {"video": 0}, + ) + container.close() + + frames = [frame.to_rgb().to_ndarray() for frame in video_frames] + frames = torch.as_tensor(np.stack(frames)) + frames_out = [frames] + else: # Perform selective decoding. decode_all_video = False - clip_size = np.maximum( - 1.0, np.ceil(sampling_rate * (num_frames - 1) / target_fps * fps) - ) - start_idx, end_idx, fraction = get_start_end_idx( + clip_sizes = [ + np.maximum( + 1.0, + np.ceil( + sampling_rate[i] * (num_frames[i] - 1) / target_fps * fps + ), + ) + for i in range(len(sampling_rate)) + ] + start_end_delta_time = get_multiple_start_end_idx( frames_length, - clip_size, + clip_sizes, clip_idx, num_clips_uniform, - use_offset=use_offset, - ) - timebase = duration / frames_length - video_start_pts = int(start_idx * timebase) - video_end_pts = int(end_idx * timebase) - - frames = None - # If video stream was found, fetch video frames from the video. - if container.streams.video: - video_frames, max_pts = pyav_decode_stream( - container, - video_start_pts, - video_end_pts, - container.streams.video[0], - {"video": 0}, + min_delta=min_delta, + max_delta=max_delta, ) + frames_out = [None] * len(num_frames) + for k in range(len(num_frames)): + start_idx = start_end_delta_time[k, 0] + end_idx = start_end_delta_time[k, 1] + timebase = duration / frames_length + video_start_pts = int(start_idx * timebase) + video_end_pts = int(end_idx * timebase) + + frames = None + # If video stream was found, fetch video frames from the video. + if container.streams.video: + video_frames, max_pts = pyav_decode_stream( + container, + video_start_pts, + video_end_pts, + container.streams.video[0], + {"video": 0}, + ) + + frames = [frame.to_rgb().to_ndarray() for frame in video_frames] + frames = torch.as_tensor(np.stack(frames)) + + frames_out[k] = frames container.close() - frames = [frame.to_rgb().to_ndarray() for frame in video_frames] - frames = torch.as_tensor(np.stack(frames)) - return frames, fps, decode_all_video + return frames_out, fps, decode_all_video, start_end_delta_time def decode( @@ -509,10 +545,9 @@ def decode( ) # clips come temporally ordered from decoder try: if backend == "pyav": - assert ( - min_delta == -math.inf and max_delta == math.inf - ), "delta sampling not supported in pyav" - frames_decoded, fps, decode_all_video = pyav_decode( + assert min_delta == -math.inf and max_delta == math.inf, \ + "delta sampling not supported in pyav" + frames_decoded, fps, decode_all_video, start_end_delta_time = pyav_decode( container, sampling_rate, num_frames, @@ -520,6 +555,10 @@ def decode( num_clips_uniform, target_fps, use_offset=use_offset, + modalities=("visual",), + max_spatial_scale=max_spatial_scale, + min_delta=min_delta, + max_delta=max_delta, ) elif backend == "torchvision": ( @@ -557,7 +596,10 @@ def decode( frames_decoded = [frames_decoded] num_decoded = len(frames_decoded) clip_sizes = [ - np.maximum(1.0, sampling_rate[i] * num_frames[i] / target_fps * fps) + np.maximum( + 1.0, + sampling_rate[i] * num_frames[i] / target_fps * fps + ) for i in range(len(sampling_rate)) ] @@ -621,4 +663,4 @@ def decode( for i in range(num_decode) ) - return frames_out, start_end_delta_time, time_diff_aug + return frames_out, start_end_delta_time, time_diff_aug \ No newline at end of file diff --git a/slowfast/utils/misc.py b/slowfast/utils/misc.py index 48e1ffac8..e951bc717 100644 --- a/slowfast/utils/misc.py +++ b/slowfast/utils/misc.py @@ -412,6 +412,7 @@ def launch_job(cfg, init_method, func, daemon=False): daemonic processes will be created """ if cfg.NUM_GPUS > 1: + print("GPU Number is {}".format(cfg.NUM_GPUS)) torch.multiprocessing.spawn( mpu.run, nprocs=cfg.NUM_GPUS, @@ -427,6 +428,7 @@ def launch_job(cfg, init_method, func, daemon=False): daemon=daemon, ) else: + print("GPU Number is 0") func(cfg=cfg) diff --git a/tools/train_net.py b/tools/train_net.py index 49835f8d3..369a33bb5 100644 --- a/tools/train_net.py +++ b/tools/train_net.py @@ -136,6 +136,7 @@ def train_epoch( preds, labels = model(inputs) else: preds = model(inputs) + print("cur iteration ", cur_iter) if cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel": labels = torch.zeros( preds.size(0), dtype=labels.dtype, device=labels.device @@ -271,12 +272,12 @@ def train_epoch( ) train_meter.iter_toc() # do measure allreduce for this meter train_meter.log_iter_stats(cur_epoch, cur_iter) - torch.cuda.synchronize() + # torch.cuda.synchronize() train_meter.iter_tic() del inputs # in case of fragmented memory - torch.cuda.empty_cache() + # torch.cuda.empty_cache() # Log epoch stats. train_meter.log_epoch_stats(cur_epoch) From da5de7221f8ed23025e659895956fcf7092e3a20 Mon Sep 17 00:00:00 2001 From: leonardozcm Date: Fri, 16 Sep 2022 14:47:47 +0800 Subject: [PATCH 2/2] update --- orcaexample/kinetics.py | 32 ++++++++++++++++---------------- slowfast/utils/misc.py | 2 -- tools/train_net.py | 1 - 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/orcaexample/kinetics.py b/orcaexample/kinetics.py index 449867e6d..f747b451c 100644 --- a/orcaexample/kinetics.py +++ b/orcaexample/kinetics.py @@ -95,22 +95,22 @@ def loss_creator(config): ) val_stats = orca_estimator.evaluate(data=validation_data_creator(cfg,cfg.TEST.BATCH_SIZE)) print("===> Validation Complete: Top1Accuracy {}".format(val_stats["Accuracy"])) -# elif args.backend in ["ray", "spark"]: -# orca_estimator = Estimator.from_torch(model=model_creator, -# optimizer=optim_creator, -# loss=loss_creator, -# metrics=[Accuracy()], -# backend=args.backend, -# config=cfg, -# model_dir=os.getcwd(), -# use_tqdm=True) -# orca_estimator.fit(data=train_loader_creator, -# validation_data=validation_data_creator, -# batch_size=cfg.TRAIN.BATCH_SIZE, -# epochs=cfg.SOLVER.MAX_EPOCH) -# val_stats = orca_estimator.evaluate(data=validation_data_creator, batch_size=cfg.TEST.BATCH_SIZE) -# print("===> Validation Complete: Top1Accuracy {}".format(val_stats["Accuracy"])) -# orca_estimator.shutdown() +elif args.backend in ["ray", "spark"]: + orca_estimator = Estimator.from_torch(model=model_creator, + optimizer=optim_creator, + loss=loss_creator, + metrics=[Accuracy()], + backend=args.backend, + config=cfg, + model_dir=os.getcwd(), + use_tqdm=True) + orca_estimator.fit(data=train_loader_creator, + validation_data=validation_data_creator, + batch_size=cfg.TRAIN.BATCH_SIZE, + epochs=cfg.SOLVER.MAX_EPOCH) + val_stats = orca_estimator.evaluate(data=validation_data_creator, batch_size=cfg.TEST.BATCH_SIZE) + print("===> Validation Complete: Top1Accuracy {}".format(val_stats["Accuracy"])) + orca_estimator.shutdown() else: invalidInputError(False, "Only bigdl, ray, and spark are supported " "as the backend, but got {}".format(args.backend)) diff --git a/slowfast/utils/misc.py b/slowfast/utils/misc.py index e951bc717..48e1ffac8 100644 --- a/slowfast/utils/misc.py +++ b/slowfast/utils/misc.py @@ -412,7 +412,6 @@ def launch_job(cfg, init_method, func, daemon=False): daemonic processes will be created """ if cfg.NUM_GPUS > 1: - print("GPU Number is {}".format(cfg.NUM_GPUS)) torch.multiprocessing.spawn( mpu.run, nprocs=cfg.NUM_GPUS, @@ -428,7 +427,6 @@ def launch_job(cfg, init_method, func, daemon=False): daemon=daemon, ) else: - print("GPU Number is 0") func(cfg=cfg) diff --git a/tools/train_net.py b/tools/train_net.py index 034cacfc8..b8b05b61f 100644 --- a/tools/train_net.py +++ b/tools/train_net.py @@ -136,7 +136,6 @@ def train_epoch( preds, labels = model(inputs) else: preds = model(inputs) - print("cur iteration ", cur_iter) if cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel": labels = torch.zeros( preds.size(0), dtype=labels.dtype, device=labels.device