#include <ATen/ScalarOps.h>
#include <ATen/TensorIndexing.h>
#include <ATen/XPUNativeFunctions.h>
#include <ATen/core/Tensor.h>
#include <ATen/native/TensorCompare.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/TypeProperties.h>
#include <aten/sycl/TensorCompare.h>

#include <limits> // std::numeric_limits, used in clamp_out

namespace at {

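// Returns the device of the first non-CPU input, falling back to CPU when
// every input lives on the CPU. Used to decide where the result should live.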
template <typename... Args>
Device out_device(Args&... inps) {
  for (const auto& i : {inps...}) {
    if (i.device() != at::kCPU) {
      return i.device();
    }
  }
  return at::kCPU;
}

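// Shared implementation for where/where_out on XPU: promotes self/other to
// the common result dtype, moves zero-dim CPU scalar tensors onto the target
// device, converts a deprecated uint8 condition to bool, and dispatches to
// the SYCL where kernel through a TensorIterator.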
Tensor& where_self_out(
    const Tensor& condition,
    const Tensor& self,
    const Tensor& other,
    Tensor& out) {
  const auto result_type = at::native::result_type(self, other);
  TORCH_CHECK(
      out.scalar_type() == result_type,
      "Expected out type to be ",
      result_type,
      " but got ",
      out.scalar_type());

  auto self_ = self.scalar_type() != result_type ? self.to(result_type) : self;
  auto other_ =
      other.scalar_type() != result_type ? other.to(result_type) : other;
  auto condition_ = condition;
  auto device = out_device(condition, self_, other_);
  if (device != at::kCPU) { // allow CPU scalars on a non-CPU device
    if (condition.device() != device && condition.ndimension() == 0) {
      condition_ = condition.to(device);
    }
    if (self_.device() != device && self_.ndimension() == 0) {
      self_ = self_.to(device);
    }
    if (other_.device() != device && other_.ndimension() == 0) {
      other_ = other_.to(device);
    }
  }
  if (condition_.scalar_type() == ScalarType::Byte) {
    TORCH_WARN_ONCE(
        "where received a uint8 condition tensor. This behavior is deprecated and will be removed in a future version of PyTorch. Use a boolean condition instead.");
    condition_ = condition_.to(kBool);
  }
  TORCH_CHECK(
      condition_.scalar_type() == kBool,
      "where expected condition to be a boolean tensor, but got a tensor with dtype ",
      condition_.scalar_type());
  // if there's still a device mismatch, let TensorIterator error out with it
  auto iter = at::TensorIteratorConfig()
                  .check_all_same_dtype(false)
                  .add_output(out)
                  .add_const_input(condition_)
                  .add_const_input(self_)
                  .add_const_input(other_)
                  .build();
  native::xpu::where_kernel(iter);
  return out;
}

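// out= variant of torch.where for the XPU backend; forwards to where_self_out.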
Tensor& XPUNativeFunctions::where_out(
    const Tensor& condition,
    const Tensor& self,
    const Tensor& other,
    Tensor& out) {
  return where_self_out(condition, self, other, out);
}

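// Functional variant of torch.where: allocates the result with the promoted
// dtype on the inferred device, then fills it via where_self_out.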
Tensor XPUNativeFunctions::where(
    const Tensor& condition,
    const Tensor& self,
    const Tensor& other) {
  auto device = out_device(condition, self, other);
  auto result_type = at::native::result_type(self, other);
  Tensor ret = at::empty({0}, self.options().dtype(result_type).device(device));
  where_self_out(condition, self, other, ret);
  return ret;
}

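// clamp with optional scalar min/max bounds: if either bound is NaN the
// result is filled with NaN; otherwise the two-sided or one-sided SYCL clamp
// kernel is dispatched.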
Tensor& XPUNativeFunctions::clamp_out(
    const Tensor& self,
    const c10::optional<Scalar>& min,
    const c10::optional<Scalar>& max,
    Tensor& result) {
  using at::native::detail::ClampLimits;
  if (min && max) {
    // a bound that compares unequal to itself is NaN; a NaN bound makes the
    // whole result NaN
    if ((*min).toDouble() != (*min).toDouble() ||
        (*max).toDouble() != (*max).toDouble()) {
      at::fill_(
          const_cast<Tensor&>(result),
          std::numeric_limits<double>::quiet_NaN());
    } else {
      auto iter = TensorIterator::unary_op(result, self);
      native::xpu::clamp_scalar_kernel(iter, *min, *max);
    }
  } else if (max) {
    auto iter = TensorIterator::unary_op(result, self);
    native::xpu::clamp_max_scalar_kernel(iter, *max);
  } else if (min) {
    auto iter = TensorIterator::unary_op(result, self);
    native::xpu::clamp_min_scalar_kernel(iter, *min);
  }
  return result;
}

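// clamp_min with a scalar bound; a NaN bound fills the result with that
// value, otherwise the SYCL clamp_min kernel is dispatched.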
Tensor& XPUNativeFunctions::clamp_min_out(
    const Tensor& self,
    const Scalar& min,
    Tensor& result) {
  if (min.toDouble() != min.toDouble()) {
    at::fill_(const_cast<Tensor&>(result), min);
  } else {
    auto iter = TensorIterator::unary_op(result, self);
    native::xpu::clamp_min_scalar_kernel(iter, min);
  }
  return result;
}

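// clamp_max with a scalar bound; mirrors clamp_min_out, falling back to
// fill_ when the bound is NaN.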
Tensor& XPUNativeFunctions::clamp_max_out(
    const Tensor& self,
    const Scalar& max,
    Tensor& result) {
  if (max.toDouble() != max.toDouble()) {
    // TODO: building the TensorIterator again is expensive, but fill_stub
    // cannot be used here because fill is not structured; this is a corner
    // case anyway.
    at::fill_(const_cast<Tensor&>(result), native::wrapped_scalar_tensor(max));
  } else {
    auto iter = TensorIterator::unary_op(result, self);
    native::xpu::clamp_max_scalar_kernel(iter, max);
  }
  return result;
}

} // namespace at