[Relay, TOPI] Add negative log likelihood loss (nll_loss) op #8056
Changes from all commits: `a2f641c`, `1053a2c`, `d8f111b`, `4b717d6`, `47d9d04`, `ca255b2`, `b7b9865`
**`include/tvm/topi/nn.h`**

```cpp
@@ -29,6 +29,7 @@
#include <tvm/tir/expr.h>
#include <tvm/tir/op.h>
#include <tvm/topi/detail/constant_utils.h>
#include <tvm/topi/reduction.h>
#include <tvm/topi/tags.h>
#include <tvm/topi/transform.h>
```
```cpp
@@ -642,6 +643,53 @@ inline tvm::te::Tensor batch_to_space_nd(const tvm::te::Tensor& data,
  out = strided_slice(out, begin_idx, end_idx, strides);
  return out;
}

/*!
 * \brief Negative log likelihood loss.
 *
 * \param predictions The prediction tensor.
 * \param targets The target tensor.
 * \param weights A manual rescaling weight given to each class.
 * \param reduction The reduction method to apply to the output.
 * \param ignore_index The target value to ignore.
 * \param name The name of the operation.
 * \param tag The tag to mark the operation.
 *
 * \return The negative log likelihood loss of the predictions and targets.
 */
inline Tensor nll_loss(const Tensor& predictions, const Tensor& targets, const Tensor& weights,
                       std::string reduction = "mean", int ignore_index = -100,
                       const std::string name = "nll_loss", const std::string tag = kBroadcast) {
```
Review thread on the `tag` default:

> **Contributor:** should the tag be …
>
> **Contributor (author):** @altanh I am confused with the `tag` …
>
> **Contributor:** I see, that is confusing.. I'll get back to you on this soon
>
> **Contributor:** I don't have much of an update for the tag, maybe you could try leaving it empty string?
>
> **Member:** tag here is topi-level, sometimes we use it to identify a specific …
The implementation continues:

```cpp
  auto T = tvm::te::compute(
      targets->shape,
      [&](const tvm::Array<tvm::tir::Var>& target_indices) {
        auto c = targets(target_indices);
        tvm::Array<tvm::PrimExpr> pred_indices;
        pred_indices.push_back(target_indices[0]);  // batch index
        pred_indices.push_back(c);                  // class index
        for (size_t i = 1; i < target_indices.size(); i++) {
          pred_indices.push_back(target_indices[i]);  // indices for multidimensional loss
        }
        return tvm::tir::Select(c != ignore_index, -predictions(pred_indices) * weights(c),
                                tvm::tir::make_const(predictions->dtype, 0));
      },
      name, tag);
  if (reduction == "mean") {
    auto W = tvm::te::compute(
        targets->shape,
        [&](const tvm::Array<tvm::tir::Var>& target_indices) {
          auto c = targets(target_indices);
          return tvm::tir::Select(c != ignore_index, weights(c),
                                  tvm::tir::make_const(predictions->dtype, 0));
        },
        name, tag);
    return topi::divide(topi::sum(T, {}), topi::sum(W, {}));
  } else if (reduction == "sum") {
    return topi::sum(T, {});
  } else {  // reduction == "none"
    return T;
  }
}

}  // namespace topi
}  // namespace tvm
#endif  // TVM_TOPI_NN_H_
```
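One subtlety in the `"mean"` branch above: the loss is divided by the sum of the *selected* weights, not by the element count, so entries equal to `ignore_index` drop out of both numerator and denominator. A minimal NumPy sketch of that semantics (toy shapes and values are my own, not from the PR):

```python
import numpy as np

# batch of 2, 3 classes; predictions are log-probabilities
preds = np.log(np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1]], dtype="float32"))
targets = np.array([0, -100], dtype="int32")   # second element is ignored
weights = np.array([1.0, 2.0, 1.0], dtype="float32")
ignore_index = -100

mask = targets != ignore_index
t = np.where(mask, targets, 0)                 # safe class index for ignored entries
loss = np.where(mask, -preds[np.arange(2), t] * weights[t], 0.0)
w = np.where(mask, weights[t], 0.0)
print(loss.sum() / w.sum())                    # -log(0.7), not -log(0.7) / 2
```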
**`python/tvm/relay/op/nn/nn.py`**

```python
@@ -2973,6 +2973,42 @@ def cross_entropy_with_logits(predictions, targets):
    return _make.cross_entropy_with_logits(predictions, targets)


def nll_loss(predictions, targets, weights, reduction="mean", ignore_index=-100):
    """Negative log likelihood loss.

    output{n, i_1, i_2, ..., i_k} = -p * w
      where t = target{n, i_1, i_2, ..., i_k}
            p = predictions{n, t, i_1, i_2, ..., i_k}
            w = weights{t} if t != ignore_index else 0

    result = reduction(output)

    Parameters
    ----------
    predictions : tvm.relay.Expr
        The predictions.

    targets : tvm.relay.Expr
        The target value of each prediction.

    weights : tvm.relay.Expr
        The weight of each target value.

    reduction : string
        The reduction method to apply to the output.
        Possible values are "mean", "sum" and "none".

    ignore_index : int
        The target value to ignore.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    return _make.nll_loss(predictions, targets, weights, reduction, ignore_index)
```

Review thread on the `weights` parameter:

> **Contributor:** can we make weights optional, like PyTorch? weights=1 is a pretty common case I believe and we could add a fast path implementation that skips the scaling
>
> **Contributor (author):** @altanh We can make weights an optional parameter. I wonder if there are any examples of a relay op with an optional tensor parameter that I can learn from. And also, how should we deal with the gradient of an optional parameter? BTW, is there any better way we can mark a parameter as "no need for gradient" instead of returning an …
>
> **Contributor:** Hmm not sure, that's a good point. Let's just keep the weights for now. As for not needing a gradient, currently there is no other way than just putting some dummy value. It might make sense for us to introduce a …
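Below is a quick end-to-end sketch of how the new Relay op might be exercised (the shapes, values, and executor setup are my illustrative assumptions; the executor API in particular varies across TVM versions):

```python
import numpy as np
import tvm
from tvm import relay

# toy problem: batch of 2 samples, 3 classes
preds = relay.var("predictions", shape=(2, 3), dtype="float32")
targets = relay.var("targets", shape=(2,), dtype="int32")
weights = relay.var("weights", shape=(3,), dtype="float32")

loss = relay.nn.nll_loss(preds, targets, weights, reduction="mean", ignore_index=-100)
func = relay.Function([preds, targets, weights], loss)
mod = tvm.IRModule.from_expr(func)

# run on CPU via the graph executor
ex = relay.create_executor("graph", mod=mod, device=tvm.cpu(), target="llvm")
pred_np = np.log(np.random.dirichlet(np.ones(3), size=2)).astype("float32")
target_np = np.array([0, 2], dtype="int32")
weight_np = np.ones(3, dtype="float32")
print(ex.evaluate()(pred_np, target_np, weight_np))
```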
The hunk's trailing context (unchanged code that follows in the file):

```python
def depth_to_space(data, block_size, layout="NCHW", mode="DCR"):
    """Convert channels into spatial blocks.
    ...
```
**`python/tvm/topi/nn/loss.py`** (new file)

```python
@@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name,unused-argument
"""Loss functions definitions."""
from __future__ import absolute_import
from . import cpp


def nll_loss(predictions, targets, weights, reduction, ignore_index):
    """Negative log likelihood loss on the input data.

    output{n, i_1, i_2, ..., i_k} = -p * w
      where t = target{n, i_1, i_2, ..., i_k}
            p = predictions{n, t, i_1, i_2, ..., i_k}
            w = weights{t} if t != ignore_index else 0

    result = reduction(output)

    Parameters
    ----------
    predictions : tvm.te.Tensor
        (k+2)-D with shape (N, C, d_1, d_2, ..., d_k),
        where C is the number of target classes

    targets : tvm.te.Tensor
        (k+1)-D with shape (N, d_1, d_2, ..., d_k)
        The target value of the input.

    weights : tvm.te.Tensor
        1-D with shape (C,)
        The weight of each target value.

    reduction : string
        The reduction method to apply to output.
        Can be "mean", "sum" or "none".

    ignore_index : int
        The target value to ignore.

    Returns
    -------
    output : tvm.te.Tensor
        a scalar if the reduction type is "mean" or "sum",
        otherwise the same shape as `targets`.
    """
    return cpp.nn.nll_loss(predictions, targets, weights, reduction, ignore_index)
```
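This wrapper can also be driven directly at the TE level. A minimal sketch, assuming a default CPU schedule and toy shapes (whether the output buffer may be 0-D, and the exact NDArray API, depend on the TVM version; everything here is illustrative, not from the PR):

```python
import numpy as np
import tvm
from tvm import te, topi

preds = te.placeholder((2, 3), name="predictions", dtype="float32")
targets = te.placeholder((2,), name="targets", dtype="int32")
weights = te.placeholder((3,), name="weights", dtype="float32")

# "mean" reduction yields a scalar (0-D) tensor
out = topi.nn.nll_loss(preds, targets, weights, "mean", -100)
s = te.create_schedule(out.op)
f = tvm.build(s, [preds, targets, weights, out], "llvm")

dev = tvm.cpu()
a = tvm.nd.array(np.log(np.random.dirichlet(np.ones(3), size=2)).astype("float32"), dev)
b = tvm.nd.array(np.array([0, 1], dtype="int32"), dev)
c = tvm.nd.array(np.ones(3, dtype="float32"), dev)
d = tvm.nd.empty((), dtype="float32", device=dev)
f(a, b, c, d)
print(d.numpy())
```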
**`python/tvm/topi/testing/nll_loss.py`** (new file)

```python
@@ -0,0 +1,72 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""NLLLoss in python"""
import numpy as np


def nll_loss(predictions, targets, weights, reduction="mean", ignore_index=-100):
    """nll_loss operator implemented in numpy.

    output{n, i_1, i_2, ..., i_k} = -p * w
      where t = target{n, i_1, i_2, ..., i_k}
            p = predictions{n, t, i_1, i_2, ..., i_k}
            w = weights{t} if t != ignore_index else 0

    result = reduction(output)

    Parameters
    ----------
    predictions : numpy.ndarray
        (k+2)-D with shape (N, C, d_1, d_2, ..., d_k),
        where C is the number of target classes

    targets : numpy.ndarray
        (k+1)-D with shape (N, d_1, d_2, ..., d_k)
        The target value of the input.

    weights : numpy.ndarray
        1-D with shape (C,)
        The weight of each target value.

    reduction : string
        The reduction method to apply to output.
        Can be "mean", "sum" or "none".

    ignore_index : int
        The target value to ignore.

    Returns
    -------
    output : numpy.ndarray
        a scalar if the reduction type is "mean" or "sum",
        otherwise the same shape as `targets`.
    """
    res = np.zeros(targets.shape)
    weight_sum = 0.0
    for index in np.ndindex(targets.shape):
        class_id = targets[index]
        if class_id != ignore_index:
            index_list = list(index)
            pred_index = tuple(index_list[:1] + [class_id] + index_list[1:])
            res[index] = -predictions[pred_index] * weights[class_id]
            weight_sum += weights[class_id]
    if reduction == "mean":
        return np.sum(res) / weight_sum
    if reduction == "sum":
        return np.sum(res)
    return res
```
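A quick sanity check against this reference with hand-computable numbers (my own toy values, assuming `nll_loss` above is in scope):

```python
import numpy as np

preds = np.log(np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1]], dtype="float32"))
targets = np.array([0, 1], dtype="int32")
weights = np.ones(3, dtype="float32")

# mean: (-log 0.7 - log 0.8) / 2 ≈ 0.2899
print(nll_loss(preds, targets, weights, "mean"))
# "none": per-element losses, shape (2,)
print(nll_loss(preds, targets, weights, "none"))
```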