[IDEEP] [fix bug] Fix bug in IDEEP SkipOutputCopy strategy (#8372)
* Fix a bug in SkipIndices: ContainsInternal joined the per-index comparisons with && instead of ||
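* Fix an IDEEP fallback bug: in the SkipOutputCopy strategy, revise the output to a CPUTensor by creating skipped outputs under their own blob name (without the "_cpu_output_blob_" + op type suffix)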
* [IDEEP] Add IDEEP fallbacks for Style-Transfer ops
diff --git a/caffe2/core/common.h b/caffe2/core/common.h
index c30d502..8d75628 100644
--- a/caffe2/core/common.h
+++ b/caffe2/core/common.h
@@ -258,7 +258,7 @@
}
template <int First, int Second, int... Rest>
static inline bool ContainsInternal(const int i) {
- return (i == First) && ContainsInternal<Second, Rest...>(i);
+ return (i == First) || ContainsInternal<Second, Rest...>(i);
}
public:
diff --git a/caffe2/ideep/operators/operator_fallback_ideep.cc b/caffe2/ideep/operators/operator_fallback_ideep.cc
index d79557d..7491601 100644
--- a/caffe2/ideep/operators/operator_fallback_ideep.cc
+++ b/caffe2/ideep/operators/operator_fallback_ideep.cc
@@ -15,6 +15,8 @@
#include <caffe2/operators/given_tensor_fill_op.h>
#include <caffe2/operators/load_save_op.h>
#include <caffe2/operators/loss_op.h>
+#include <caffe2/operators/pad_op.h>
+#include <caffe2/operators/prelu_op.h>
#include <caffe2/operators/reshape_op.h>
#include <caffe2/operators/roi_align_op.h>
#include <caffe2/operators/softmax_op.h>
@@ -94,9 +96,16 @@
IDEEPFallbackOp<CollectAndDistributeFpnRpnProposalsOp<CPUContext>>);
REGISTER_IDEEP_OPERATOR(
BoxWithNMSLimit,
- IDEEPFallbackOp<BoxWithNMSLimitOp<CPUContext>>);
+ IDEEPFallbackOp<BoxWithNMSLimitOp<CPUContext>, SkipIndices<0,1,2>>);
REGISTER_IDEEP_OPERATOR(
BBoxTransform,
IDEEPFallbackOp<BBoxTransformOp<float, CPUContext>>);
+REGISTER_IDEEP_OPERATOR(
+ PadImage,
+ IDEEPFallbackOp<PadImageOp<float, CPUContext>>);
+REGISTER_IDEEP_OPERATOR(
+ PRelu,
+ IDEEPFallbackOp<PReluOp<float, CPUContext>>);
+
} // namespace caffe2
diff --git a/caffe2/ideep/operators/operator_fallback_ideep.h b/caffe2/ideep/operators/operator_fallback_ideep.h
index 6c428e7..97bc8d1 100644
--- a/caffe2/ideep/operators/operator_fallback_ideep.h
+++ b/caffe2/ideep/operators/operator_fallback_ideep.h
@@ -52,11 +52,14 @@
// Create output blobs in parent workspace,
// then forward output blobs to local workspace.
std::unordered_map<string, string> forwarded_output_blobs;
- for (const string& name : base_def_.output()) {
- string parent_name(name + "_cpu_output_blob_" + base_def_.type());
+ for (int i = 0; i < base_def_.output_size(); i++) {
+ string parent_name(base_def_.output(i));
+ if (!SkipOutputCopy::Contains(i)) {
+ parent_name += "_cpu_output_blob_" + base_def_.type();
+ }
local_output_blobs_.push_back(ws->CreateBlob(parent_name));
CHECK_NOTNULL(local_output_blobs_.back());
- forwarded_output_blobs[name] = parent_name;
+ forwarded_output_blobs[base_def_.output(i)] = parent_name;
}
local_ws_.reset(new Workspace(ws, forwarded_output_blobs));
// Set up the symbols for the local workspace.
diff --git a/caffe2/operators/stylizer_ops.cc b/caffe2/operators/stylizer_ops.cc
index 80dcaaa..ca4a762 100644
--- a/caffe2/operators/stylizer_ops.cc
+++ b/caffe2/operators/stylizer_ops.cc
@@ -2,6 +2,11 @@
#include "caffe2/utils/cpu_neon.h"
#include "caffe2/utils/math.h"
+#ifdef CAFFE2_USE_IDEEP
+#include <caffe2/ideep/operators/operator_fallback_ideep.h>
+#include <caffe2/ideep/utils/ideep_operator.h>
+#endif
+
namespace caffe2 {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
@@ -580,5 +585,14 @@
OPERATOR_SCHEMA(BRGNCHWCToPackedInt8BGRAStylizerDeprocess)
.NumInputs(2)
.NumOutputs(1);
+
+#ifdef CAFFE2_USE_IDEEP
+REGISTER_IDEEP_OPERATOR(
+ BRGNCHWCToPackedInt8BGRAStylizerDeprocess,
+ IDEEPFallbackOp<BRGNCHWCToPackedInt8BGRAStylizerDeprocessOp, SkipIndices<0>>);
+REGISTER_IDEEP_OPERATOR(
+ PackedInt8BGRANHWCToNCHWCStylizerPreprocess,
+ IDEEPFallbackOp<PackedInt8BGRANHWCToNCHWCStylizerPreprocessOp>);
+#endif
} // namespace
} // namespace caffe2