fix bug in dependency inference for RNNExecutor

Summary: RNN executor did not consider race condition -type of dependency where an op A reads blob X and following op writes blob X. This happened in beam search with a inplace-reshape following FC op.

Reviewed By: jamesr66a

Differential Revision: D5792018

fbshipit-source-id: a5590d80e1b7b127abcdf2b1c2854ea56018e12f
diff --git a/caffe2/operators/recurrent_network_executor.cc b/caffe2/operators/recurrent_network_executor.cc
index 40fb097..b287b7a 100644
--- a/caffe2/operators/recurrent_network_executor.cc
+++ b/caffe2/operators/recurrent_network_executor.cc
@@ -141,7 +141,9 @@
     } catch (::caffe2::EnforceNotMet& enf) {
       std::unique_lock<std::mutex> lk(countdown_mtx_);
       LOG(ERROR) << "Crash at thread " << id << " timestep " << job.timestep
-                 << " " << enf.what();
+                 << " op:"
+                 << ProtoDebugString(step_net_def_.op(job.op_idx))
+                 << enf.what();
       job_queue_.NoMoreJobs();
       failed_ = true;
       cv_.notify_one();
diff --git a/caffe2/operators/recurrent_network_executor.h b/caffe2/operators/recurrent_network_executor.h
index 55a6f33..0236302 100644
--- a/caffe2/operators/recurrent_network_executor.h
+++ b/caffe2/operators/recurrent_network_executor.h
@@ -166,12 +166,18 @@
    * Add dependencies to ops in the next timestep that would write an op
    * that this op has as an input or output. This is special for RNNs,
    * since we can have ops running in different timesteps concurrently.
+   * Also, we need to check ops that output a blob that is input of
+   * of the op in question.
    */
   void add_race_conflict_dependencies(
       int opidx,
       std::vector<RNNNetOperator>& rnn_ops,
       std::set<int>* dep_ops) {
-    for (int i = 0; i < opidx; i++) {
+
+    for (int i = 0; i < rnn_ops.size(); i++) {
+      if (i == opidx) {
+        continue;
+      }
       if (rnn_ops[i].link_op && this->ignoreLinkDependencies()) {
         continue;
       }
@@ -182,10 +188,12 @@
             break;
           }
         }
-        for (auto& outp : step_net_def_.op(opidx).output()) {
-          if (outp == dep_blob) {
-            dep_ops->insert(i);
-            break;
+        if (i < opidx) {
+          for (auto& outp : step_net_def_.op(opidx).output()) {
+            if (outp == dep_blob) {
+              dep_ops->insert(i);
+              break;
+            }
           }
         }
       }
@@ -231,11 +239,13 @@
             timestep_ops_template_,
             &dependent_ops);
 
+        // Race conditions arise when operator writes a blob that is
+        // being read by another.
         if (!this->ignoreLinkDependencies()) {
-          // Race conditions arise when link ops can be run out-of-order.
           add_race_conflict_dependencies(
-              rnn_op.order, timestep_ops_template_, &dependent_ops);
+            rnn_op.order, timestep_ops_template_, &dependent_ops);
         }
+
         for (int i : dependent_ops) {
           rnn_op.dependencies.push_back(i);
         }