Always use generator version of map for Variable iteration.

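In Python 2, the built-in map is eager: it builds a full list, so every
index operation is performed up front even if the caller never consumes
the result.  Using the lazy version (itertools.imap in Python 2, the
built-in map in Python 3) lets us skip indexing for elements that are
never consumed.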

As an added bonus, it makes the ordering of operations consistent
between Python 2 and Python 3 in LSTM.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
diff --git a/torch/autograd/variable.py b/torch/autograd/variable.py
index a2439f8..8ad0e3a 100644
--- a/torch/autograd/variable.py
+++ b/torch/autograd/variable.py
@@ -6,6 +6,16 @@
 import torch.utils.hooks as hooks
 import warnings
 import weakref
+import itertools
+
+
+# A portable way of referring to the generator version of map
+# in both Python 2 and Python 3.
+# TODO: Move this into an appropriate utility library.
+if hasattr(itertools, 'imap'):
+    imap = itertools.imap
+else:
+    imap = map
 
 
 class Variable(_C._VariableBase):
@@ -868,7 +878,13 @@
         return len(self.data)
 
     def __iter__(self):
-        return iter(map(lambda i: self[i], range(self.size(0))))
+        # NB: we use 'imap' and not 'map' here, so that in Python 2 we get a
+        # lazy iterator and don't eagerly perform all the indexing.  This can
+        # save us work, and also keeps trace ordering consistent across
+        # Python versions (e.g., if you zip(*hiddens), the eager map forces
+        # every index of hiddens[0] before any of hiddens[1], while the lazy
+        # map interleaves them).
+        return iter(imap(lambda i: self[i], range(self.size(0))))
 
     def __mod__(self, other):
         return self.remainder(other)