Windows raw string fix (#10998)

Summary:
Breaking this out of https://github.com/pytorch/pytorch/pull/8338

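mingzhe09088's fix of the docstrings for Windows builds. Unfortunately, on some versions of Windows the compiler seems to try to parse a `#` inside the raw-string docstrings as a preprocessor directive, so this change replaces the `#` comment markers in the embedded examples with `//`. We might need to change this to something else later (`//` is not a valid comment marker in the Python examples), but we want to get this landed first.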

cc mingzhe09088 Yangqing
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10998

Reviewed By: mingzhe09088

Differential Revision: D9557480

Pulled By: orionr

fbshipit-source-id: c6a6237c27b7cf35c81133fd9faefead675a9f59
diff --git a/caffe2/operators/concat_split_op.cc b/caffe2/operators/concat_split_op.cc
index a8f4c91..3125602 100644
--- a/caffe2/operators/concat_split_op.cc
+++ b/caffe2/operators/concat_split_op.cc
@@ -311,8 +311,8 @@
     axis=3
 )
 
-workspace.FeedBlob("X1", np.random.randint(10, size=(1, 1, 5, 5))) # NCHW
-workspace.FeedBlob("X2", np.random.randint(10, size=(1, 1, 5, 5))) # NCHW
+workspace.FeedBlob("X1", np.random.randint(10, size=(1, 1, 5, 5))) // NCHW
+workspace.FeedBlob("X2", np.random.randint(10, size=(1, 1, 5, 5))) // NCHW
 print("X1:", workspace.FetchBlob("X1"))
 print("X2:", workspace.FetchBlob("X2"))
 workspace.RunOperatorOnce(op)
diff --git a/caffe2/operators/conv_op.cc b/caffe2/operators/conv_op.cc
index 082c94f..30fb79d 100644
--- a/caffe2/operators/conv_op.cc
+++ b/caffe2/operators/conv_op.cc
@@ -42,24 +42,24 @@
     stride=2
 )
 
-# Create X: (N,C,H,W)
+// Create X: (N,C,H,W)
 data = np.random.randn(1,1,8,8).astype(np.float32)
 print("Data shape: ",data.shape)
 
-# Create W: (M,C,Kh,Kw)
+// Create W: (M,C,Kh,Kw)
 filters = np.random.randn(3,1,5,5).astype(np.float32)
 print("Filter shape: ",filters.shape)
 
-# Create b: M
+// Create b: M
 bias = np.array([1.,1.,1.]).astype(np.float32)
 print("Bias shape: ",bias.shape)
 
-# Put the inputs into the workspace
+// Put the inputs into the workspace
 workspace.FeedBlob("X", data)
 workspace.FeedBlob("filter", filters)
 workspace.FeedBlob("bias", bias)
 
-# Run the operator
+// Run the operator
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
 
diff --git a/caffe2/operators/conv_transpose_op.cc b/caffe2/operators/conv_transpose_op.cc
index 57ec02b..7de16af 100644
--- a/caffe2/operators/conv_transpose_op.cc
+++ b/caffe2/operators/conv_transpose_op.cc
@@ -44,24 +44,24 @@
     strides=[2,2]
 )
 
-# Create X: (N,C,H,W)
+// Create X: (N,C,H,W)
 data = np.random.randn(2,3,5,5).astype(np.float32)
 print("Data shape: ",data.shape)
 
-# Create filter: (M,C,Kh,Kw)
+// Create filter: (M,C,Kh,Kw)
 filters = np.random.randn(3,1,2,2).astype(np.float32)
 print("Filter shape: ",filters.shape)
 
-# Create b: M
+// Create b: M
 bias = np.array([1.]).astype(np.float32)
 print("Bias shape: ",bias.shape)
 
-# Put the inputs into the workspace
+// Put the inputs into the workspace
 workspace.FeedBlob("X", data)
 workspace.FeedBlob("filter", filters)
 workspace.FeedBlob("bias", bias)
 
-# Run the operator
+// Run the operator
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
 
diff --git a/caffe2/operators/counter_ops.cc b/caffe2/operators/counter_ops.cc
index 15cdab5..50e4b94 100644
--- a/caffe2/operators/counter_ops.cc
+++ b/caffe2/operators/counter_ops.cc
@@ -58,22 +58,22 @@
 )
 
 
-# Create counter
+// Create counter
 workspace.RunOperatorOnce(createcounter_op)
 print("'counter' pointer:", workspace.FetchBlob("counter"))
 
 
-# Retrieve initial counter value
+// Retrieve initial counter value
 workspace.RunOperatorOnce(retrievecount_op)
 print("Initial 'count':", workspace.FetchBlob("count"))
 
 
-# Check if counter is done
+// Check if counter is done
 workspace.RunOperatorOnce(checkcounterdone_op)
 print("Initial 'done' value:", workspace.FetchBlob("done"))
 
 
-# Test CountUp operator
+// Test CountUp operator
 print("\nTesting CountUp operator...")
 for i in range(5):
     workspace.RunOperatorOnce(countup_op)
@@ -83,7 +83,7 @@
 print("'count' value after CountUp test:", workspace.FetchBlob("count"))
 
 
-# Test CountDown operator
+// Test CountDown operator
 print("\nTesting CountDown operator...")
 for i in range(11):
     workspace.RunOperatorOnce(countdown_op)
diff --git a/caffe2/operators/cross_entropy_op.cc b/caffe2/operators/cross_entropy_op.cc
index 584b7ab..0473e7d4 100644
--- a/caffe2/operators/cross_entropy_op.cc
+++ b/caffe2/operators/cross_entropy_op.cc
@@ -401,22 +401,22 @@
     ["Y"]
 )
 
-# Create X: Sample softmax output for 5-class model
+// Create X: Sample softmax output for 5-class model
 X = np.array([[.01, .05, .02, .02, .9],[.03, .1, .42, .05, .4]])
 print("X:\n",X)
 
-# Create label: Sample 1-hot ground truth label vectors
+// Create label: Sample 1-hot ground truth label vectors
 label = np.array([4,2])
 print("label:\n",label)
 
-# Feed X & label into workspace
+// Feed X & label into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 workspace.FeedBlob("label", label.astype(np.int32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Y:\n", workspace.FetchBlob("Y"))
 
 ```
@@ -635,22 +635,22 @@
     ["Y"]
 )
 
-# Create X: Sample softmax output for 5-class model
+// Create X: Sample softmax output for 5-class model
 X = np.array([[.01, .05, .02, .02, .9],[.03, .1, .42, .05, .4]])
 print("X:\n",X)
 
-# Create label: Sample 1-hot ground truth label vectors
+// Create label: Sample 1-hot ground truth label vectors
 label = np.array([[0.,0.,0.,0.,1.],[0.,0.,1.,0.,0.]])
 print("label:\n",label)
 
-# Feed X & label into workspace
+// Feed X & label into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 workspace.FeedBlob("label", label.astype(np.float32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Y:\n", workspace.FetchBlob("Y"))
 
 ```
diff --git a/caffe2/operators/distance_op.cc b/caffe2/operators/distance_op.cc
index d9abfa0..9a38a4a7 100644
--- a/caffe2/operators/distance_op.cc
+++ b/caffe2/operators/distance_op.cc
@@ -437,22 +437,22 @@
     ["Z"]
 )
 
-# Create X
+// Create X
 X = 5*np.ones((1, 4))
 print("X:\n",X)
 
-# Create Y
+// Create Y
 Y = np.ones((1, 4))
 print("Y:\n",Y)
 
-# Feed X & Y into workspace
+// Feed X & Y into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 workspace.FeedBlob("Y", Y.astype(np.float32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Z:\n", workspace.FetchBlob("Z"))
 
 ```
@@ -645,22 +645,22 @@
     ["Z"]
 )
 
-# Create X
+// Create X
 X = np.random.randn(3, 3)
 print("X:\n",X)
 
-# Create Y
+// Create Y
 Y = np.random.randn(3, 3)
 print("Y:\n",Y)
 
-# Feed X & Y into workspace
+// Feed X & Y into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 workspace.FeedBlob("Y", Y.astype(np.float32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Z:\n", workspace.FetchBlob("Z"))
 
 ```
diff --git a/caffe2/operators/elementwise_linear_op.cc b/caffe2/operators/elementwise_linear_op.cc
index d68bfbc..371aae7 100644
--- a/caffe2/operators/elementwise_linear_op.cc
+++ b/caffe2/operators/elementwise_linear_op.cc
@@ -112,28 +112,28 @@
     ["Y"]
 )
 
-# Create X
+// Create X
 X = np.array([[1,2,3,4,5],[6,8,9,16,10]])
 print("X:\n",X)
 
-# Create w
+// Create w
 w = np.array([1,1/2.,1/3.,1/4.,1/5.])
 print("w:\n",w)
 
-# Create b
+// Create b
 b = np.array([1.,1.,1.,1.,1.])
 print("b:\n",b)
 
 
-# Feed X & w & b into workspace
+// Feed X & w & b into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 workspace.FeedBlob("w", w.astype(np.float32))
 workspace.FeedBlob("b", b.astype(np.float32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Y:\n", workspace.FetchBlob("Y"))
 
 ```
diff --git a/caffe2/operators/elementwise_logical_ops.cc b/caffe2/operators/elementwise_logical_ops.cc
index 5ddd457..0e2da56 100644
--- a/caffe2/operators/elementwise_logical_ops.cc
+++ b/caffe2/operators/elementwise_logical_ops.cc
@@ -63,7 +63,7 @@
     value=[0,2,4,6,8],
 )
 
-# Use a not-empty tensor
+// Use a not-empty tensor
 workspace.FeedBlob("X", np.array([0,1,2,3,4,5,6,7,8]).astype(np.int32))
 print("X:\n", workspace.FetchBlob("X"))
 
@@ -75,7 +75,7 @@
 **Result**
 
 ```
-# value=[0,2,4,6,8]
+// value=[0,2,4,6,8]
 
 X:
  [0 1 2 3 4 5 6 7 8]
diff --git a/caffe2/operators/elementwise_sum_op.cc b/caffe2/operators/elementwise_sum_op.cc
index 861f4f1..dee3671 100644
--- a/caffe2/operators/elementwise_sum_op.cc
+++ b/caffe2/operators/elementwise_sum_op.cc
@@ -86,7 +86,7 @@
 op = core.CreateOperator(
     "Sum",
     ["A",  "B"],
-    ["A"],  # inplace
+    ["A"],  // inplace
 )
 
 workspace.FeedBlob("A", np.array([[1,2,5],[8,3,4]]).astype(np.float32))
diff --git a/caffe2/operators/filler_op.cc b/caffe2/operators/filler_op.cc
index ff3eac2..c5a121e 100644
--- a/caffe2/operators/filler_op.cc
+++ b/caffe2/operators/filler_op.cc
@@ -298,11 +298,11 @@
     input_as_shape=1
 )
 
-# Test arg-based op
+// Test arg-based op
 workspace.RunOperatorOnce(op_1)
 print("output (op_1):\n", workspace.FetchBlob("output"))
 
-# Test input-based op
+// Test input-based op
 workspace.ResetWorkspace()
 workspace.FeedBlob("shape", np.array([5,5]))
 workspace.FeedBlob("min", np.array(13.8, dtype=np.float32))
@@ -389,11 +389,11 @@
     input_as_shape=1
 )
 
-# Test arg-based op
+// Test arg-based op
 workspace.RunOperatorOnce(op_1)
 print("output (op_1):\n", workspace.FetchBlob("output"))
 
-# Test input-based op
+// Test input-based op
 workspace.ResetWorkspace()
 workspace.FeedBlob("shape", np.array([5,5]))
 workspace.FeedBlob("min", np.array(13, dtype=np.int32))
diff --git a/caffe2/operators/fully_connected_op.cc b/caffe2/operators/fully_connected_op.cc
index 6fe95ee..e14fec6 100644
--- a/caffe2/operators/fully_connected_op.cc
+++ b/caffe2/operators/fully_connected_op.cc
@@ -182,9 +182,9 @@
 
 ```
 
-# In this example, our batch size is 1 (M=1), the input observation will have
-#   6 features (K=6), and the layer will have one hidden node (N=1). The
-#   expected output is Y=7.
+// In this example, our batch size is 1 (M=1), the input observation will have
+//   6 features (K=6), and the layer will have one hidden node (N=1). The
+//   expected output is Y=7.
 workspace.ResetWorkspace()
 
 op = core.CreateOperator(
@@ -193,23 +193,23 @@
     ["Y"]
 )
 
-# Create X: MxK
+// Create X: MxK
 data = np.array([1,2,3,4,5,6]).astype(np.float32)
 data = data[np.newaxis,:]
 
-# Create W: NxK
+// Create W: NxK
 weights = np.array(np.array([1,1/2.,1/3.,1/4.,1/5.,1/6.])).astype(np.float32)
 weights = weights[np.newaxis,:]
 
-# Create b: N
+// Create b: N
 bias = np.array([1.]).astype(np.float32)
 
-# Put the inputs into the workspace
+// Put the inputs into the workspace
 workspace.FeedBlob("X", data)
 workspace.FeedBlob("W", weights)
 workspace.FeedBlob("b", bias)
 
-# Run the operator
+// Run the operator
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
 
diff --git a/caffe2/operators/gather_op.cc b/caffe2/operators/gather_op.cc
index cee268d..34c42bf 100644
--- a/caffe2/operators/gather_op.cc
+++ b/caffe2/operators/gather_op.cc
@@ -37,7 +37,7 @@
 inds = np.array([[0, 1],[1, 2]])
 print("INDICES:\n",inds)
 
-# Feed X into workspace
+// Feed X into workspace
 workspace.FeedBlob("DATA", data.astype(np.float32))
 workspace.FeedBlob("INDICES", inds.astype(np.int32))
 
diff --git a/caffe2/operators/local_response_normalization_op.cc b/caffe2/operators/local_response_normalization_op.cc
index 1cba60e..81499b4 100644
--- a/caffe2/operators/local_response_normalization_op.cc
+++ b/caffe2/operators/local_response_normalization_op.cc
@@ -342,7 +342,7 @@
      order="NHWC"
 )
 
-workspace.FeedBlob("X", np.random.randn(1, 6, 6, 1).astype(np.float32)) # NCHW
+workspace.FeedBlob("X", np.random.randn(1, 6, 6, 1).astype(np.float32)) // NCHW
 print("X:\n", workspace.FetchBlob("X"), "\n")
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
diff --git a/caffe2/operators/lp_pool_op.cc b/caffe2/operators/lp_pool_op.cc
index f877786..f39aaaa 100644
--- a/caffe2/operators/lp_pool_op.cc
+++ b/caffe2/operators/lp_pool_op.cc
@@ -258,7 +258,7 @@
     p=2.0
 )
 
-workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) # NCHW
+workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW
 print("X:\n", workspace.FetchBlob("X"), "\n")
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
diff --git a/caffe2/operators/lpnorm_op.cc b/caffe2/operators/lpnorm_op.cc
index 6af404d..79c35cd 100644
--- a/caffe2/operators/lpnorm_op.cc
+++ b/caffe2/operators/lpnorm_op.cc
@@ -100,7 +100,7 @@
 X = np.array([5., 2.])
 print("X:\n",X)
 
-# Feed X into workspace
+// Feed X into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 
 workspace.RunOperatorOnce(op)
diff --git a/caffe2/operators/pool_op.cc b/caffe2/operators/pool_op.cc
index eca7978..87d67b1 100644
--- a/caffe2/operators/pool_op.cc
+++ b/caffe2/operators/pool_op.cc
@@ -764,7 +764,7 @@
     stride=2,
 )
 
-workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) # NCHW
+workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW
 print("X:\n", workspace.FetchBlob("X"), "\n")
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
@@ -832,7 +832,7 @@
     stride=2,
 )
 
-workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) # NCHW
+workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW
 print("X:\n", workspace.FetchBlob("X"), "\n")
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))
diff --git a/caffe2/operators/reduction_ops.cc b/caffe2/operators/reduction_ops.cc
index 0d01d50..95f15b5 100644
--- a/caffe2/operators/reduction_ops.cc
+++ b/caffe2/operators/reduction_ops.cc
@@ -139,17 +139,17 @@
     ["Y"]
 )
 
-# Create X, simulating a batch of 2, 4x4 matricies
+// Create X, simulating a batch of 2, 4x4 matricies
 X = np.random.randint(0,high=20,size=(2,4,4))
 print("X:\n",X)
 
-# Feed X into workspace
+// Feed X into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Y:\n", workspace.FetchBlob("Y"))
 
 ```
@@ -226,17 +226,17 @@
     ["Y"]
 )
 
-# Create X, simulating a batch of 2, 4x4 matricies
+// Create X, simulating a batch of 2, 4x4 matricies
 X = np.random.randint(0,high=20,size=(2,4,4))
 print("X:\n",X)
 
-# Feed X into workspace
+// Feed X into workspace
 workspace.FeedBlob("X", X.astype(np.float32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Y:\n", workspace.FetchBlob("Y"))
 
 ```
diff --git a/caffe2/operators/relu_op.cc b/caffe2/operators/relu_op.cc
index 0320524..0f1abd8 100644
--- a/caffe2/operators/relu_op.cc
+++ b/caffe2/operators/relu_op.cc
@@ -105,7 +105,7 @@
   ["Y"]
   )
 
-workspace.FeedBlob("X", np.random.randn(4, 4).astype(np.float32)) # NCHW
+workspace.FeedBlob("X", np.random.randn(4, 4).astype(np.float32)) // NCHW
 print("X:\n", workspace.FetchBlob("X"), "\n")
 
 workspace.RunOperatorOnce(op)
diff --git a/caffe2/operators/sparse_to_dense_mask_op.cc b/caffe2/operators/sparse_to_dense_mask_op.cc
index bea0b43..d968112 100644
--- a/caffe2/operators/sparse_to_dense_mask_op.cc
+++ b/caffe2/operators/sparse_to_dense_mask_op.cc
@@ -48,8 +48,8 @@
 corresponds to each id provided in mask argument. Missing values are filled with
 the value of `default_value`. After running this op:
 
-  output[j, :] = values[i] # where mask[j] == indices[i]
-  output[j, ...] = default_value # when mask[j] doesn't appear in indices
+  output[j, :] = values[i] // where mask[j] == indices[i]
+  output[j, ...] = default_value // when mask[j] doesn't appear in indices
 
 If `lengths` is provided and not empty, and extra "batch" dimension is prepended
 to the output.
diff --git a/caffe2/operators/sparse_to_dense_op.cc b/caffe2/operators/sparse_to_dense_op.cc
index 4f6a497..0c9519e 100644
--- a/caffe2/operators/sparse_to_dense_op.cc
+++ b/caffe2/operators/sparse_to_dense_op.cc
@@ -23,7 +23,7 @@
 
 After running this op:
 
-  output[indices[i], :] += values[i]  # sum over all indices[i] equal to the index
+  output[indices[i], :] += values[i]  // sum over all indices[i] equal to the index
   output[j, ...] = 0 if j not in indices
 )DOC")
     .Input(0, "indices", "1-D int32/int64 tensor of concatenated ids of data")
diff --git a/caffe2/operators/stats_ops.cc b/caffe2/operators/stats_ops.cc
index 508dd1a..d07f9ca 100644
--- a/caffe2/operators/stats_ops.cc
+++ b/caffe2/operators/stats_ops.cc
@@ -290,7 +290,7 @@
     ["nanos"]
 )
 
-# Test TimerBegin/TimerGet/TimerEnd
+// Test TimerBegin/TimerGet/TimerEnd
 workspace.RunOperatorOnce(timerbegin_op)
 print("timer:", workspace.FetchBlob("timer"))
 workspace.RunOperatorOnce(timerget_op)
@@ -298,7 +298,7 @@
 workspace.RunOperatorOnce(timerend_op)
 
 
-# Test TimerBegin/TimerGetAndEnd
+// Test TimerBegin/TimerGetAndEnd
 workspace.RunOperatorOnce(timerbegin_op)
 print("timer:", workspace.FetchBlob("timer"))
 workspace.RunOperatorOnce(timergetandend_op)
diff --git a/caffe2/operators/utility_ops.cc b/caffe2/operators/utility_ops.cc
index cc7c037..eb77197 100644
--- a/caffe2/operators/utility_ops.cc
+++ b/caffe2/operators/utility_ops.cc
@@ -103,17 +103,17 @@
     ["Y"]
 )
 
-# Create X: Sample softmax output for 5-class model
+// Create X: Sample softmax output for 5-class model
 X = np.array([2,2,2,2,2,2,2,2,2,2])
 print("X:\n",X)
 
-# Feed X into workspace
+// Feed X into workspace
 workspace.FeedBlob("X", X.astype(np.int32))
 
-# Run op
+// Run op
 workspace.RunOperatorOnce(op)
 
-# Collect Output
+// Collect Output
 print("Y:\n", workspace.FetchBlob("Y"))
 
 ```
@@ -508,14 +508,14 @@
     ["has_elements"],
 )
 
-# Use a not-empty tensor
+// Use a not-empty tensor
 workspace.FeedBlob("tensor", np.random.randn(2, 2).astype(np.float32))
 print("tensor:\n", workspace.FetchBlob("tensor"))
 
 workspace.RunOperatorOnce(op)
 print("has_elements: ", workspace.FetchBlob("has_elements"),"\n")
 
-# Use an empty tensor
+// Use an empty tensor
 workspace.FeedBlob("tensor", np.empty(0))
 print("tensor:\n", workspace.FetchBlob("tensor"))