Update perf test baseline with every master commit (#5605)

* Update perf test baseline with every master commit

* Get perf test data from repo for local runs
diff --git a/.jenkins/perf_test/compare_with_baseline.py b/.jenkins/perf_test/compare_with_baseline.py
index 810f28d..e55987c 100644
--- a/.jenkins/perf_test/compare_with_baseline.py
+++ b/.jenkins/perf_test/compare_with_baseline.py
@@ -1,20 +1,36 @@
 import sys
 import json
 import numpy
-from scipy import stats
+import argparse
 
-with open('../perf_test_numbers.json') as data_file:
+parser = argparse.ArgumentParser()
+parser.add_argument('--test-name', dest='test_name', action='store',
+                    required=True, help='test name')
+parser.add_argument('--sample-stats', dest='sample_stats', action='store',
+                    required=True, help='stats from sample')
+parser.add_argument('--update', action='store_true',
+                    help='whether to update baseline using stats from sample')
+args = parser.parse_args()
+
+test_name = args.test_name
+
+# Baselines are stored per backend; fail clearly if the name matches neither.
+backend = next((b for b in ('cpu', 'gpu') if b in test_name), None)
+if backend is None:
+    sys.exit("cannot infer backend (cpu/gpu) from test name: " + test_name)
+
+data_file_path = '../perf_test_numbers_{}.json'.format(backend)
+
+with open(data_file_path) as data_file:
     data = json.load(data_file)
 
-test_name = sys.argv[1]
-
 mean = float(data[test_name]['mean'])
 sigma = float(data[test_name]['sigma'])
 
 print("population mean: ", mean)
 print("population sigma: ", sigma)
 
-sample_stats_data = json.loads(sys.argv[2])
+sample_stats_data = json.loads(args.sample_stats)
 
 sample_mean = sample_stats_data['mean']
 sample_sigma = sample_stats_data['sigma']
@@ -33,3 +49,13 @@
 ''')
 else:
     print("z-value < 2, no perf regression detected.")
+    if args.update:
+        print("We will use these numbers as new baseline.")
+        # This file must already exist: it is read, patched and written back.
+        new_data_file_path = '../new_perf_test_numbers_{}.json'.format(backend)
+        with open(new_data_file_path) as new_data_file:
+            new_data = json.load(new_data_file)
+        new_data[test_name]['mean'] = sample_mean
+        new_data[test_name]['sigma'] = sample_sigma
+        with open(new_data_file_path, 'w') as new_data_file:
+            json.dump(new_data, new_data_file, indent=4)
diff --git a/.jenkins/perf_test/perf_test_numbers.json b/.jenkins/perf_test/perf_test_numbers.json
deleted file mode 100644
index e4903a0..0000000
--- a/.jenkins/perf_test/perf_test_numbers.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-	"commit": "492466f25fdf12b62f2043587130e9ee393e4b1c",
-
-	"cpu-test-base-machine": "Mac Mini with 2.3 GHz i7 processors, 16GB RAM, 250GB SSD",
-	"cpu-test-docker-settings": "2 cores, 4GB RAM",
-
-	"test_cpu_speed_mini_sequence_labeler": {
-		"mean": "6.75325",
-		"sigma": "0.62155"
-	},
-
-	"test_cpu_speed_mnist": {
-		"mean": "30.86437",
-		"sigma": "2.0945721121747041"
-	},
-
-	"gpu-test-base-machine": "g3.8xlarge",
-
-	"test_gpu_speed_mnist": {
-		"mean": "11.66185",
-		"sigma": "0.29402"
-	},
-
-	"test_gpu_speed_word_language_model": {
-		"mean": "6.0089",
-		"sigma": "0.015459301407243404"
-	},
-
-	"test_gpu_speed_cudnn_lstm": {
-		"mean": "4.98804",
-		"sigma": "0.21906"
-	},
-
-	"test_gpu_speed_lstm": {
-		"mean": "5.27968",
-		"sigma": "0.13834"
-	},
-
-	"test_gpu_speed_mlstm": {
-		"mean": "4.11562",
-		"sigma": "0.0722"
-	}
-}
diff --git a/.jenkins/perf_test/test_cpu_speed_mini_sequence_labeler.sh b/.jenkins/perf_test/test_cpu_speed_mini_sequence_labeler.sh
index 07addd4..d563580 100644
--- a/.jenkins/perf_test/test_cpu_speed_mini_sequence_labeler.sh
+++ b/.jenkins/perf_test/test_cpu_speed_mini_sequence_labeler.sh
@@ -15,7 +15,7 @@
   cd scripts/mini_sequence_labeler
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python main.py")
@@ -28,8 +28,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/test_cpu_speed_mnist.sh b/.jenkins/perf_test/test_cpu_speed_mnist.sh
index 890fd4d..f3c3c09 100644
--- a/.jenkins/perf_test/test_cpu_speed_mnist.sh
+++ b/.jenkins/perf_test/test_cpu_speed_mnist.sh
@@ -16,7 +16,7 @@
   python main.py --epochs 0
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python main.py --epochs 1 --no-log")
@@ -30,8 +30,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/test_gpu_speed_cudnn_lstm.sh b/.jenkins/perf_test/test_gpu_speed_cudnn_lstm.sh
index 10ad506..95cbb2b 100644
--- a/.jenkins/perf_test/test_gpu_speed_cudnn_lstm.sh
+++ b/.jenkins/perf_test/test_gpu_speed_cudnn_lstm.sh
@@ -11,7 +11,7 @@
   cd benchmark/scripts/
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python cudnn_lstm.py --skip-cpu-governor-check")
@@ -25,8 +25,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/test_gpu_speed_lstm.sh b/.jenkins/perf_test/test_gpu_speed_lstm.sh
index 48a667c..4364760 100644
--- a/.jenkins/perf_test/test_gpu_speed_lstm.sh
+++ b/.jenkins/perf_test/test_gpu_speed_lstm.sh
@@ -11,7 +11,7 @@
   cd benchmark/scripts/
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python lstm.py --skip-cpu-governor-check")
@@ -25,8 +25,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/test_gpu_speed_mlstm.sh b/.jenkins/perf_test/test_gpu_speed_mlstm.sh
index 834dcdf..eb1eeb9 100644
--- a/.jenkins/perf_test/test_gpu_speed_mlstm.sh
+++ b/.jenkins/perf_test/test_gpu_speed_mlstm.sh
@@ -11,7 +11,7 @@
   cd benchmark/scripts/
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python mlstm.py --skip-cpu-governor-check")
@@ -25,8 +25,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/test_gpu_speed_mnist.sh b/.jenkins/perf_test/test_gpu_speed_mnist.sh
index 7594519..d047610 100644
--- a/.jenkins/perf_test/test_gpu_speed_mnist.sh
+++ b/.jenkins/perf_test/test_gpu_speed_mnist.sh
@@ -16,7 +16,7 @@
   python main.py --epochs 0
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python main.py --epochs 1 --no-log")
@@ -30,8 +30,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/test_gpu_speed_word_language_model.sh b/.jenkins/perf_test/test_gpu_speed_word_language_model.sh
index c6c162b..efa6b7f 100644
--- a/.jenkins/perf_test/test_gpu_speed_word_language_model.sh
+++ b/.jenkins/perf_test/test_gpu_speed_word_language_model.sh
@@ -24,7 +24,7 @@
   cd ../..
 
   SAMPLE_ARRAY=()
-  NUM_RUNS=20
+  NUM_RUNS=${1:-20}  # number of timed runs; defaults to the former fixed value of 20
 
   for (( i=1; i<=$NUM_RUNS; i++ )) do
     runtime=$(get_runtime_of_command "python main.py --cuda --epochs 1")
@@ -38,8 +38,10 @@
   echo "Runtime stats in seconds:"
   echo $stats
 
-  if [ "$1" == "compare_with_baseline" ]; then
-    python ../compare_with_baseline.py ${FUNCNAME[0]} "${stats}"
+  if [ "$2" == "compare_with_baseline" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}"
+  elif [ "$2" == "compare_and_update" ]; then
+    python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update
   fi
 }
 
diff --git a/.jenkins/perf_test/update_commit_hash.py b/.jenkins/perf_test/update_commit_hash.py
new file mode 100644
index 0000000..ee7fa8a
--- /dev/null
+++ b/.jenkins/perf_test/update_commit_hash.py
@@ -0,0 +1,22 @@
+"""Set the 'commit' field of a JSON data file to the given commit hash.
+
+Usage: python update_commit_hash.py <data_file_path> <commit_hash>
+"""
+import sys
+import json
+
+if len(sys.argv) != 3:
+    sys.exit("usage: update_commit_hash.py <data_file_path> <commit_hash>")
+
+data_file_path = sys.argv[1]
+commit_hash = sys.argv[2]
+
+with open(data_file_path) as data_file:
+    data = json.load(data_file)
+
+data['commit'] = commit_hash
+
+# Match the indent=4 used by compare_with_baseline.py so the baseline file
+# keeps one layout regardless of which script rewrote it last.
+with open(data_file_path, 'w') as data_file:
+    json.dump(data, data_file, indent=4)
diff --git a/.jenkins/short-perf-test-cpu.sh b/.jenkins/short-perf-test-cpu.sh
index 770c966..76f85a5 100755
--- a/.jenkins/short-perf-test-cpu.sh
+++ b/.jenkins/short-perf-test-cpu.sh
@@ -8,10 +8,41 @@
 
 echo "Running CPU perf test for PyTorch..."
 
+# Get the hash of the current HEAD commit
+export PYTORCH_COMMIT_ID=$(git log --format="%H" -n 1)
+
+# Get baseline file: host workspace copy if present, else https://github.com/yf225/perf-tests
+if [ -f /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json ]; then
+    cp /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json perf_test_numbers_cpu.json
+else
+    curl -f https://raw.githubusercontent.com/yf225/perf-tests/master/perf_test_numbers_cpu.json -O
+fi
+
+if [[ "$GIT_COMMIT" == *origin/master* ]]; then
+    # Prepare new baseline file
+    cp perf_test_numbers_cpu.json new_perf_test_numbers_cpu.json
+    python update_commit_hash.py new_perf_test_numbers_cpu.json "${PYTORCH_COMMIT_ID}"
+fi
+
 # Include tests
 . ./test_cpu_speed_mini_sequence_labeler.sh
 . ./test_cpu_speed_mnist.sh
 
 # Run tests
-run_test test_cpu_speed_mini_sequence_labeler compare_with_baseline
-run_test test_cpu_speed_mnist compare_with_baseline
+if [[ "$GIT_COMMIT" == *origin/master* ]]; then
+    run_test test_cpu_speed_mini_sequence_labeler 20 compare_and_update
+    run_test test_cpu_speed_mnist 20 compare_and_update
+else
+    run_test test_cpu_speed_mini_sequence_labeler 20 compare_with_baseline
+    run_test test_cpu_speed_mnist 20 compare_with_baseline
+fi
+
+if [[ "$GIT_COMMIT" == *origin/master* ]]; then
+    # Commit new baseline file in the host workspace (no push is done here)
+    cp new_perf_test_numbers_cpu.json /var/lib/jenkins/host-workspace/perf_test_numbers_cpu.json
+    cd /var/lib/jenkins/host-workspace
+    git config --global user.email jenkins@ci.pytorch.org
+    git config --global user.name Jenkins
+    git add perf_test_numbers_cpu.json
+    git commit -m "New CPU perf test baseline from ${PYTORCH_COMMIT_ID}"
+fi
diff --git a/.jenkins/short-perf-test-gpu.sh b/.jenkins/short-perf-test-gpu.sh
index ab7dd81b..04072dc 100755
--- a/.jenkins/short-perf-test-gpu.sh
+++ b/.jenkins/short-perf-test-gpu.sh
@@ -11,6 +11,22 @@
 
 echo "Running GPU perf test for PyTorch..."
 
+# Get the hash of the current HEAD commit
+export PYTORCH_COMMIT_ID=$(git log --format="%H" -n 1)
+
+# Get baseline file: host workspace copy if present, else https://github.com/yf225/perf-tests
+if [ -f /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json ]; then
+    cp /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json perf_test_numbers_gpu.json
+else
+    curl -f https://raw.githubusercontent.com/yf225/perf-tests/master/perf_test_numbers_gpu.json -O
+fi
+
+if [[ "$GIT_COMMIT" == *origin/master* ]]; then
+    # Prepare new baseline file
+    cp perf_test_numbers_gpu.json new_perf_test_numbers_gpu.json
+    python update_commit_hash.py new_perf_test_numbers_gpu.json "${PYTORCH_COMMIT_ID}"
+fi
+
 # Include tests
 . ./test_gpu_speed_mnist.sh
 . ./test_gpu_speed_word_language_model.sh
@@ -19,8 +35,26 @@
 . ./test_gpu_speed_mlstm.sh
 
 # Run tests
-run_test test_gpu_speed_mnist compare_with_baseline
-run_test test_gpu_speed_word_language_model compare_with_baseline
-run_test test_gpu_speed_cudnn_lstm compare_with_baseline
-run_test test_gpu_speed_lstm compare_with_baseline
-run_test test_gpu_speed_mlstm compare_with_baseline
+if [[ "$GIT_COMMIT" == *origin/master* ]]; then
+    run_test test_gpu_speed_mnist 20 compare_and_update
+    run_test test_gpu_speed_word_language_model 20 compare_and_update
+    run_test test_gpu_speed_cudnn_lstm 20 compare_and_update
+    run_test test_gpu_speed_lstm 20 compare_and_update
+    run_test test_gpu_speed_mlstm 20 compare_and_update
+else
+    run_test test_gpu_speed_mnist 20 compare_with_baseline
+    run_test test_gpu_speed_word_language_model 20 compare_with_baseline
+    run_test test_gpu_speed_cudnn_lstm 20 compare_with_baseline
+    run_test test_gpu_speed_lstm 20 compare_with_baseline
+    run_test test_gpu_speed_mlstm 20 compare_with_baseline
+fi
+
+if [[ "$GIT_COMMIT" == *origin/master* ]]; then
+    # Commit new baseline file in the host workspace (no push is done here)
+    cp new_perf_test_numbers_gpu.json /var/lib/jenkins/host-workspace/perf_test_numbers_gpu.json
+    cd /var/lib/jenkins/host-workspace
+    git config --global user.email jenkins@ci.pytorch.org
+    git config --global user.name Jenkins
+    git add perf_test_numbers_gpu.json
+    git commit -m "New GPU perf test baseline from ${PYTORCH_COMMIT_ID}"
+fi