Add folding rules for Sar64(x,0) and Sar32(x,0).  Immediate
shifts by zero seem to have a surprisingly large perf hit on
Intel CPUs, possibly due to the bizarre eflags/rflags semantics
involved: an x86 shift by a zero count leaves the flags
unchanged, so the prior flag state has to be carried through.
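
For reference, a minimal sketch of what the new rules fold,
written with the binop/mkexpr/mkU8 IR-building helpers used in
the guest front ends (helper names assumed here; the real change
is the fold_Expr cases in the diff below):

   /* Assume t1 is an IRTemp holding an I64 value.  This builds
      the tree Sar64(t1, 0:I8) -- an arithmetic right shift by an
      immediate zero. */
   IRExpr* e = binop(Iop_Sar64, mkexpr(t1), mkU8(0));

   /* With the new rule, isZeroU spots the constant-zero shift
      amount and the folder rewrites e to just mkexpr(t1), since
      Sar64(x,0) ==> x.  Sar32(x,0) is handled identically. */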

git-svn-id: svn://svn.valgrind.org/vex/trunk@2964 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/ir_opt.c b/priv/ir_opt.c
index d132efc..0779e61 100644
--- a/priv/ir_opt.c
+++ b/priv/ir_opt.c
@@ -2075,7 +2075,8 @@
             case Iop_Shl32:
             case Iop_Shl64:
             case Iop_Shr64:
-               /* Shl32/Shl64/Shr64(x,0) ==> x */
+            case Iop_Sar64:
+               /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
                if (isZeroU(e->Iex.Binop.arg2)) {
                   e2 = e->Iex.Binop.arg1;
                   break;
@@ -2087,8 +2088,9 @@
                }
                break;
 
+            case Iop_Sar32:
             case Iop_Shr32:
-               /* Shr32(x,0) ==> x */
+               /* Shr32/Sar32(x,0) ==> x */
                if (isZeroU(e->Iex.Binop.arg2)) {
                   e2 = e->Iex.Binop.arg1;
                   break;