Removed further timing differences during SSL message decryption in ssl_decrypt_buf()

New padding checking is unbiased on correct or incorrect padding and
has no branch prediction timing differences.

The additional MAC checks further straighten out the timing differences.
diff --git a/ChangeLog b/ChangeLog
index a4c11f6..75989bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,10 @@
 Bugfix
    * Fixed memory leak in ssl_free() and ssl_reset() for active session
 
+Security
+   * Removed further timing differences during SSL message decryption in
+     ssl_decrypt_buf()
+
 = Version 1.2.5 released 2013-02-02
 Changes
    * Allow enabling of dummy error_strerror() to support some use-cases
diff --git a/include/polarssl/md5.h b/include/polarssl/md5.h
index 2e97c2b..b0611e2 100644
--- a/include/polarssl/md5.h
+++ b/include/polarssl/md5.h
@@ -154,6 +154,9 @@
  */
 int md5_self_test( int verbose );
 
+/* Internal use */
+void md5_process( md5_context *ctx, const unsigned char data[64] );
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/polarssl/sha1.h b/include/polarssl/sha1.h
index 45ffadc..48da246 100644
--- a/include/polarssl/sha1.h
+++ b/include/polarssl/sha1.h
@@ -152,6 +152,9 @@
  */
 int sha1_self_test( int verbose );
 
+/* Internal use */
+void sha1_process( sha1_context *ctx, const unsigned char data[64] );
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/polarssl/sha2.h b/include/polarssl/sha2.h
index 054988d..39d9347 100644
--- a/include/polarssl/sha2.h
+++ b/include/polarssl/sha2.h
@@ -160,6 +160,9 @@
  */
 int sha2_self_test( int verbose );
 
+/* Internal use */
+void sha2_process( sha2_context *ctx, const unsigned char data[64] );
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/library/md5.c b/library/md5.c
index 298f1fa..b2ee10b 100644
--- a/library/md5.c
+++ b/library/md5.c
@@ -75,7 +75,7 @@
     ctx->state[3] = 0x10325476;
 }
 
-static void md5_process( md5_context *ctx, const unsigned char data[64] )
+void md5_process( md5_context *ctx, const unsigned char data[64] )
 {
     uint32_t X[16], A, B, C, D;
 
diff --git a/library/sha1.c b/library/sha1.c
index b0b6e43..1e82580 100644
--- a/library/sha1.c
+++ b/library/sha1.c
@@ -76,7 +76,7 @@
     ctx->state[4] = 0xC3D2E1F0;
 }
 
-static void sha1_process( sha1_context *ctx, const unsigned char data[64] )
+void sha1_process( sha1_context *ctx, const unsigned char data[64] )
 {
     uint32_t temp, W[16], A, B, C, D, E;
 
diff --git a/library/sha2.c b/library/sha2.c
index 0245f31..af3a6ee 100644
--- a/library/sha2.c
+++ b/library/sha2.c
@@ -97,7 +97,7 @@
     ctx->is224 = is224;
 }
 
-static void sha2_process( sha2_context *ctx, const unsigned char data[64] )
+void sha2_process( sha2_context *ctx, const unsigned char data[64] )
 {
     uint32_t temp1, temp2, W[64];
     uint32_t A, B, C, D, E, F, G, H;
diff --git a/library/ssl_tls.c b/library/ssl_tls.c
index bf9838b..6630ad8 100644
--- a/library/ssl_tls.c
+++ b/library/ssl_tls.c
@@ -1299,7 +1299,7 @@
         unsigned char *dec_msg;
         unsigned char *dec_msg_result;
         size_t dec_msglen;
-        size_t minlen = 0, fake_padlen;
+        size_t minlen = 0;
 
         /*
          * Check immediate ciphertext sanity
@@ -1399,7 +1399,6 @@
         }
 
         padlen = 1 + ssl->in_msg[ssl->in_msglen - 1];
-        fake_padlen = 256 - padlen;
 
         if( ssl->in_msglen < ssl->transform_in->maclen + padlen )
         {
@@ -1408,7 +1407,6 @@
                         ssl->in_msglen, ssl->transform_in->maclen, padlen ) );
 #endif
             padlen = 0;
-            fake_padlen = 256;
             correct = 0;
         }
 
@@ -1430,26 +1428,23 @@
              * TLSv1+: always check the padding up to the first failure
              * and fake check up to 256 bytes of padding
              */
+            size_t pad_count = 0, fake_pad_count = 0;
+            size_t padding_idx = ssl->in_msglen - padlen - 1;
+
             for( i = 1; i <= padlen; i++ )
-            {
-                if( ssl->in_msg[ssl->in_msglen - i] != padlen - 1 )
-                {
-                    correct = 0;
-                    fake_padlen = 256 - i;
-                    padlen = 0;
-                }
-            }
-            for( i = 1; i <= fake_padlen; i++ )
-            {
-                if( ssl->in_msg[i + 1] != fake_padlen - 1 )
-                    minlen = 0;
-                else
-                    minlen = 1;
-            }
+                pad_count += ( ssl->in_msg[padding_idx + i] == padlen - 1 );
+
+            for( ; i <= 256; i++ )
+                fake_pad_count += ( ssl->in_msg[padding_idx + i] == padlen - 1 );
+
+            correct &= ( pad_count == padlen ); /* Only 1 on correct padding */
+            correct &= ( pad_count + fake_pad_count < 512 ); /* Always 1 */
+
 #if defined(POLARSSL_SSL_DEBUG_ALL)
             if( padlen > 0 && correct == 0)
                 SSL_DEBUG_MSG( 1, ( "bad padding byte detected" ) );
 #endif
+            padlen &= correct * 0x1FF;
         }
     }
 
@@ -1492,19 +1487,52 @@
         /*
          * Process MAC and always update for padlen afterwards to make
          * total time independent of padlen
+         *
+         * extra_run compensates MAC check for padlen 
+         *
+         * Known timing attacks:
+         *  - Lucky Thirteen (http://www.isg.rhul.ac.uk/tls/TLStiming.pdf)
+         *
+         * We use ( ( Lx + 8 ) / 64 ) to handle 'negative Lx' values
+         * correctly. (We round down instead of up, so -56 is the correct
+         * value for our calculations instead of -55)
          */
+        int j, extra_run = 0;
+        extra_run = ( 13 + ssl->in_msglen + padlen + 8 ) / 64 -
+                    ( 13 + ssl->in_msglen          + 8 ) / 64;
+
+        extra_run &= correct * 0xFF;
+
         if( ssl->transform_in->maclen == 16 )
-             md5_hmac( ssl->transform_in->mac_dec, 16,
-                       ssl->in_ctr,  ssl->in_msglen + 13,
-                       ssl->in_msg + ssl->in_msglen );
+        {
+            md5_context ctx;
+            md5_hmac_starts( &ctx, ssl->transform_in->mac_dec, 16 );
+            md5_hmac_update( &ctx, ssl->in_ctr,  ssl->in_msglen + 13 );
+            md5_hmac_finish( &ctx, ssl->in_msg + ssl->in_msglen );
+
+            for( j = 0; j < extra_run; j++ )
+                md5_process( &ctx, ssl->in_msg ); 
+        }
         else if( ssl->transform_in->maclen == 20 )
-            sha1_hmac( ssl->transform_in->mac_dec, 20,
-                       ssl->in_ctr,  ssl->in_msglen + 13,
-                       ssl->in_msg + ssl->in_msglen );
+        {
+            sha1_context ctx;
+            sha1_hmac_starts( &ctx, ssl->transform_in->mac_dec, 20 );
+            sha1_hmac_update( &ctx, ssl->in_ctr,  ssl->in_msglen + 13 );
+            sha1_hmac_finish( &ctx, ssl->in_msg + ssl->in_msglen );
+
+            for( j = 0; j < extra_run; j++ )
+                sha1_process( &ctx, ssl->in_msg ); 
+        }
         else if( ssl->transform_in->maclen == 32 )
-            sha2_hmac( ssl->transform_in->mac_dec, 32,
-                       ssl->in_ctr,  ssl->in_msglen + 13,
-                       ssl->in_msg + ssl->in_msglen, 0 );
+        {
+            sha2_context ctx;
+            sha2_hmac_starts( &ctx, ssl->transform_in->mac_dec, 32, 0 );
+            sha2_hmac_update( &ctx, ssl->in_ctr,  ssl->in_msglen + 13 );
+            sha2_hmac_finish( &ctx, ssl->in_msg + ssl->in_msglen );
+
+            for( j = 0; j < extra_run; j++ )
+                sha2_process( &ctx, ssl->in_msg ); 
+        }
         else if( ssl->transform_in->maclen != 0 )
         {
             SSL_DEBUG_MSG( 1, ( "invalid MAC len: %d",
@@ -1520,7 +1548,9 @@
     if( memcmp( tmp, ssl->in_msg + ssl->in_msglen,
                      ssl->transform_in->maclen ) != 0 )
     {
+#if defined(POLARSSL_SSL_DEBUG_ALL)
         SSL_DEBUG_MSG( 1, ( "message mac does not match" ) );
+#endif
         correct = 0;
     }