Add support for key inversion using AES-NI
diff --git a/include/polarssl/aesni.h b/include/polarssl/aesni.h
index f3f9fc3..b66aa8a 100644
--- a/include/polarssl/aesni.h
+++ b/include/polarssl/aesni.h
@@ -81,6 +81,16 @@
                     const unsigned char a[16],
                     const unsigned char b[16] );
 
+/**
+ * \brief           Compute decryption round keys from encryption round keys
+ *
+ * \param invkey    Output buffer for the equivalent inverse cipher round keys
+ * \param fwdkey    Original round keys (for encryption), 16 bytes each
+ * \param nr        Number of rounds (that is, number of round keys minus one)
+ */
+void aesni_inverse_key( unsigned char *invkey,
+                        const unsigned char *fwdkey, int nr );
+
 #endif /* POLARSSL_HAVE_X86_64 */
 
 #endif /* POLARSSL_AESNI_H */
diff --git a/library/aes.c b/library/aes.c
index d2d1c0c..4477084 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -591,6 +591,15 @@
     if( ret != 0 )
         return( ret );
 
+#if defined(POLARSSL_AESNI_C) && defined(POLARSSL_HAVE_X86_64)
+    if( aesni_supports( POLARSSL_AESNI_AES ) )
+    {
+        aesni_inverse_key( (unsigned char *) ctx->rk,
+                           (const unsigned char *) cty.rk, ctx->nr );
+        goto done;
+    }
+#endif
+
     SK = cty.rk + cty.nr * 4;
 
     *RK++ = *SK++;
@@ -614,6 +623,9 @@
     *RK++ = *SK++;
     *RK++ = *SK++;
 
+#if defined(POLARSSL_AESNI_C) && defined(POLARSSL_HAVE_X86_64)
+done: /* only the AES-NI path jumps here; avoid an unused label otherwise */
+#endif
     memset( &cty, 0, sizeof( aes_context ) );
 
     return( 0 );
diff --git a/library/aesni.c b/library/aesni.c
index 0431f78..488731c 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -215,6 +215,28 @@
     return( 0 );
 }
 
+/* Compute decryption round keys from encryption round keys: copy the nr + 1
+ * 16-byte forward keys in reverse order, passing every key except the first
+ * and last through AESIMC. */
+void aesni_inverse_key( unsigned char *invkey,
+                        const unsigned char *fwdkey, int nr )
+{
+    unsigned char *ik = invkey;                 /* output cursor */
+    const unsigned char *fk = fwdkey + 16 * nr; /* last forward key */
+
+    memcpy( ik, fk, 16 );           /* last forward key is copied unchanged */
+
+    for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
+        asm( "movdqu (%0), %%xmm0       \n"    /* load one forward key */
+             "aesimc %%xmm0, %%xmm0     \n"    /* inverse mix columns */
+             "movdqu %%xmm0, (%1)       \n"    /* store it at ik */
+             :
+             : "r" (fk), "r" (ik)
+             : "memory", "xmm0" );  /* asm stores through ik, uses xmm0 */
+
+    memcpy( ik, fk, 16 );   /* first forward key (nr >= 1), as-is */
+}
+
 #endif /* POLARSSL_HAVE_X86_64 */
 
 #endif /* POLARSSL_AESNI_C */