CopyRow_X86 for gcc
BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/300007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@97 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/README.chromium b/README.chromium
index 3e6209b..483ffd3 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 96
+Version: 97
 License: BSD
 License File: LICENSE
 
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 9ef3890..7b6f0de 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -129,7 +129,6 @@
 // CopyRow copies 'count' bytes using 16 byte load/stores, 32 bytes at a time
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_COPYROW_SSE2
-#define HAS_COPYROW_X86
 __declspec(naked)
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
   __asm {
@@ -150,6 +149,7 @@
   }
 }
 
+#define HAS_COPYROW_X86
 __declspec(naked)
 void CopyRow_X86(const uint8* src, uint8* dst, int count) {
   __asm {
@@ -169,15 +169,15 @@
 #define HAS_COPYROW_SSE2
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
   asm volatile (
-"1:                                            \n"
-  "movdqa      (%0),%%xmm0                     \n"
-  "movdqa      0x10(%0),%%xmm1                 \n"
-  "lea         0x20(%0),%0                     \n"
-  "movdqa      %%xmm0,(%1)                     \n"
-  "movdqa      %%xmm1,0x10(%1)                 \n"
-  "lea         0x20(%1),%1                     \n"
-  "sub         $0x20,%2                        \n"
-  "ja          1b                              \n"
+  "1:                                          \n"
+    "movdqa    (%0),%%xmm0                     \n"
+    "movdqa    0x10(%0),%%xmm1                 \n"
+    "lea       0x20(%0),%0                     \n"
+    "movdqa    %%xmm0,(%1)                     \n"
+    "movdqa    %%xmm1,0x10(%1)                 \n"
+    "lea       0x20(%1),%1                     \n"
+    "sub       $0x20,%2                        \n"
+    "ja        1b                              \n"
   : "+r"(src),   // %0
     "+r"(dst),   // %1
     "+r"(count)  // %2
@@ -186,7 +186,21 @@
 #if defined(__SSE2__)
     , "xmm0", "xmm1"
 #endif
-);
+  );
+}
+
+#define HAS_COPYROW_X86
+void CopyRow_X86(const uint8* src, uint8* dst, int width) {
+  size_t width_tmp = static_cast<size_t>(width);
+  asm volatile (
+    "shr       $0x2,%2                         \n"
+    "rep movsl (%0),(%1)                       \n"
+  : "+S"(src),  // %0
+    "+D"(dst),  // %1
+    "+c"(width_tmp) // %2
+  :
+  : "memory", "cc"
+  );
 }
 #endif
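
For readers less familiar with rep-prefixed string instructions, here is a minimal C++ sketch (not part of the patch; the name CopyRow_X86_Sketch is made up for illustration) of what the new gcc CopyRow_X86 does: the shr $0x2,%2 turns the byte count held in %ecx into a dword count, and rep movsl then copies that many 32-bit words from %esi to %edi, advancing both pointers as it goes. The low two bits of width are dropped by the shift, so callers are expected to pass widths that are a multiple of 4.

// Illustration only: a plain C++ equivalent of the rep movsl copier above.
// Assumes width is a multiple of 4, mirroring the shr / rep movsl pair.
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void CopyRow_X86_Sketch(const uint8_t* src, uint8_t* dst, int width) {
  size_t dwords = static_cast<size_t>(width) >> 2;  // shr $0x2,%2
  memcpy(dst, src, dwords * 4);                     // rep movsl: copy dwords
}

On the constraint side, "+S", "+D" and "+c" pin src, dst and the count to %esi, %edi and %ecx, which is what the movs/rep machinery requires; the "memory" clobber tells gcc about the stores made through %edi, and "cc" covers the flags written by shr.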