/* libc/bionic/memmove_words.c - platform/bionic - Git at Google */

 /*
  * Copyright (C) 2011 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <stdlib.h>
 #include <stdint.h>
 #include <assert.h>

 /*
  * Works like memmove(), except that data is moved in whole 16-bit or
  * 32-bit units instead of bytes:
  * - when dest and src are both 32-bit aligned (or become 32-bit aligned
  *   together after one 16-bit value has been copied), the bulk of the
  *   buffer is moved with 32-bit loads and stores, which is intended to
  *   preserve atomicity of 32-bit values
  * - otherwise the whole buffer is moved with 16-bit loads and stores,
  *   which is intended to preserve atomicity of 16-bit values
  *
  * Parameters:
  *   dest - start of the destination buffer; must be 16-bit aligned
  *   src  - start of the source buffer; must be 16-bit aligned
  *   n    - number of bytes to move; must be even
  *
  * The buffers may overlap.  Nothing is returned.
  *
  * If all three arguments are not at least 16-bit aligned, the behavior
  * of this function is undefined.  (We could remove this restriction by
  * testing for unaligned values and punting to memmove(), but that's
  * not currently useful.)
  *
  * TODO: add loop for 64-bit alignment
  * TODO: use __builtin_prefetch
  * TODO: write an ARM-optimized version
  */
 void _memmove_words(void* dest, const void* src, size_t n)
 {
     /* dest, src and n must all be multiples of two */
     assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0);

     char* d = (char*) dest;
     const char* s = (const char*) src;
     size_t copyCount;

     /*
      * If the source and destination pointers are the same, this is
      * an expensive no-op.  Testing for an empty move now allows us
      * to skip a check later (n >= 2 from here on).
      */
     if (n == 0 || d == s)
         return;

     /*
      * dest and src may point into unrelated objects, and relational
      * comparison or subtraction of such pointers is undefined in C.
      * Do the overlap arithmetic on the integer addresses instead.
      */
     const uintptr_t destAddr = (uintptr_t) dest;
     const uintptr_t srcAddr = (uintptr_t) src;

     /*
      * Determine if the source and destination buffers will overlap if
      * we copy data forward (i.e. *dest++ = *src++).
      *
      * It's okay if the destination buffer starts before the source and
      * there is some overlap, because the reader is always ahead of the
      * writer.
      */
     if (__builtin_expect((destAddr < srcAddr) || ((destAddr - srcAddr) >= n), 1)) {
         /*
          * Copy forward.  We prefer 32-bit loads and stores even for 16-bit
          * data, so sort that out.
          */
         if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) {
             /*
              * Not 32-bit aligned.  Two possibilities:
              * (1) Congruent, we can align to 32-bit by copying one 16-bit val
              * (2) Non-congruent, we can do one of:
              *   a. copy whole buffer as a series of 16-bit values
              *   b. load/store 32 bits, using shifts to ensure alignment
              *   c. just copy them as 32-bit values and assume the CPU
              *      will do a reasonable job
              *
              * We're currently using (a), which is suboptimal.
              */
             if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) {
                 copyCount = n;
             } else {
                 copyCount = 2;
             }
             n -= copyCount;
             copyCount /= sizeof(uint16_t);

             while (copyCount--) {
                 *(uint16_t*)d = *(const uint16_t*)s;
                 d += sizeof(uint16_t);
                 s += sizeof(uint16_t);
             }
         }

         /*
          * Copy 32-bit aligned words.  (n is already zero here if the
          * whole buffer went through the 16-bit loop above.)
          */
         copyCount = n / sizeof(uint32_t);
         while (copyCount--) {
             *(uint32_t*)d = *(const uint32_t*)s;
             d += sizeof(uint32_t);
             s += sizeof(uint32_t);
         }

         /*
          * Check for leftovers.  Either we finished exactly, or we have
          * one remaining 16-bit chunk.
          */
         if ((n & 0x02) != 0) {
             *(uint16_t*)d = *(const uint16_t*)s;
         }
     } else {
         /*
          * Copy backward, starting at the end, so that overlapping source
          * data is read before it is overwritten.
          */
         d += n;
         s += n;

         if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) {
             /* try for 32-bit alignment of the end pointers */
             if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) {
                 copyCount = n;
             } else {
                 copyCount = 2;
             }
             n -= copyCount;
             copyCount /= sizeof(uint16_t);

             while (copyCount--) {
                 d -= sizeof(uint16_t);
                 s -= sizeof(uint16_t);
                 *(uint16_t*)d = *(const uint16_t*)s;
             }
         }

         /* copy 32-bit aligned words */
         copyCount = n / sizeof(uint32_t);
         while (copyCount--) {
             d -= sizeof(uint32_t);
             s -= sizeof(uint32_t);
             *(uint32_t*)d = *(const uint32_t*)s;
         }

         /* copy leftovers: at most one 16-bit chunk at the very start */
         if ((n & 0x02) != 0) {
             d -= sizeof(uint16_t);
             s -= sizeof(uint16_t);
             *(uint16_t*)d = *(const uint16_t*)s;
         }
     }
 }
	/*
	* Copyright (C) 2011 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include <stdlib.h>
	#include <stdint.h>
	#include <assert.h>

	/*
	 * Works like memmove(), except that data is moved in whole 16-bit or
	 * 32-bit units instead of bytes:
	 * - when dest and src are both 32-bit aligned (or become 32-bit aligned
	 *   together after one 16-bit value has been copied), the bulk of the
	 *   buffer is moved with 32-bit loads and stores, which is intended to
	 *   preserve atomicity of 32-bit values
	 * - otherwise the whole buffer is moved with 16-bit loads and stores,
	 *   which is intended to preserve atomicity of 16-bit values
	 *
	 * Parameters:
	 *   dest - start of the destination buffer; must be 16-bit aligned
	 *   src  - start of the source buffer; must be 16-bit aligned
	 *   n    - number of bytes to move; must be even
	 *
	 * The buffers may overlap.  Nothing is returned.
	 *
	 * If all three arguments are not at least 16-bit aligned, the behavior
	 * of this function is undefined. (We could remove this restriction by
	 * testing for unaligned values and punting to memmove(), but that's
	 * not currently useful.)
	 *
	 * TODO: add loop for 64-bit alignment
	 * TODO: use __builtin_prefetch
	 * TODO: write an ARM-optimized version
	 */
	void _memmove_words(void* dest, const void* src, size_t n)
	{
	    /* dest, src and n must all be multiples of two */
	    assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0);

	    char* d = (char*) dest;
	    const char* s = (const char*) src;
	    size_t copyCount;

	    /*
	     * If the source and destination pointers are the same, this is
	     * an expensive no-op. Testing for an empty move now allows us
	     * to skip a check later (n >= 2 from here on).
	     */
	    if (n == 0 || d == s)
	        return;

	    /*
	     * dest and src may point into unrelated objects, and relational
	     * comparison or subtraction of such pointers is undefined in C.
	     * Do the overlap arithmetic on the integer addresses instead.
	     */
	    const uintptr_t destAddr = (uintptr_t) dest;
	    const uintptr_t srcAddr = (uintptr_t) src;

	    /*
	     * Determine if the source and destination buffers will overlap if
	     * we copy data forward (i.e. *dest++ = *src++).
	     *
	     * It's okay if the destination buffer starts before the source and
	     * there is some overlap, because the reader is always ahead of the
	     * writer.
	     */
	    if (__builtin_expect((destAddr < srcAddr) || ((destAddr - srcAddr) >= n), 1)) {
	        /*
	         * Copy forward. We prefer 32-bit loads and stores even for 16-bit
	         * data, so sort that out.
	         */
	        if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) {
	            /*
	             * Not 32-bit aligned. Two possibilities:
	             * (1) Congruent, we can align to 32-bit by copying one 16-bit val
	             * (2) Non-congruent, we can do one of:
	             *   a. copy whole buffer as a series of 16-bit values
	             *   b. load/store 32 bits, using shifts to ensure alignment
	             *   c. just copy them as 32-bit values and assume the CPU
	             *      will do a reasonable job
	             *
	             * We're currently using (a), which is suboptimal.
	             */
	            if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) {
	                copyCount = n;
	            } else {
	                copyCount = 2;
	            }
	            n -= copyCount;
	            copyCount /= sizeof(uint16_t);

	            while (copyCount--) {
	                *(uint16_t*)d = *(const uint16_t*)s;
	                d += sizeof(uint16_t);
	                s += sizeof(uint16_t);
	            }
	        }

	        /*
	         * Copy 32-bit aligned words. (n is already zero here if the
	         * whole buffer went through the 16-bit loop above.)
	         */
	        copyCount = n / sizeof(uint32_t);
	        while (copyCount--) {
	            *(uint32_t*)d = *(const uint32_t*)s;
	            d += sizeof(uint32_t);
	            s += sizeof(uint32_t);
	        }

	        /*
	         * Check for leftovers. Either we finished exactly, or we have
	         * one remaining 16-bit chunk.
	         */
	        if ((n & 0x02) != 0) {
	            *(uint16_t*)d = *(const uint16_t*)s;
	        }
	    } else {
	        /*
	         * Copy backward, starting at the end, so that overlapping source
	         * data is read before it is overwritten.
	         */
	        d += n;
	        s += n;

	        if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) {
	            /* try for 32-bit alignment of the end pointers */
	            if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) {
	                copyCount = n;
	            } else {
	                copyCount = 2;
	            }
	            n -= copyCount;
	            copyCount /= sizeof(uint16_t);

	            while (copyCount--) {
	                d -= sizeof(uint16_t);
	                s -= sizeof(uint16_t);
	                *(uint16_t*)d = *(const uint16_t*)s;
	            }
	        }

	        /* copy 32-bit aligned words */
	        copyCount = n / sizeof(uint32_t);
	        while (copyCount--) {
	            d -= sizeof(uint32_t);
	            s -= sizeof(uint32_t);
	            *(uint32_t*)d = *(const uint32_t*)s;
	        }

	        /* copy leftovers: at most one 16-bit chunk at the very start */
	        if ((n & 0x02) != 0) {
	            d -= sizeof(uint16_t);
	            s -= sizeof(uint16_t);
	            *(uint16_t*)d = *(const uint16_t*)s;
	        }
	    }
	}