#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "opcodes.h"

#ifndef M3
#define M3 0
#endif

/* The abstracted result of an CU12 insn */
typedef struct {
   uint64_t addr1;  // target
   uint64_t len1;
   uint64_t addr2;  // source
   uint64_t len2;
   uint32_t cc;
} cu12_t;

/* Define various input buffers. */

/* 1-byte UTF-8 character */
uint8_t pattern1[] = {
   0x00, 0x01, 0x02, 0x03
};

/* 2-byte UTF-8 character */
uint8_t pattern2[] = {
   0xc2, 0x80,
   0xc2, 0x81,
   0xc2, 0x82,
   0xc2, 0x83,
};

/* 3-byte UTF-8 character */
uint8_t pattern3[] = {
   0xe1, 0x80, 0x80,
   0xe1, 0x80, 0x81,
   0xe1, 0x80, 0x82,
   0xe1, 0x80, 0x83,
};

/* 4-byte UTF-8 character */
uint8_t pattern4[] = {
   0xf4, 0x80, 0x80, 0x80,
   0xf4, 0x80, 0x80, 0x81,
   0xf4, 0x80, 0x80, 0x82,
   0xf4, 0x80, 0x80, 0x83,
};


/* Mixed bytes */
uint8_t mixed[] = {
   0x01,                    // 1 byte
   0xc3, 0x80,              // 2 bytes
   0x12,                    // 1 byte
   0xe1, 0x90, 0x93,        // 3 bytes
   0x23,                    // 1 byte
   0xf4, 0x80, 0x90, 0x8a,  // 4 bytes
   0x34,                    // 1 byte
   0xc4, 0x8c,              // 2 bytes
   0xe1, 0x91, 0x94,        // 3 bytes
   0xc5, 0x8a,              // 2 bytes
   0xf4, 0x80, 0x90, 0x8a,  // 4 bytes
   0xc5, 0x8a,              // 2 bytes
   0xe1, 0x91, 0x94,        // 3 bytes
   0xf4, 0x80, 0x90, 0x8a,  // 4 bytes
   0xe1, 0x91, 0x94,        // 3 bytes
};

/* This is the buffer for the converted bytes. */
uint16_t buff[1000];  /* Large so we con'don't have to worry about it */


static cu12_t
do_cu12(uint16_t *dst, uint64_t dst_len, uint8_t *src, uint64_t src_len)
{
   int cc = 42;
   cu12_t regs;

   /* build up the register pairs */
   register uint8_t  *source     asm("4") = src;
   register uint64_t  source_len asm("5") = src_len;
   register uint16_t *dest       asm("2") = dst;
   register uint64_t  dest_len   asm("3") = dst_len;

   asm volatile(
                CU12(M3,2,4)
                "ipm %2\n\t"
                "srl %2,28\n\t"
                : "+d"(dest), "+d"(source), "=d"(cc),
                  "+d"(source_len), "+d"(dest_len)
                :
                : "memory", "cc");

   /* Capture register contents at end of cu12 */
   regs.addr1 = (uint64_t)dest;
   regs.len1  = dest_len;
   regs.addr2 = (uint64_t)source;
   regs.len2  = source_len;
   regs.cc = cc;
   
   return regs;
}

void
run_test(uint16_t *dst, uint64_t dst_len, uint8_t *src, uint64_t src_len)
{
   int i;
   cu12_t result;

   printf("UTF8:  ");
   if (src_len == 0) 
      printf(" <none>");
   else {
      for(i = 0; i < src_len; ++i)
         printf(" %02x", src[i]);
   }
   printf("\n");
      
   result = do_cu12(dst, dst_len, src, src_len);

   // Write out the converted byte, if any
   printf("UTF16: ");
   if (dst_len - result.len1 == 0)
      printf(" <none>");
   else {
      uint64_t num_bytes = dst_len - result.len1;

      /* The number of bytes that were written must be divisible by 2 */
      if (num_bytes % 2 != 0)
         fprintf(stderr, "*** number of bytes is not a multiple of 2\n");

      for (i = 0; i < num_bytes / 2; i++) {
         printf(" %04x", dst[i]);
      }
   }
   printf("\n");

   printf("  cc = %d\n", result.cc);
   if (dst != NULL)
      printf("  dst address difference: %"PRId64, result.addr1 - (uint64_t)dst);
   printf("  dst len: %"PRId64"\n", result.len1);

   if (src != NULL)
      printf("  src address difference: %"PRId64, result.addr2 - (uint64_t)src);
   printf("  src len: %"PRId64"\n", result.len2);
}

// Test conversion of a one-byte character
void convert_1_byte(void)
{
   int i;

   printf("===== Conversion of a one-byte character =====\n");

   printf("\n----- Valid characters -----\n");
   uint8_t valid[] = { 
      0x00, 0x7f,              // corner cases
      0x01, 0x10, 0x7e, 0x5d   // misc 
   };
   run_test(buff, sizeof buff, valid, sizeof valid);

   // As conversion stops upon encountering an invalid character, we
   // need to test each invalid character separately, to make sure it
   // is recognized as invalid.
   
   printf("\n----- Invalid characters -----\n");
   uint8_t always_invalid[] = {
      0x80, 0xbf,              // corner cases
      0xf8, 0xff,              // corner cases
      0x81, 0xbe, 0x95, 0xab   // misc
   };
   for (i = 0; i < sizeof always_invalid; ++i) {
      uint8_t invalid_char[1];
      invalid_char[0] = always_invalid[i];
      run_test(buff, sizeof buff, invalid_char, sizeof invalid_char);
   }

   // In case of m3 == 0 we get cc=0 indicating exhaustion of source
   printf("\n----- Invalid characters if m3 == 1 -----\n");
   uint8_t invalid_if_m3[] = {  // contains all such invalid characters
      0xc0, 0xc1,
      0xf5, 0xf6, 0xf7
   };
   for (i = 0; i < sizeof invalid_if_m3; ++i) {
      uint8_t invalid_char[1];
      invalid_char[0] = invalid_if_m3[i];
      run_test(buff, sizeof buff, invalid_char, sizeof invalid_char);
   }

   printf("\n----- 1st char valid, 2nd char invalid -----\n");
   uint8_t valid_invalid[] = {
      0x10, // valid
      0xaa  // invalid
   };
   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
}

// Test conversion of a two-byte character
void convert_2_bytes(void)
{
   int i;

   printf("\n===== Conversion of a two-byte character =====\n");

   printf("\n----- Valid characters -----\n");
   uint8_t valid[] = { 
      0xc2, 0x80,             // corner case
      0xc2, 0xbf,             // corner case
      0xdf, 0x80,             // corner case
      0xdf, 0xbf,             // corner case
      0xc3, 0xbe, 0xda, 0xbc  // misc 
   };
   run_test(buff, sizeof buff, valid, sizeof valid);

   printf("\n----- Valid characters if m3 == 0 -----\n");
   // First char is 0xc0 or 0xc1
   uint8_t valid_if_not_m3[] = {
      0xc0, 0x80,
      0xc0, 0xbf,
      0xc1, 0x80,
      0xc0, 0xbf
   };
   run_test(buff, sizeof buff, valid_if_not_m3, sizeof valid_if_not_m3);

   // Test for invalid two-byte characters where the 1st byte is valid
   // The 2nd byte is invalid if not in range 0x80..0xbf, inclusive

   // As conversion stops upon encountering an invalid character, we
   // need to test each invalid character separately, to make sure it
   // is recognized as invalid.
   
   printf("\n----- Invalid characters if m3 == 1 -----\n");
   uint8_t always_invalid[] = {
      0xc2, 0x00,
      0xc2, 0x7f,
      0xc2, 0xc0,
      0xc2, 0xff
   };
   for (i = 0; i < sizeof always_invalid; i += 2) {
      uint8_t invalid_char[2];
      invalid_char[0] = always_invalid[i];
      invalid_char[1] = always_invalid[i+1];
      run_test(buff, sizeof buff, invalid_char, sizeof invalid_char);
   }

   /* Nb: for a two-byte character we need not test the case where 
      invalidity of the character (cc=2) takes precedence over exhaustion
      of the 1st operand (cc=1). Invalidity of the character has already
      been tested when testing the 1st byte. */

   printf("\n----- 1st char valid, 2nd char invalid -----\n");
   uint8_t valid_invalid[] = {
      0xc3, 0x81, // valid
      0xc4, 0x00  // invalid
   };
   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
}

// Test conversion of a three-byte character
void
convert_3_bytes(void)
{
   int i;

   printf("\n===== Conversion of a three-byte character =====\n");

   /* Exhaustively test the 1st byte E0 - EF, and the interval boundaries for
      the 2nd and 3rd bytes */
   printf("\n----- Valid characters -----\n");
   uint8_t e0[] = { 
      0xe0, 0xa0, 0x80,
      0xe0, 0xbf, 0x80,
      0xe0, 0xa0, 0xbf,
      0xe0, 0xbf, 0xbf,
      0xe0, 0xaa, 0xbb,   // random  e0 .. ..
   };
   run_test(buff, sizeof buff, e0, sizeof e0);

   uint8_t ed[] = { 
      0xed, 0x80, 0x80,
      0xed, 0x9f, 0x80,
      0xed, 0x80, 0xbf,
      0xed, 0x9f, 0xbf,
      0xed, 0x8a, 0xbb,   // random  ed .. ..
   };
   run_test(buff, sizeof buff, ed, sizeof ed);

   for (i = 0; i <= 0xf; ++i) {
      uint8_t exxx_1[3] = { 0x0, 0x80, 0x80 };
      uint8_t exxx_2[3] = { 0x0, 0xbf, 0x80 };
      uint8_t exxx_3[3] = { 0x0, 0x80, 0xbf };
      uint8_t exxx_4[3] = { 0x0, 0xbf, 0xbf };

      if (i == 0x00) continue;   // special case e0
      if (i == 0x0d) continue;   // special case ed

      exxx_1[0] = 0xe0 | i;
      exxx_2[0] = 0xe0 | i;
      exxx_3[0] = 0xe0 | i;
      exxx_4[0] = 0xe0 | i;
      run_test(buff, sizeof buff, exxx_1, sizeof exxx_1);
      run_test(buff, sizeof buff, exxx_2, sizeof exxx_2);
      run_test(buff, sizeof buff, exxx_3, sizeof exxx_3);
      run_test(buff, sizeof buff, exxx_4, sizeof exxx_4);
   };

   printf("\n----- Invalid characters (2nd byte is invalid) -----\n");
   // Test for invalid three-byte characters where the 1st byte is valid
   // The 2nd byte is invalid.

   // As conversion stops upon encountering an invalid character, we
   // need to test each invalid character separately, to make sure it
   // is recognized as invalid.

   e0[0] = 0xe0;  // valid
   e0[1] = 0x9f;  // invalid  because outside [0xa0 .. 0xbf]
   e0[2] = 0x80;  // valid
   run_test(buff, sizeof buff, e0, sizeof e0);
   e0[1] = 0xc0;  // invalid  because outside [0xa0 .. 0xbf]
   run_test(buff, sizeof buff, e0, sizeof e0);

   ed[0] = 0xed;  // valid
   ed[1] = 0x7f;  // invalid  because outside [0x80 .. 0x9f]
   ed[2] = 0x80;  // valid
   run_test(buff, sizeof buff, ed, sizeof ed);
   ed[1] = 0xa0;  // invalid  because outside [0x80 .. 0x9f]
   run_test(buff, sizeof buff, ed, sizeof ed);

   for (i = 0; i <= 0xf; ++i) {
      uint8_t exxx_1[3] = { 0x0, 0x7f, 0x80 };
      uint8_t exxx_2[3] = { 0x0, 0xc0, 0x80 };

      if (i == 0x00) continue;   // special case e0
      if (i == 0x0d) continue;   // special case ed

      exxx_1[0] = 0xe0 | i;
      exxx_2[0] = 0xe0 | i;
      run_test(buff, sizeof buff, exxx_1, sizeof exxx_1);
      run_test(buff, sizeof buff, exxx_2, sizeof exxx_2);
   };

   printf("\n----- Invalid characters (3rd byte is invalid) -----\n");
   // For all 1st bytes 0xe0 .. 0xef the 3rd bytes must be in [0x80 .. 0xbf]
   // No need to special case 0xe0 and 0xed
   for (i = 0; i <= 0xf; ++i) {
      uint8_t exxx_1[3] = { 0x0, 0xab, 0x7f };
      uint8_t exxx_2[3] = { 0x0, 0xab, 0xc0 };

      exxx_1[0] = 0xe0 | i;
      exxx_2[0] = 0xe0 | i;
      run_test(buff, sizeof buff, exxx_1, sizeof exxx_1);
      run_test(buff, sizeof buff, exxx_2, sizeof exxx_2);
   };

   printf("\n----- Invalid 2nd char AND output exhausted -----\n");
   /* The character is invalid in its 2nd byte AND the output buffer is 
      exhausted (2 bytes are needed) */
   uint8_t pat1[] = {
      0xe0, 0x00, 0x80
   };
   run_test(buff, 1, pat1, 3);

   printf("\n----- Invalid 3rd char AND output exhausted -----\n");
   /* The character is invalid in its 3rd byte AND the output buffer is 
      exhausted (2 bytes are needed) */
   uint8_t pat2[] = {
      0xe4, 0x84, 0x00
   };
   run_test(buff, 1, pat2, 3);

   printf("\n----- 1st char valid, 2nd char invalid -----\n");
   uint8_t valid_invalid[] = {
      0xe1, 0x90, 0x90, // valid
      0xe1, 0x00, 0x90  // invalid
   };
   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
}

// Test conversion of a four-byte character
void
convert_4_bytes(void)
{
   int i, j;

   printf("\n===== Conversion of a four-byte character =====\n");

   printf("\n----- Valid characters -----\n");
   for (i = 0; i <= 4; ++i) {
      uint8_t valid[4];

      valid[0] = 0xf0 | i;

      for (j = 0; j <= 1; ++j) {
         // Byte 2
         if (i == 0) {
            valid[1] = j == 0 ? 0x90 : 0xbf;    // 0xf0
         } else if (i == 4) {
            valid[1] = j == 0 ? 0x80 : 0x8f;    // 0xf4
         } else {
            valid[1] = j == 0 ? 0x80 : 0xbf;    // 0xf1 .. 0xf3
         }
         // Byte 3 and byte 4 have same interval 0x80 .. 0xbf
         valid[2] = 0x80;
         valid[3] = 0x80;
         run_test(buff, sizeof buff, valid, sizeof valid);
         valid[2] = 0x80;
         valid[3] = 0xbf;
         run_test(buff, sizeof buff, valid, sizeof valid);
         valid[2] = 0xbf;
         valid[3] = 0x80;
         run_test(buff, sizeof buff, valid, sizeof valid);
         valid[2] = 0xbf;
         valid[3] = 0xbf;
         run_test(buff, sizeof buff, valid, sizeof valid);
      }
   }

   printf("\n----- Valid characters if m3 == 0 -----\n");
   // First char is 0xf5 .. 0xf7
   uint8_t valid_if_not_m3[] = {
      0xf5, 0x00, 0x00, 0x00,
      0xf6, 0x11, 0x22, 0x33,
      0xf7, 0x44, 0x55, 0x66,
   };
   run_test(buff, sizeof buff, valid_if_not_m3, sizeof valid_if_not_m3);

   // As conversion stops upon encountering an invalid character, we
   // need to test each invalid character separately, to make sure it
   // is recognized as invalid.

   printf("\n----- Invalid characters (2nd byte is invalid) -----\n");
   // Test for invalid four-byte characters where the 2nd byte is invalid.
   // All other bytes are valid
   uint8_t f0[4], f4[4];

   f0[0] = 0xf0;  // valid
   f0[1] = 0x8f;  // invalid  because outside [0x90 .. 0xbf]
   f0[2] = 0x80;  // valid
   f0[3] = 0x80;  // valid
   run_test(buff, sizeof buff, f0, sizeof f0);
   f0[1] = 0xc0;  // invalid  because outside [0x90 .. 0xbf]
   run_test(buff, sizeof buff, f0, sizeof f0);

   f4[0] = 0xf4;  // valid
   f4[1] = 0x7f;  // invalid  because outside [0x80 .. 0x8f]
   f4[2] = 0x80;  // valid
   f4[3] = 0x80;  // valid
   run_test(buff, sizeof buff, f4, sizeof f4);
   f4[1] = 0x90;  // invalid  because outside [0x80 .. 0x9f]
   run_test(buff, sizeof buff, f4, sizeof f4);

   for (i = 0; i <= 0x4; ++i) {
      uint8_t fxxx_1[4] = { 0x0, 0x7f, 0x80, 0x80 };
      uint8_t fxxx_2[4] = { 0x0, 0xc0, 0x80, 0x80 };

      if (i == 0) continue;   // special case f0
      if (i == 4) continue;   // special case f4

      fxxx_1[0] = 0xf0 | i;
      fxxx_2[0] = 0xf0 | i;
      run_test(buff, sizeof buff, fxxx_1, sizeof fxxx_1);
      run_test(buff, sizeof buff, fxxx_2, sizeof fxxx_2);
   };

   printf("\n----- Invalid characters (3rd byte is invalid) -----\n");
   // Test for invalid four-byte characters where the 3rd byte is invalid.
   // All other bytes are valid
   for (i = 0; i <= 0x4; ++i) {
      uint8_t fxxx[4] = { 0x0, 0x0, 0x0, 0x80 };

      fxxx[0] = 0xf0 | i;
      fxxx[1] = (i == 0) ? 0x94 : 0x84;
      fxxx[2] = 0x7f;
      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
      fxxx[2] = 0xc0;
      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
   };

   printf("\n----- Invalid characters (4th byte is invalid) -----\n");
   // Test for invalid four-byte characters where the 3rd byte is invalid.
   // All other bytes are valid
   for (i = 0; i <= 0x4; ++i) {
      uint8_t fxxx[4] = { 0x0, 0x0, 0x80, 0x0 };

      fxxx[0] = 0xf0 | i;
      fxxx[1] = (i == 0) ? 0x94 : 0x84;
      fxxx[3] = 0x7f;
      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
      fxxx[3] = 0xc0;
      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
   };

   printf("\n----- Invalid 2nd char AND output exhausted -----\n");
   /* The character is invalid in its 2nd byte AND the output buffer is 
      exhausted (4 bytes are needed) */
   uint8_t pat1[] = {
      0xf0, 0x00, 0x80, 0x80
   };
   run_test(buff, 1, pat1, 4);

   printf("\n----- Invalid 3rd char AND output exhausted -----\n");
   /* The character is invalid in its 3rd byte AND the output buffer is 
      exhausted (4 bytes are needed) */
   uint8_t pat2[] = {
      0xf0, 0xaa, 0x00, 0x80
   };
   run_test(buff, 3, pat2, 4);

   printf("\n----- Invalid 4th char AND output exhausted -----\n");
   /* The character is invalid in its 4th byte AND the output buffer is 
      exhausted (4 bytes are needed) */
   uint8_t pat3[] = {
      0xf0, 0xaa, 0xaa, 0x00
   };
   run_test(buff, 3, pat3, 4);

   printf("\n----- 1st char valid, 2nd char invalid -----\n");
   uint8_t valid_invalid[] = {
      0xf0, 0xaa, 0xaa, 0xaa, // valid
      0xf0, 0x00, 0x00, 0x00  // invalid
   };
   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
}


int main()
{
   convert_1_byte();
   convert_2_bytes();
   convert_3_bytes();
   convert_4_bytes();

   /* Length == 0, no memory should be read or written */
   printf("\n------------- test1 ----------------\n");
   run_test(NULL, 0, NULL, 0);

   /* Test exhaustion of source length (source bytes are valid) */
   printf("\n------------- test2.1 ----------------\n");

   /* No character will be written to BUFF, i.e. loop in jitted code
      is not iterated */
   run_test(buff, sizeof buff, NULL,     0);
   run_test(buff, sizeof buff, pattern1, 0);
   run_test(buff, sizeof buff, pattern2, 0);
   run_test(buff, sizeof buff, pattern2, 1);
   run_test(buff, sizeof buff, pattern3, 0);
   run_test(buff, sizeof buff, pattern3, 1);
   run_test(buff, sizeof buff, pattern3, 2);
   run_test(buff, sizeof buff, pattern4, 0);
   run_test(buff, sizeof buff, pattern4, 1);
   run_test(buff, sizeof buff, pattern4, 2);
   run_test(buff, sizeof buff, pattern4, 3);

   printf("\n------------- test2.2 ----------------\n");
   /* At least one character will be written to BUFF, i.e. loop in jitted
      code is iterated */
   run_test(buff, sizeof buff, pattern1, 2);
   run_test(buff, sizeof buff, pattern2, 5);
   run_test(buff, sizeof buff, pattern3, 6);
   run_test(buff, sizeof buff, pattern4, 9);

   /* Test exhaustion of destination length (source bytes are valid) */
   printf("\n------------- test3.1 ----------------\n");

   /* No character will be written to BUFF, i.e. loop in jitted code
      is not iterated */

   /* Want to write 2 or 4 bytes at a time */
   run_test(NULL, 0, pattern1, sizeof pattern1);  // 2-byte result
   run_test(NULL, 0, pattern2, sizeof pattern2);  // 2-byte result
   run_test(NULL, 1, pattern2, sizeof pattern2);  // 2-byte result
   run_test(NULL, 0, pattern3, sizeof pattern3);  // 2-byte result
   run_test(NULL, 1, pattern3, sizeof pattern3);  // 2-byte result
   run_test(NULL, 0, pattern4, sizeof pattern4);  // 4-byte result
   run_test(NULL, 1, pattern4, sizeof pattern4);  // 4-byte result
   run_test(NULL, 2, pattern4, sizeof pattern4);  // 4-byte result
   run_test(NULL, 3, pattern4, sizeof pattern4);  // 4-byte result

   printf("\n------------- test3.2 ----------------\n");
   /* At least one character will be written to BUFF, i.e. loop in jitted
      code is iterated */
   run_test(buff, 4, pattern1, sizeof pattern1);
   run_test(buff, 5, pattern1, sizeof pattern2);
   run_test(buff, 6, pattern1, sizeof pattern3);
   run_test(buff, 7, pattern1, sizeof pattern4);

   /* Convert buffer with mixed characters */
   printf("\n------------- test4 ----------------\n");
   run_test(buff, sizeof buff, mixed, sizeof mixed);

   return 0;
}
