#include "utf8.h"
#include "str_partial.h"
#include <stdint.h>
#include <stdio.h>

/* A UTF-8 string.  Its only member is a str, so a pointer to this struct
   and a pointer to its member S refer to the same address; new_utf8()
   relies on that when it returns a str * cast to utf8 *.
   NOTE(review): presumably `utf8' is a typedef of this struct declared
   in utf8.h -- confirm. */
struct utf8_s {
  str s;
};

/*
 * The byte classification and the state transition table used by
 * decode().
 *
 * Entries 0..255 are indexed by a byte value and give the class of
 * that byte (the classes used here are 0..11).
 *
 * The entries from index 256 onwards are the transitions: decode()
 * reads utf8d[256 + state + class] to obtain the next state.  The
 * states stored here are multiples of 12 (0, 12, 24, ..., 96), so each
 * state owns one run of 12 entries, one per class.
 *
 * NOTE(review): presumably UTF8_STATE_ACCEPT is 0 and UTF8_STATE_REJECT
 * is 12 -- confirm against utf8.h.
 * NOTE(review): the values look like the table from Bjoern Hoehrmann's
 * "Flexible and Economical UTF-8 Decoder" -- confirm the origin and
 * whether an attribution notice is required.
 */

static const
uint8_t utf8d[] = {
  /* Classes of the bytes 0x00..0x7f: all class 0. */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x80..0x8f: class 1; 0x90..0x9f: class 9. */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
  /* 0xa0..0xbf: class 7. */
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  /* 0xc0, 0xc1: class 8; 0xc2..0xdf: class 2. */
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xe0: class 10; 0xed: class 4; the rest of 0xe1..0xef: class 3.
     0xf0: class 11; 0xf1..0xf3: class 6; 0xf4: class 5;
     0xf5..0xff: class 8. */
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  /* Transitions: 12 entries per state, for the states 0, 12, ..., 96
     in that order (two states per line, the last line holds one). */
  0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12,
};

/*
 * Feed one BYTE to the decoder.
 *
 * STATE and CODEP are both read and updated.  If *STATE is
 * UTF8_STATE_ACCEPT on entry, BYTE starts a new code point: *CODEP is
 * set to BYTE with a mask of 0xff shifted right by the byte's class.
 * Otherwise the low six bits of BYTE are appended to *CODEP.
 *
 * The next state is then taken from the transition part of utf8d,
 * stored in *STATE and also returned.
 */
static UTF8_State
decode(UTF8_State *state, uint32_t *codep, uint8_t byte)
{
  const uint32_t klass = utf8d[byte];

  if (*state == UTF8_STATE_ACCEPT) {
    /* First byte of a sequence: keep only its payload bits. */
    *codep = (0xff >> klass) & ((uint32_t) byte);
  } else {
    /* Later byte: shift in its six payload bits. */
    *codep = (byte & 0x3fu) | (*codep << 6);
  }

  /* The transitions start right after the 256 class entries. */
  *state = utf8d[256 + *state + klass];

  return *state;
}

/*
 * Decode the UTF-8 sequence of S that starts at byte offset N.
 *
 * The result is a str_info built from two values, in this order:
 *
 *   { -1, 0 }     N is at or past the end of S;
 *   { code, 1 }   the byte at N is a complete sequence on its own;
 *   { -1, 1 }     the byte at N is rejected, or it needs more bytes
 *                 and it is the last byte of S;
 *   { code, k }   a sequence of k bytes was decoded successfully;
 *   { -1, k-1 }   k bytes were fed to the decoder without reaching
 *                 the accepting state (a byte was rejected, or S ended
 *                 in the middle of the sequence).
 */
str_info
utf8_get_info(const str * const restrict s, UNUM n)
{
  /* error */
  if (n >= s->size) {
    return (str_info) { -1, 0 };
  }

  NUM start = n;
  uint32_t code_point = 0;
  UTF8_State dfa = UTF8_STATE_ACCEPT;

  /* The first byte decides whether more bytes are needed. */
  decode(&dfa, &code_point, s->data[n]);
  n++;

  if (dfa == UTF8_STATE_ACCEPT) {
    return (str_info) { code_point, 1 };
  }

  if (dfa == UTF8_STATE_REJECT || n >= s->size) {
    return (str_info) { -1, 1 };
  }

  /* Keep feeding bytes while the decoder is in the middle of a
     sequence and there is input left. */
  while (n < s->size && dfa > UTF8_STATE_REJECT) {
    decode(&dfa, &code_point, s->data[n]);
    n++;
  }

  if (dfa != UTF8_STATE_ACCEPT) {
    return (str_info) { -1, n - start - 1};
  }

  return (str_info) { code_point, n - start };
}

/*
 * Build a utf8 from STRING and SIZE.
 *
 * The object is created by new_str(), then its getter is set to
 * utf8_get_info and the pointer is returned as a utf8 *.
 *
 * Returns NULL if new_str() returned NULL.
 * NOTE(review): new_str() is defined elsewhere; whether it can return
 * NULL is not visible from here -- the check is defensive.
 */
utf8 *
new_utf8(char *string, UNUM size)
{
  str *s = new_str(string, size);

  /* Do not write through a null pointer if the creation failed. */
  if (s == NULL) return NULL;

  s->getter = utf8_get_info;

  return (utf8 *)s;
}
  
/*
 * Encode CODE_POINT as UTF-8 into RESULT.
 *
 * On entry RESULT->size is read as the number of bytes that may be
 * written at RESULT->data, so RESULT should be long enough to hold the
 * data.  On success RESULT->size is replaced by the number of bytes
 * written (1 to 4).
 *
 * Returns 0 on success and 1 on failure.  The failures are:
 *
 *   - CODE_POINT is negative, greater than 0x10ffff, or a surrogate
 *     (0xd800..0xdfff): a diagnostic is printed and RESULT is left
 *     untouched;
 *   - CODE_POINT is 0: RESULT->size is set to 0, nothing is written
 *     and nothing is printed;
 *   - RESULT->size is smaller than the encoded length: a diagnostic is
 *     printed and RESULT is left untouched.
 */
BOOL
encode(NUM code_point, str *result)
{
  unsigned int length = 0;

  /* Only Unicode scalar values have a UTF-8 encoding.  Checking the
     sign first also means a negative value is never shifted. */
  if (code_point < 0 || code_point > 0x10ffff ||
      (code_point >= 0xd800 && code_point <= 0xdfff)) {
    eprintf("%s:%d, Invalid code point to encode\n",
            __FILE__, __LINE__);
    return 1;
  }

  if (code_point == 0) {
    result->size = 0;
    return 1;
  }

  /* The encoded length follows from the magnitude: up to 7, 11, 16
     and 21 significant bits need 1, 2, 3 and 4 bytes. */
  if (code_point <= 0x7f)
    length = 1;
  else if (code_point <= 0x7ff)
    length = 2;
  else if (code_point <= 0xffff)
    length = 3;
  else
    length = 4;

  if (result->size < length) {
    eprintf("%s:%d, Result cannot hold the encoded value\n",
            __FILE__, __LINE__);
    return 1;
  }

  switch (length) {
  case 1:
    result->data[0] = (char) code_point;
    break;
  case 2:
    result->data[0] = (char) (0xc0 | (code_point >> 6));
    result->data[1] = (char) (0x80 | (code_point & 0x3f));
    break;
  case 3:
    result->data[0] = (char) (0xe0 | (code_point >> 12));
    result->data[1] = (char) (0x80 | ((code_point >> 6) & 0x3f));
    result->data[2] = (char) (0x80 | (code_point & 0x3f));
    break;
  default:
    result->data[0] = (char) (0xf0 | (code_point >> 18));
    result->data[1] = (char) (0x80 | ((code_point >> 12) & 0x3f));
    result->data[2] = (char) (0x80 | ((code_point >> 6) & 0x3f));
    result->data[3] = (char) (0x80 | (code_point & 0x3f));
    break;
  }

  result->size = length;

  return 0;
}