/*
 * Java
 *
 * Copyright 2007-2021 IS2T. All rights reserved.
 * IS2T PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
package com.is2t.vm.support.util;

import com.is2t.vm.support.CalibrationConstants;
import com.is2t.vm.support.util.EncodingConversion;

import ej.bon.Constants;

/**
 * Full UTF-8 encoding conversion implementation.
 * <p>
 * Note that DataInputStream.readUTF() and DataOutputStream.writeUTF() are slightly different
 * because they process UTF bytes as described in the classfile format.
 * <p>
 * Supplementary characters (code points above U+FFFF, stored as surrogate pairs on the char side
 * and as 4-byte sequences on the byte side) are only handled when the
 * {@link CalibrationConstants#CONSTANT_SUPPLEMENTARY_CHARACTER} constant is <code>true</code>.
 */
public class EncUTF_8 extends EncodingConversion {

	/**
	 * Character emitted by {@link #decode(byte[], int[], int, char[], int, int)} for a malformed sequence.
	 * The byte value 0xFD is negative (-3); once sign-extended and narrowed to a <code>char</code> it
	 * becomes U+FFFD (the Unicode replacement character).
	 */
	private static final byte DEFAULT_ERROR_CHAR = (byte)0xFD;
	
	private static final String UTF_8 = "UTF-8"; //$NON-NLS-1$

	/**
	 * Creates the conversion, registering it under the name "UTF-8" in the super class.
	 */
	public EncUTF_8() {
		super(UTF_8);
	}
	
	/**
	 * Decodes UTF-8 bytes into chars.
	 * <p>
	 * Decoding stops when all the given bytes have been consumed, when the chars window is full, when
	 * the remaining bytes hold only the beginning of a multi-byte sequence, or when a decoded surrogate
	 * pair does not fit in the remaining chars window. In the last two cases the incomplete/unwritten
	 * sequence is left unconsumed so that it can be decoded again later.
	 * <p>
	 * A malformed sequence (unexpected lead byte, invalid continuation byte, overlong or out-of-range
	 * 4-byte sequence) produces a single U+FFFD char.
	 *
	 * @param bytes the bytes to decode
	 * @param bytesOffset one-element array: on entry, index of the first byte to decode; on return,
	 *        index of the first byte that has not been consumed
	 * @param bytesLength the number of bytes available from <code>bytesOffset[0]</code>
	 * @param chars the destination buffer
	 * @param offset index of the first char to write in <code>chars</code>
	 * @param length maximum number of chars to write
	 * @return the number of chars written
	 * @accelerable
	 */
	// Cyclomatic complexity is acceptable here
	public int decode(byte[] bytes, int[] bytesOffset, int bytesLength, char[] chars, int offset, int length){ //NOSONAR
		int currentOffset = offset;
		int currentBytesOffset = bytesOffset[0];
		int stopBytes = currentBytesOffset+bytesLength;
		int stopChars = offset+length;
		while(currentBytesOffset < stopBytes && currentOffset < stopChars){
			int a, b, c, d;
			int charRead;

			a = bytes[currentBytesOffset++] & 0xFF;

			if(Constants.getBoolean(CalibrationConstants.CONSTANT_SUPPLEMENTARY_CHARACTER)) {
				if ((a & 0xF8) == 0xF0){ // 11110xxx -> 4-bytes char
					if(currentBytesOffset >= stopBytes-2){
						// the 3 continuation bytes are not all available: unread the lead byte
						--currentBytesOffset;
						break;
					}

					// build the integer code point
					b = bytes[currentBytesOffset++] & 0xFF;
					c = bytes[currentBytesOffset++] & 0xFF;
					d = bytes[currentBytesOffset++] & 0xFF;
					int codePoint = ((a & 0x07) << 18) | (((b & 0x3F) << 12) | ((c & 0x3F) << 6) | (d & 0x3F));

					if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (d & 0xC0) != 0x80 // must be 10xxxxxx
							|| codePoint < 0x10000 || codePoint > 0x10FFFF) { // overlong or beyond Unicode range
						// invalid encoding: it cannot be expressed as a surrogate pair
						chars[currentOffset++] = (char)DEFAULT_ERROR_CHAR;
						continue;
					}

					if(currentOffset >= stopChars-1){
						// not enough room to store the 2 chars of the pair: unread the whole sequence
						currentBytesOffset -= 4;
						break;
					}

					// convert into surrogate pairs (2 char)
					codePoint -= 0x10000;
					chars[currentOffset++] = (char)((codePoint >> 10) + 0xd800); // high surrogate
					chars[currentOffset++] = (char)((codePoint & 0x3ff) + 0xdc00); // low surrogate
					continue;
				}
			}
			
			if ((a & 0x80) == 0) { // 0xxxxxxx -> single byte character
				charRead = a;
			}
			else if ((a & 0xF0) == 0xE0) { // 1110xxxx -> 3-bytes char
				if(currentBytesOffset >= stopBytes-1){
					// the 2 continuation bytes are not all available: unread the lead byte
					--currentBytesOffset;
					break;
				}
				b = bytes[currentBytesOffset++] & 0xFF;
				c = bytes[currentBytesOffset++] & 0xFF;

				if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80){ // must be 10xxxxxx
					// invalid encoding
					charRead = DEFAULT_ERROR_CHAR;
				}
				else {
					charRead = (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
				}
			} 
			else if ((a & 0xE0) == 0xC0) { // 110xxxxx -> 2-bytes char
				if(currentBytesOffset == stopBytes){
					// the continuation byte is not available: unread the lead byte
					--currentBytesOffset;
					break;
				}
				b = bytes[currentBytesOffset++] & 0xFF;
				if ((b & 0xC0) != 0x80){ // must be 10xxxxxx
					// invalid encoding
					charRead = DEFAULT_ERROR_CHAR;
				}
				else {
					charRead = (((a & 0x1F) << 6) | (b & 0x3F));
				}
			} 
			else{
				// invalid encoding: stray continuation byte (10xxxxxx) or unsupported lead byte (1111xxxx)
				charRead = DEFAULT_ERROR_CHAR;
			}
			
			// the narrowing cast turns the sign-extended DEFAULT_ERROR_CHAR into U+FFFD
			chars[currentOffset++] = (char)charRead;
		}
		
		bytesOffset[0] = currentBytesOffset;
		return currentOffset-offset;
	}

	/**
	 * @return the maximum number of bytes produced by {@link #encode(char[], int[], int, byte[], int, int)}
	 *         for one char
	 */
	public int getMaxBytesPerChar() {
		// the maximum number of bytes per char is always 3, even if CONSTANT_SUPPLEMENTARY_CHARACTER is enabled
		// (2 chars are stored on 4 bytes)
		return 3;
	}
	
	/**
	 * Encodes chars into UTF-8 bytes.
	 * <p>
	 * Encoding stops when all the given chars have been consumed or when the bytes window has not enough
	 * room left to store the next character. A char is reported as consumed only once its bytes have been
	 * written.
	 * <p>
	 * When supplementary characters are enabled, a high surrogate immediately followed (within the given
	 * chars) by a low surrogate is encoded as one 4-byte sequence. Any other surrogate char is encoded
	 * on 3 bytes like the other chars from U+0800 through U+FFFF.
	 *
	 * @param chars the chars to encode
	 * @param charsOffset one-element array: on entry, index of the first char to encode; on return,
	 *        index of the first char that has not been consumed
	 * @param charsLength the number of chars available from <code>charsOffset[0]</code>
	 * @param bytes the destination buffer
	 * @param offset index of the first byte to write in <code>bytes</code>
	 * @param length maximum number of bytes to write
	 * @return the number of bytes written
	 * @accelerable
	 */
	public int encode(char[] chars, int[] charsOffset, int charsLength, byte[] bytes, int offset, int length) {
		int currentOffset = offset;
		int currentCharsOffset = charsOffset[0];
		int stopChars = currentCharsOffset+charsLength;
		int stopBytes = currentOffset+length;
		while(currentCharsOffset < stopChars){
			// peek only: the char is consumed once its bytes are stored
			int c = chars[currentCharsOffset];
			if (c <= '\u007f'){ 
				if(currentOffset >= stopBytes) {
					break; // not enough room to store 1 byte
				}
				bytes[currentOffset++] = (byte)c;
				++currentCharsOffset;
			}
			else if (c <= '\u07ff') {			
				if(currentOffset >= stopBytes-1) {
					break; // not enough room to store 2 bytes
				}
				bytes[currentOffset++] = (byte)(0xc0 | (0x1f & (c >> 6)));
				bytes[currentOffset++] = (byte)(0x80 | (0x3f & c));
				++currentCharsOffset;
			}
			else{					
				if(Constants.getBoolean(CalibrationConstants.CONSTANT_SUPPLEMENTARY_CHARACTER)) {
					// high surrogate: look for its low surrogate without reading past the given chars
					if(c >= '\ud800' && c <= '\udbff' && currentCharsOffset+1 < stopChars) {
						int low = chars[currentCharsOffset+1];
						if(low >= '\udc00' && low <= '\udfff') {
							if(currentOffset >= stopBytes-3) {
								break; // not enough room to store 4 bytes
							}

							int codepoint = (((c-0xd800) << 10) | (low-0xdc00)) + 0x10000;

							bytes[currentOffset++] = (byte)(0xf0 | (0x07 & (codepoint >> 18)));
							bytes[currentOffset++] = (byte)(0x80 | (0x3f & (codepoint >> 12)));
							bytes[currentOffset++] = (byte)(0x80 | (0x3f & (codepoint >> 6)));
							bytes[currentOffset++] = (byte)(0x80 | (0x3f & codepoint));
							currentCharsOffset += 2; // both chars of the pair are consumed
							continue;
						}
					}
				}
				
				if(currentOffset >= stopBytes-2) {
					break; // not enough room to store 3 bytes
				}
			
				// from \u0800 through \uffff (including unpaired surrogates)
				bytes[currentOffset++] = (byte)(0xe0 | (0x0f & (c >> 12)));
				bytes[currentOffset++] = (byte)(0x80 | (0x3f & (c >>  6)));
				bytes[currentOffset++] = (byte)(0x80 | (0x3f & c));
				++currentCharsOffset;
			}
		}

		charsOffset[0] = currentCharsOffset;
		return currentOffset-offset;
	}

}
