/*
 * Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (C) 2015-2020 MicroEJ Corp. - EDC compliance and optimizations.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.net;

import java.io.UnsupportedEncodingException;
import java.util.BitSet;

/**
 * Utility class for HTML form encoding. This class contains static methods for converting a String to the
 * <CODE>application/x-www-form-urlencoded</CODE> MIME format. For more information about HTML form encoding, consult
 * the HTML <A HREF="http://www.w3.org/TR/html4/">specification</A>.
 *
 * <p>
 * When encoding a String, the following rules apply:
 *
 * <ul>
 * <li>The alphanumeric characters &quot;{@code a}&quot; through &quot;{@code z}&quot;, &quot;{@code A}&quot; through
 * &quot;{@code Z}&quot; and &quot;{@code 0}&quot; through &quot;{@code 9}&quot; remain the same.
 * <li>The special characters &quot;{@code .}&quot;, &quot;{@code -}&quot;, &quot;{@code *}&quot;, and &quot;{@code _}
 * &quot; remain the same.
 * <li>The space character &quot; &nbsp; &quot; is converted into a plus sign &quot;{@code +}&quot;.
 * <li>All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then
 * each byte is represented by the 3-character string &quot;<i>{@code %xy}</i>&quot;, where <i>xy</i> is the two-digit
 * hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. The encoding scheme must
 * always be specified: {@link #encode(String, String)} throws a {@code NullPointerException} when it is {@code null}.
 * </ul>
 *
 * <p>
 * For example using UTF-8 as the encoding scheme the string &quot;The string &#252;@foo-bar&quot; would get converted
 * to &quot;The+string+%C3%BC%40foo-bar&quot; because in UTF-8 the character &#252; is encoded as two bytes C3 (hex) and
 * BC (hex), and the character @ is encoded as one byte 40 (hex).
 *
 * @author Herb Jellinek
 * @since JDK1.0
 */
public class URLEncoder {
	/**
	 * Set of characters (indexed by their char value) that are copied to the output without being percent-escaped.
	 * The space character is a member only so that it skips the escaping path; {@link #encode(String, String)}
	 * rewrites it to {@code '+'}.
	 */
	static final BitSet dontNeedEncoding;

	/** Distance between a lower case ASCII letter and its upper case counterpart. */
	static final int caseDiff = ('a' - 'A');

	static {

		/*
		 * The list of characters that are not encoded has been determined as follows:
		 *
		 * RFC 2396 states: ----- Data characters that are allowed in a URI but do not have a reserved purpose are
		 * called unreserved. These include upper and lower case letters, decimal digits, and a limited set of
		 * punctuation marks and symbols.
		 *
		 * unreserved = alphanum | mark
		 *
		 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
		 *
		 * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done
		 * unless the URI is being used in a context that does not allow the unescaped character to appear. -----
		 *
		 * It appears that both Netscape and Internet Explorer escape all special characters from this list with the
		 * exception of "-", "_", ".", "*". While it is not clear why they are escaping the other characters, perhaps it
		 * is safest to assume that there might be contexts in which the others are unsafe if not escaped. Therefore, we
		 * will use the same list. It is also noteworthy that this is consistent with O'Reilly's
		 * "HTML: The Definitive Guide" (page 164).
		 *
		 * As a last note, Internet Explorer does not encode the "@" character which is clearly not unreserved according
		 * to the RFC. We are being consistent with the RFC in this matter, as is Netscape.
		 */

		dontNeedEncoding = new BitSet(256);
		int i;
		for (i = 'a'; i <= 'z'; i++) {
			dontNeedEncoding.set(i);
		}
		for (i = 'A'; i <= 'Z'; i++) {
			dontNeedEncoding.set(i);
		}
		for (i = '0'; i <= '9'; i++) {
			dontNeedEncoding.set(i);
		}
		// The space is not escaped with %xy: encode() converts it to a '+'.
		dontNeedEncoding.set(' ');
		dontNeedEncoding.set('-');
		dontNeedEncoding.set('_');
		dontNeedEncoding.set('.');
		dontNeedEncoding.set('*');
	}

	/**
	 * You can't call the constructor.
	 */
	private URLEncoder() {
	}

	/**
	 * Translates a string into {@code application/x-www-form-urlencoded} format using a specific encoding scheme. This
	 * method uses the supplied encoding scheme to obtain the bytes for unsafe characters.
	 * <p>
	 * The encoding name is checked before any character is processed, so an unsupported name is reported even when
	 * the string contains no character that needs escaping.
	 * <p>
	 * When nothing has to be changed, the given string instance itself is returned.
	 * <p>
	 * <em><strong>Note:</strong> The <a href= "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> World
	 * Wide Web Consortium Recommendation</a> states that UTF-8 should be used. Not doing so may introduce
	 * incompatibilities.</em>
	 *
	 * @param s
	 *            {@code String} to be translated.
	 * @param enc
	 *            The name of a supported <a href="../lang/package-summary.html#charenc">character encoding</a>.
	 * @return the translated {@code String}.
	 * @exception UnsupportedEncodingException
	 *                If the named encoding is not supported
	 * @exception NullPointerException
	 *                If {@code s} or {@code enc} is {@code null}
	 * @exception RuntimeException
	 *                If {@code s} contains a high surrogate (a char in the range {@code 0xD800} to {@code 0xDBFF}):
	 *                surrogate pairs are not supported by this implementation
	 * @see URLDecoder#decode(java.lang.String, java.lang.String)
	 * @since 1.4
	 */
	public static String encode(String s, String enc) throws UnsupportedEncodingException {
		final int length = s.length();
		boolean needToChange = false;
		StringBuilder out = new StringBuilder(length);
		StringBuilder needEncoding = new StringBuilder();

		if (enc == null) {
			throw new NullPointerException("charsetName");
		}

		// Resolve the encoding name once, up front. Without this, an unsupported name would only be detected when
		// the input happens to contain an unsafe character, making the failure depend on the data being encoded.
		"".getBytes(enc);

		int i = 0;
		while (i < length) {
			char c = s.charAt(i);

			if (dontNeedEncoding.get(c)) {
				if (c == ' ') {
					c = '+';
					needToChange = true;
				}
				out.append(c);
				i++;
				continue;
			}

			// Gather the whole run of consecutive unsafe characters so that it is converted to the external
			// encoding in one go before the hex conversion.
			do {
				/*
				 * Surrogate pairs are not supported by this implementation: a high surrogate is rejected. A low
				 * surrogate is not rejected by this check and is handed to the encoder like any other character.
				 */
				if (c >= 0xD800 && c <= 0xDBFF) {
					throw new RuntimeException("Surrogate pair not supported");
				}
				needEncoding.append(c);
				i++;
			} while (i < length && !dontNeedEncoding.get((c = s.charAt(i))));

			appendEscaped(out, needEncoding.toString().getBytes(enc));
			needEncoding.setLength(0);
			needToChange = true;
		}

		return (needToChange ? out.toString() : s);
	}

	// Appends each byte to 'out' as '%' followed by its two upper case hexadecimal digits.
	private static void appendEscaped(StringBuilder out, byte[] bytes) {
		for (byte b : bytes) {
			out.append('%');
			out.append(upperHexDigit((b >> 4) & 0xF));
			out.append(upperHexDigit(b & 0xF));
		}
	}

	// Returns the hexadecimal digit for a value in [0, 15], using an upper case letter for values above 9.
	private static char upperHexDigit(int nibble) {
		char ch = Character.forDigit(nibble, 16);
		// Character.forDigit yields lower case letters: shift them to upper case.
		if (Character_isLetter(ch)) {
			ch -= caseDiff;
		}
		return ch;
	}

	// Approximation of Character.isLetter.
	private static boolean Character_isLetter(char character) {
		return Character.isLowerCase(character) || Character.isUpperCase(character);
	}
}
