utf_converter Flat contracts

Automatic generation produced by ISE Eiffel


Classes
Clusters
Cluster hierarchy
Chart
Relations
Flat contracts
Go to: 
note
	description: "[
		Converter from/to UTF-8, UTF-16 and UTF-32 encodings.
		
		Handling of invalid encodings
		=============================
		
		Whenever a UTF-8 or UTF-16 sequence is decoded, the decoding routines also check
		that the sequence is valid. If it is not, it will replace the invalid unit (e.g. a byte
		for UTF-8 and a 2-byte unit for UTF-16) by the replacement character U+FFFD as described by
		variant #3 of the recommended practice for replacement character in Unicode (see
		http://www.unicode.org/review/pr-121.html for more details).
		
		However it means that you cannot roundtrip incorrectly encoded sequence back and forth
		between the encoded version and the decoded STRING_32 version. To allow roundtrip, an
		escaped representation of a badly encoded sequence has been introduced. It adds a
		fourth variant (which is a slight modification of variant #3) to the recommended
		practice where the replacement character is followed by the printed hexadecimal value
		of the invalid byte or the invalid 2-byte sequence.
		
		To provide an example (assuming that the Unicode character U+FFFD is represented as
		? textually):
		1 - on UNIX, any invalid UTF-8 byte sequence such as 0x8F 0x8F is encoded as the
		following Unicode sequence: U+FFFD U+0038 U+0046 U+FFFD U+0038 U+0046, and textually
		it looks like "?8F?8F".
		2 - on Windows, any invalid UTF-16 2-byte sequence such as 0xD800 0x0054 is encoded as the
		following Unicode sequence: U+FFFD U+0075 U+0044 U+0038 U+0030 U+0030 U+FFFD U+0035 U+0034,
		and textually it looks like "?uD800?54". The rule is that if the 2-byte sequence does not fit
		into 1 byte, it uses the letter u followed by the hexadecimal value of the 2-byte sequence,
		otherwise it simply uses the 1-byte hexadecimal representation.
	]"
	date: "$Date: 2021-06-18 09:01:52 -0800 (Fri, 18 Jun 2021) $"
	revision: "$Revision: 105548 $"

expanded class interface
	UTF_CONVERTER

create 
	default_create
			-- Process instances of classes with no creation clause.
			-- (Default: do nothing.)
			-- (from ANY)

feature -- Access

	Escape_character: CHARACTER_32 = '�'
			-- Unicode replacement character to escape invalid UTF-8 or UTF-16 encoding.
			-- UTF-8 encoding: 0xEF 0xBF 0xBD
			-- Binary UTF-8 encoding: 11101111 10111111 10111101
			-- UTF-16 encoding: 0xFFFD

	generating_type: TYPE [detachable UTF_CONVERTER]
			-- Type of current object
			-- (type of which it is a direct instance)
			-- (from ANY)
		ensure -- from ANY
			generating_type_not_void: Result /= Void

	generator: STRING_8
			-- Name of current object's generating class
			-- (base class of the type of which it is a direct instance)
			-- (from ANY)
		ensure -- from ANY
			generator_not_void: Result /= Void
			generator_not_empty: not Result.is_empty
	
feature -- Measurement

	utf_16_bytes_count (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Number of bytes necessary to encode in UTF-16 s.substring (start_pos, end_pos).
			-- Note that this feature can be used for both escaped and non-escaped string.
			-- In the case of escaped strings, the result will be possibly higher than really needed.
			-- It does not include the terminating null character.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
		ensure
			instance_free: class

	utf_16_characters_count_form_pointer (m: MANAGED_POINTER; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Number of characters of the UTF-16 encoded m starting at start_pos in m up to end_pos - 1.
			-- It does not include the terminating null character.
		require
			start_position_big_enough: start_pos >= 0
			end_position: start_pos <= end_pos + 2
			end_pos_small_enought: end_pos < m.count
			even_start_position: start_pos \\ 2 = 0
			even_end_position: end_pos \\ 2 = 0
		ensure
			instance_free: class

	utf_8_bytes_count (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Number of bytes necessary to encode in UTF-8 s.substring (start_pos, end_pos).
			-- Note that this feature can be used for both escaped and non-escaped string.
			-- In the case of escaped strings, the result will be possibly higher than really needed.
			-- It does not include the terminating null character.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
		ensure
			instance_free: class

	utf_8_to_string_32_count (s: SPECIAL [CHARACTER_8]; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Count of characters corresponding to UTF-8 sequence s.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < s.count
		ensure
			instance_free: class
	
feature -- Comparison

	frozen deep_equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void
			-- or attached to isomorphic object structures?
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			shallow_implies_deep: standard_equal (a, b) implies Result
			both_or_none_void: (a = Void) implies (Result = (b = Void))
			same_type: (Result and (a /= Void)) implies (b /= Void and then a.same_type (b))
			symmetric: Result implies deep_equal (b, a)

	frozen equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void or attached
			-- to objects considered equal?
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.is_equal (b))

	frozen is_deep_equal alias "≡≡≡" (other: UTF_CONVERTER): BOOLEAN
			-- Are Current and other attached to isomorphic object structures?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			shallow_implies_deep: standard_is_equal (other) implies Result
			same_type: Result implies same_type (other)
			symmetric: Result implies other.is_deep_equal (Current)

	is_equal (other: UTF_CONVERTER): BOOLEAN
			-- Is other attached to an object considered
			-- equal to current object?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			symmetric: Result implies other ~ Current
			consistent: standard_is_equal (other) implies Result

	frozen standard_equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void or attached to
			-- field-by-field identical objects of the same type?
			-- Always uses default object comparison criterion.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.standard_is_equal (b))

	frozen standard_is_equal alias "≜" (other: UTF_CONVERTER): BOOLEAN
			-- Is other attached to an object of the same type
			-- as current object, and field-by-field identical to it?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			same_type: Result implies same_type (other)
			symmetric: Result implies other.standard_is_equal (Current)
	
feature -- Status report

	conforms_to (other: ANY): BOOLEAN
			-- Does type of current object conform to type
			-- of other (as per Eiffel: The Language, chapter 13)?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void

	is_valid_utf_16 (s: SPECIAL [NATURAL_16]): BOOLEAN
			-- Is s a valid UTF-16 Unicode sequence?
		ensure
			instance_free: class

	is_valid_utf_16_subpointer (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): BOOLEAN
			-- Is p a valid UTF-16 Unicode sequence between code unit start_pos and end_pos?
			-- If a_stop_at_null we stop checking after finding a null character.
		ensure
			instance_free: class

	is_valid_utf_16le_string_8 (s: READABLE_STRING_8): BOOLEAN
			-- Is s a valid UTF-16LE Unicode sequence?
		ensure
			instance_free: class

	is_valid_utf_8_string_8 (s: READABLE_STRING_8): BOOLEAN
			-- Is s a valid UTF-8 Unicode sequence?
		ensure
			instance_free: class

	same_type (other: ANY): BOOLEAN
			-- Is type of current object identical to type of other?
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			definition: Result = (conforms_to (other) and other.conforms_to (Current))
	
feature -- Duplication

	copy (other: UTF_CONVERTER)
			-- Update current object using fields of object attached
			-- to other, so as to yield equal objects.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
			type_identity: same_type (other)
		ensure -- from ANY
			is_equal: Current ~ other

	frozen deep_copy (other: UTF_CONVERTER)
			-- Effect equivalent to that of:
			--		`copy` (other . `deep_twin`)
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			deep_equal: deep_equal (Current, other)

	frozen deep_twin: UTF_CONVERTER
			-- New object structure recursively duplicated from Current.
			-- (from ANY)
		ensure -- from ANY
			deep_twin_not_void: Result /= Void
			deep_equal: deep_equal (Current, Result)

	frozen standard_copy (other: UTF_CONVERTER)
			-- Copy every field of other onto corresponding field
			-- of current object.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
			type_identity: same_type (other)
		ensure -- from ANY
			is_standard_equal: standard_is_equal (other)

	frozen standard_twin: UTF_CONVERTER
			-- New object field-by-field identical to Current.
			-- Always uses default copying semantics.
			-- (from ANY)
		ensure -- from ANY
			standard_twin_not_void: Result /= Void
			equal: standard_equal (Result, Current)

	frozen twin: UTF_CONVERTER
			-- New object equal to Current
			-- `twin` calls `copy`; to change copying/twinning semantics, redefine `copy`.
			-- (from ANY)
		ensure -- from ANY
			twin_not_void: Result /= Void
			is_equal: Result ~ Current
	
feature -- Basic operations

	frozen default: detachable UTF_CONVERTER
			-- Default value of object's type
			-- (from ANY)

	frozen default_pointer: POINTER
			-- Default value of type POINTER
			-- (Avoid the need to write p.`default` for
			-- some p of type POINTER.)
			-- (from ANY)
		ensure -- from ANY
			instance_free: class

	default_rescue
			-- Process exception for routines with no Rescue clause.
			-- (Default: do nothing.)
			-- (from ANY)

	frozen do_nothing
			-- Execute a null action.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
	
feature -- Byte Order Mark (BOM)

	Utf_16be_bom_to_string_8: STRING_8 = "þÿ"
			-- UTF-16BE BOM sequence.

	Utf_16le_bom_to_string_8: STRING_8 = "ÿþ"
			-- UTF-16LE BOM sequence.

	Utf_32be_bom_to_string_8: STRING_8 = "%U%Uþÿ"
			-- UTF-32BE BOM sequence.

	Utf_32le_bom_to_string_8: STRING_8 = "ÿþ%U%U"
			-- UTF-32LE BOM sequence.

	Utf_8_bom_to_string_8: STRING_8 = "ï»¿"
			-- UTF-8 BOM sequence.
	
feature -- Output

	Io: STD_FILES
			-- Handle to standard file setup
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			io_not_void: Result /= Void

	out: STRING_8
			-- New string containing terse printable representation
			-- of current object
			-- (from ANY)
		ensure -- from ANY
			out_not_void: Result /= Void

	print (o: detachable ANY)
			-- Write terse external representation of o
			-- on standard output.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class

	frozen tagged_out: STRING_8
			-- New string containing terse printable representation
			-- of current object
			-- (from ANY)
		ensure -- from ANY
			tagged_out_not_void: Result /= Void
	
feature -- Platform

	Operating_environment: OPERATING_ENVIRONMENT
			-- Objects available from the operating system
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			operating_environment_not_void: Result /= Void
	
feature -- UTF-16 to UTF-32

	utf_16_0_pointer_into_escaped_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Copy {STRING_32} object corresponding to UTF-16 sequence p which is zero-terminated,
			-- where invalid UTF-16LE sequences are escaped, appended into a_result.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		ensure
			instance_free: class
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_utf and then ∀ c: l_utf.new_cursor.incremented (1) ¦
					(c.natural_32_code | (l_utf.code (@c.target_index + 1) |<< 8)) = 
-- Unable to show all postconditions!

			utf_16_0_pointer_into_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
					-- Copy {STRING_32} object corresponding to UTF-16 sequence p which is zero-terminated
					-- appended into a_result.
				require
					minimum_size: p.count >= 2
					valid_count: p.count \\ 2 = 0
				ensure
					instance_free: class
					roundtrip: is_valid_utf_16_subpointer (p, 0, p.count // 2, True) implies ∀ n: string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)) ¦
							n = p.read_natural_16 (@n.target_index * 2)

			utf_16_0_pointer_to_escaped_string_32 (p: MANAGED_POINTER): STRING_32
					-- {STRING_32} object corresponding to UTF-16 sequence p which is zero-terminated,
					-- where invalid UTF-16LE sequences are escaped.
				require
					minimum_size: p.count >= 2
					valid_count: p.count \\ 2 = 0
				ensure
					instance_free: class
					roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (Result) as l_utf and then ∀ c: l_utf.new_cursor.incremented (1) ¦
							(c.natural_32_code | (l_utf.code (@c.target_index + 1) |<< 8)) = 
-- Unable to show all postconditions!

					utf_16_0_pointer_to_string_32 (p: MANAGED_POINTER): STRING_32
							-- {STRING_32} object corresponding to UTF-16 sequence p which is zero-terminated.
						require
							minimum_size: p.count >= 2
							valid_count: p.count \\ 2 = 0
						ensure
							instance_free: class
							roundtrip: is_valid_utf_16_subpointer (p, 0, p.count // 2, True) implies ∀ n: string_32_to_utf_16 (Result) ¦
									n = p.read_natural_16 (@n.target_index * 2)

					utf_16_0_subpointer_into_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN; a_result: STRING_32)
							-- Copy {STRING_32} object corresponding to UTF-16 sequence p between code units start_pos and
							-- end_pos or the first null character encountered if a_stop_at_null, where invalid
							-- UTF-16LE sequences are escaped, appended into a_result.
						require
							minimum_size: p.count >= 2
							start_position_big_enough: start_pos >= 0
							end_position_big_enough: start_pos <= end_pos + 1
							end_pos_small_enough: end_pos < p.count // 2
						ensure
							instance_free: class
							roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_utf and then ∀ c: l_utf.new_cursor.incremented (1) ¦
									(c.natural_32_code | (l_utf.code (@c.target_index + 1) |<< 8)) = 
-- Unable to show all postconditions!

							utf_16_0_subpointer_into_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN; a_result: STRING_32)
									-- Copy {STRING_32} object corresponding to UTF-16 sequence p between code units start_pos and
									-- end_pos or the first null character encountered if a_stop_at_null appended into a_result.
								require
									minimum_size: p.count >= 2
									start_position_big_enough: start_pos >= 0
									end_position_big_enough: start_pos <= end_pos + 1
									end_pos_small_enough: end_pos < p.count // 2
								ensure
									instance_free: class
									roundtrip: is_valid_utf_16_subpointer (p, start_pos, end_pos, a_stop_at_null) implies ∀ x: string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)) ¦
											x = p.read_natural_16 (@x.target_index * 2)

							utf_16_0_subpointer_to_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): STRING_32
									-- {STRING_32} object corresponding to UTF-16 sequence p between code units start_pos and
									-- end_pos or the first null character encountered if a_stop_at_null, where invalid
									-- UTF-16LE sequences are escaped.
								require
									minimum_size: p.count >= 2
									start_position_big_enough: start_pos >= 0
									end_position_big_enough: start_pos <= end_pos + 1
									end_pos_small_enough: end_pos < p.count // 2
								ensure
									instance_free: class
									roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (Result) as l_utf and then ∀ c: l_utf.new_cursor.incremented (1) ¦
											(c.natural_32_code | (l_utf.code (@c.target_index + 1) |<< 8)) = 
-- Unable to show all postconditions!

									utf_16_0_subpointer_to_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): STRING_32
											-- {STRING_32} object corresponding to UTF-16 sequence p between code units start_pos and
											-- end_pos or the first null character encountered if a_stop_at_null.
										require
											minimum_size: p.count >= 2
											start_position_big_enough: start_pos >= 0
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos < p.count // 2
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16_subpointer (p, start_pos, end_pos, a_stop_at_null) implies ∀ n: string_32_to_utf_16 (Result) ¦
													n = p.read_natural_16 (@n.target_index * 2)

									utf_16_into_string_32 (s: SPECIAL [NATURAL_16]; a_result: STRING_32)
											-- Copy {STRING_32} object corresponding to UTF-16 sequence s
											-- appended into a_result.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)).is_equal (s)

									utf_16_to_string_32 (s: SPECIAL [NATURAL_16]): STRING_32
											-- {STRING_32} object corresponding to UTF-16 sequence s.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (Result).is_equal (s)

									utf_16le_string_8_into_escaped_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
											-- Copy {STRING_32} object corresponding to UTF-16LE sequence s, where invalid UTF-16LE
											-- sequences are escaped, appended into a_result.
										ensure
											instance_free: class
											roundtrip: escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

									utf_16le_string_8_into_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
											-- Copy {STRING_32} object corresponding to UTF-16LE sequence s appended into a_result.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16le_string_8 (s) implies escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

									utf_16le_string_8_to_escaped_string_32 (s: READABLE_STRING_8): STRING_32
											-- {STRING_32} object corresponding to UTF-16LE sequence s, where invalid UTF-16LE
											-- sequences are escaped.
										ensure
											instance_free: class
											roundtrip: escaped_utf_32_string_to_utf_16le_string_8 (Result).same_string (s)

									utf_16le_string_8_to_string_32 (s: READABLE_STRING_8): STRING_32
											-- {STRING_32} object corresponding to UTF-16LE sequence s.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16le_string_8 (s) implies escaped_utf_32_string_to_utf_16le_string_8 (Result).same_string (s)
									
								feature -- UTF-16 to UTF-8
								
									utf_16_into_utf_8_string_8 (s: SPECIAL [NATURAL_16]; a_result: STRING_8)
											-- Copy UTF-8 sequence corresponding to UTF-16 sequence s appended into a_result.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count))).is_equal (s)

									utf_16_to_utf_8_string_8 (s: SPECIAL [NATURAL_16]): STRING_8
											-- UTF-8 sequence corresponding to UTF-16 sequence s.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (utf_8_string_8_to_string_32 (Result)).is_equal (s)

									utf_16le_string_8_into_utf_8_string_8 (s: READABLE_STRING_8; a_result: STRING_8)
											-- Copy UTF-8 sequence corresponding to UTF-16LE sequence s appended into a_result.
										require
											even_count: (s.count & 1) = 0
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count))).same_string (s)

									utf_16le_string_8_to_utf_8_string_8 (s: READABLE_STRING_8): STRING_8
											-- UTF-8 sequence corresponding to UTF-16LE sequence s.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (utf_8_string_8_to_string_32 (Result)).same_string (s)
									
								feature -- UTF-32 to UTF-16
								
									escaped_utf_32_string_into_utf_16le_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
											-- Copy UTF-16LE sequence corresponding to s interpreted as a UTF-32 sequence that could be
											-- escaped appended into a_result.
											-- If s contains the `escape_character` followed by either "HH" or "uHHHH" where H stands
											-- for an hexadecimal digit, then s has been escaped and will be converted to what is
											-- expected by the current platform.
											-- Otherwise it will be ignored and it will be left as is.
											-- See the note clause for the class for more details on the encoding.
										ensure
											instance_free: class
											roundtrip: utf_16le_string_8_to_escaped_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

									escaped_utf_32_string_to_utf_16le_string_8 (s: READABLE_STRING_GENERAL): STRING_8
											-- UTF-16LE sequence corresponding to s interpreted as a UTF-32 sequence that could be escaped.
											-- If s contains the `escape_character` followed by either "HH" or "uHHHH" where H stands
											-- for an hexadecimal digit, then s has been escaped and will be converted to what is
											-- expected by the current platform.
											-- Otherwise it will be ignored and it will be left as is.
											-- See the note clause for the class for more details on the encoding.
										ensure
											instance_free: class
											roundtrip: utf_16le_string_8_to_escaped_string_32 (Result).same_string_general (s)

									escaped_utf_32_substring_into_utf_16_0_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write UTF-16 sequence corresponding to the substring of s,
											-- interpreted as a UTF-32 sequence, starting at index start_pos
											-- and ending at index end_pos to address p + p_offset and update the
											-- size of p to the number of written bytes.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is zero-terminated.
										require
											start_position_big_enough: start_pos >= 1
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos <= s.count
											even_p_offset: (p_offset \\ 2) = 0
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											p_count_may_increase: p.count >= old p.count
											roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_escaped_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s.substring (start_pos, end_pos))
											roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_escaped_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s.substring (start_pos, end_pos))

									string_32_into_utf_16_0_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write UTF-16 sequence corresponding to s with terminating zero
											-- to address p + p_offset and update the size of p to the number of written bytes.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is zero-terminated.
										require
											even_p_offset: (p_offset \\ 2) = 0
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string (s)
											roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string (s)

									string_32_into_utf_16_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write UTF-16 sequence corresponding to s to address p + p_offset
											-- and update the size of p to the number of written bytes.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is not zero-terminated.
										require
											even_p_offset: (p_offset \\ 2) = 0
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string (s)
											roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string (s)

									string_32_to_utf_16 (s: READABLE_STRING_32): SPECIAL [NATURAL_16]
											-- UTF-16 sequence corresponding to s.
											-- The sequence is not zero-terminated.
										ensure
											instance_free: class
											roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then ∀ n: Result ¦
													n.to_natural_32 = (l_ref.code (@n.target_index * 2 + 1) | (l_ref.code ((@n.target_index + 1) * 2) |<< 8))

									string_32_to_utf_16_0 (s: READABLE_STRING_32): SPECIAL [NATURAL_16]
											-- UTF-16 sequence corresponding to s with terminating zero.
										ensure
											instance_free: class
											roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then ∀ n: Result.resized_area_with_default (0, Result.count - 1) ¦
													n.to_natural_32 = (l_ref.code (@n.target_index * 2 + 1) | ((l_ref.code ((@n.target_index + 1) * 2)) |<< 8))

									utf_32_string_into_utf_16le_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
											-- Copy UTF-16LE sequence corresponding to s interpreted as a UTF-32 sequence
											-- appended into a_result.
										ensure
											instance_free: class
											roundtrip: utf_16le_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

									utf_32_string_to_utf_16 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_16]
											-- UTF-16 sequence corresponding to s interpreted as a UTF-32 sequence.
											-- The sequence is not zero-terminated.
										ensure
											instance_free: class
											roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then ∀ n: Result ¦
													n.to_natural_32 = (l_ref.code (@n.target_index * 2 + 1) | (l_ref.code ((@n.target_index + 1) * 2) |<< 8))

									utf_32_string_to_utf_16_0 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_16]
											-- UTF-16 sequence corresponding to s, interpreted as a UTF-32 sequence,
											-- with terminating zero.
										ensure
											instance_free: class
											roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then ∀ x: Result.resized_area_with_default (0, Result.count - 1) ¦
													x.to_natural_32 = (l_ref.code (@x.target_index * 2 + 1) | ((l_ref.code ((@x.target_index + 1) * 2)) |<< 8))

									utf_32_string_to_utf_16le_string_8 (s: READABLE_STRING_GENERAL): STRING_8
											-- UTF-16LE sequence corresponding to s interpreted as a UTF-32 sequence
										ensure
											instance_free: class
											roundtrip: utf_16le_string_8_to_string_32 (Result).same_string_general (s)

									utf_32_substring_into_utf_16_0_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write UTF-16 sequence corresponding to the substring of s,
											-- interpreted as a UTF-32 sequence, starting at index start_pos
											-- and ending at index end_pos to address p + p_offset and update the
											-- size of p to the number of written bytes.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is zero-terminated.
										require
											start_position_big_enough: start_pos >= 1
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos <= s.count
											even_p_offset: (p_offset \\ 2) = 0
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											p_count_may_increase: p.count >= old p.count
											roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s)
											roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s)

									utf_32_substring_into_utf_16_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write the UTF-16 sequence corresponding to the substring of s,
											-- interpreted as a UTF-32 sequence, starting at index start_pos
											-- and ending at index end_pos to address p + p_offset and update the
											-- size of p to the number of written bytes.
											-- p_offset is a non-negative, even byte offset into p.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is not zero-terminated.
											-- NOTE(review): the two sentences above disagree about zero-termination; the first
											-- roundtrip postcondition decodes up to (a_new_upper.item // 2) - 1, which suggests
											-- that a_new_upper.item is the byte index just past the last written code unit -- confirm.
										require
											start_position_big_enough: start_pos >= 1
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos <= s.count
											even_p_offset: (p_offset \\ 2) = 0
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											p_count_may_increase: p.count >= old p.count
												-- Only the substring start_pos .. end_pos of s is written, so decoding
												-- must give back that substring rather than the whole of s.
											roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s.substring (start_pos, end_pos))
											roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s.substring (start_pos, end_pos))
									
								feature -- UTF-32 to UTF-8
								
									escaped_utf_32_string_into_utf_8_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
											-- Append to a_result the UTF-8 sequence corresponding to s, where s is
											-- interpreted as a UTF-32 sequence that may be escaped.
											-- If s contains the `escape_character` followed by either "HH" or "uHHHH", where H stands
											-- for a hexadecimal digit, then s has been escaped and will be converted to what is
											-- expected by the current platform.
											-- Otherwise it will be ignored and it will be left as is.
											-- See the note clause for the class for more details on the encoding.
											-- The postcondition decodes only the characters of a_result located after its
											-- old count and expects them to give back s.
										ensure
											instance_free: class
											roundtrip: utf_8_string_8_to_escaped_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

									escaped_utf_32_string_to_utf_8_string_8 (s: READABLE_STRING_GENERAL): STRING_8
											-- UTF-8 sequence corresponding to s, where s is interpreted as a UTF-32 sequence
											-- that may be escaped.
											-- If s contains the `escape_character` followed by either "HH" or "uHHHH", where H stands
											-- for a hexadecimal digit, then s has been escaped and will be converted to what is
											-- expected by the current platform.
											-- Otherwise it will be ignored and it will be left as is.
											-- See the note clause for the class for more details on the encoding.
											-- The postcondition expects that decoding Result with
											-- utf_8_string_8_to_escaped_string_32 gives back s.
										ensure
											instance_free: class
											roundtrip: utf_8_string_8_to_escaped_string_32 (Result).same_string_general (s)

									escaped_utf_32_substring_into_utf_8_0_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write the UTF-8 sequence corresponding to the substring of s from start_pos to
											-- end_pos, interpreted as a UTF-32 sequence that may be escaped, with terminating
											-- zero to address p + p_offset and update the size of p to the
											-- number of written bytes.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is zero-terminated.
											-- If s contains the `escape_character` followed by either "HH" or "uHHHH", where H stands
											-- for a hexadecimal digit, then s has been escaped and will be converted to what is
											-- expected by the current platform.
											-- Otherwise it will be ignored and it will be left as is.
											-- See the note clause for the class for more details on the encoding.
										require
											start_position_big_enough: start_pos >= 1
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos <= s.count
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string_general (s.substring (start_pos, end_pos))
												-- The last valid index of p is p.count - 1: the decoder requires end_pos < p.count.
											roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U'.to_character_32)) implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count - 1, True).same_string_general (s.substring (start_pos, end_pos))

									string_32_into_utf_8_0_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write the UTF-8 sequence corresponding to s, with terminating zero,
											-- to address p + p_offset and update the size of p to the number of written bytes.
											-- p_offset is a non-negative offset into p.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is zero-terminated.
										require
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string (s)
												-- The last valid index of p is p.count - 1: the decoder requires end_pos < p.count.
											roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count - 1, True).same_string_general (s)

									string_32_into_utf_8_string_8 (s: READABLE_STRING_32; a_result: STRING_8)
											-- Append to a_result the UTF-8 sequence corresponding to s.
											-- The postcondition decodes only the characters of a_result located after its
											-- old count and expects them to give back s.
										ensure
											instance_free: class
											roundtrip: utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

									string_32_to_utf_8_string_8 (s: READABLE_STRING_32): STRING_8
											-- UTF-8 sequence corresponding to s.
											-- The postcondition expects that decoding Result with
											-- utf_8_string_8_to_string_32 gives back s.
										ensure
											instance_free: class
											roundtrip: utf_8_string_8_to_string_32 (Result).same_string (s)

									utf_32_code_into_utf_8_string_8 (c: NATURAL_32; a_result: STRING_8)
											-- Append to a_result the UTF-8 sequence corresponding to the code c.
											-- No roundtrip postcondition is stated for this feature; the only
											-- guarantee listed is that it is instance-free.
										ensure
											instance_free: class

									utf_32_string_into_utf_8_0_pointer (s: READABLE_STRING_GENERAL; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
											-- Write the UTF-8 sequence corresponding to s, interpreted as a UTF-32 sequence,
											-- with terminating zero to address p + p_offset and update the size of p to the
											-- number of written bytes.
											-- p_offset is a non-negative offset into p.
											-- If a_new_upper is provided, the upper index of p containing the zero-termination
											-- is written to a_new_upper.
											-- The sequence is zero-terminated.
										require
											p_offset_non_negative: p_offset >= 0
										ensure
											instance_free: class
											roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string_general (s)
												-- The last valid index of p is p.count - 1: the decoder requires end_pos < p.count.
											roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count - 1, True).same_string_general (s)

									utf_32_string_into_utf_8_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
											-- Append to a_result the UTF-8 sequence corresponding to s, where s is
											-- interpreted as a UTF-32 sequence.
											-- The postcondition decodes only the characters of a_result located after its
											-- old count and expects them to give back s.
										ensure
											instance_free: class
											roundtrip: utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

									utf_32_string_to_utf_8 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_8]
											-- UTF-8 sequence corresponding to s, interpreted as a UTF-32 sequence.
											-- The sequence is not zero-terminated.
											-- The postcondition compares every item of Result with the code of the character
											-- of utf_32_string_to_utf_8_string_8 (s) found one position further (index + 1).
										ensure
											instance_free: class
											roundtrip: attached utf_32_string_to_utf_8_string_8 (s) as l_ref and then ∀ n: Result ¦
													n.to_natural_32 = l_ref.code (@n.target_index + 1)

									utf_32_string_to_utf_8_0 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_8]
											-- UTF-8 sequence corresponding to s, interpreted as a UTF-32 sequence.
											-- The sequence is zero-terminated.
											-- Per the postconditions, Result holds one more item than
											-- utf_32_string_to_utf_8_string_8 (s); each character of that string equals the
											-- item of Result one position earlier (index - 1), and the last item of Result is 0.
										ensure
											instance_free: class
											attached_utf_8_string: attached utf_32_string_to_utf_8_string_8 (s) as l_ref
											count: Result.count = l_ref.count + 1
											roundtrip: ∀ x: l_ref ¦
													x = Result [@x.target_index - 1].to_character_8
											zero_terminated: Result [Result.upper] = 0

									utf_32_string_to_utf_8_string_8 (s: READABLE_STRING_GENERAL): STRING_8
											-- UTF-8 sequence corresponding to s, where s is interpreted as a UTF-32 sequence.
											-- The postcondition expects that decoding Result with
											-- utf_8_string_8_to_string_32 gives back s.
										ensure
											instance_free: class
											roundtrip: utf_8_string_8_to_string_32 (Result).same_string_general (s)
									
								feature -- UTF-8 to UTF-16
								
									utf_8_string_8_to_utf_16 (s: READABLE_STRING_8): SPECIAL [NATURAL_16]
											-- UTF-16 sequence corresponding to the UTF-8 sequence s.
											-- The roundtrip back to s through utf_16_to_utf_8_string_8 is only guaranteed
											-- when is_valid_utf_8_string_8 (s) holds.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_8_string_8 (s) implies utf_16_to_utf_8_string_8 (Result).same_string (s)

									utf_8_string_8_to_utf_16_0 (s: READABLE_STRING_8): SPECIAL [NATURAL_16]
											-- UTF-16 sequence corresponding to the UTF-8 sequence s, with terminating zero.
											-- The roundtrip back to s through utf_16_to_utf_8_string_8 is only guaranteed
											-- when is_valid_utf_8_string_8 (s) holds.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_8_string_8 (s) implies utf_16_to_utf_8_string_8 (Result).same_string (s)
									
								feature -- UTF-8 to UTF-32
								
									utf_8_0_pointer_into_escaped_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
											-- Append to a_result the {STRING_32} object corresponding to the UTF-8 sequence p,
											-- which is zero-terminated, where invalid UTF-8 sequences are escaped.
											-- The postcondition re-encodes the characters of a_result located after its old
											-- count with escaped_utf_32_string_to_utf_8_string_8 and compares each resulting
											-- character with the byte of p read one position earlier (index - 1).
										ensure
											instance_free: class
											roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as s and then ∀ c: s ¦
													c = p.read_natural_8 (@c.target_index - 1).to_character_8

									utf_8_0_pointer_to_escaped_string_32 (p: MANAGED_POINTER): STRING_32
											-- {STRING_32} object corresponding to the UTF-8 sequence p, which is zero-terminated,
											-- where invalid UTF-8 sequences are escaped.
											-- The postcondition re-encodes Result with escaped_utf_32_string_to_utf_8_string_8
											-- and compares each resulting character with the byte of p read one position
											-- earlier (index - 1).
										ensure
											instance_free: class
											roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (Result) as s and then ∀ c: s ¦
													c = p.read_natural_8 (@c.target_index - 1).to_character_8

									utf_8_0_subpointer_into_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN; a_result: STRING_32)
											-- Append to a_result the {STRING_32} object corresponding to the UTF-8 sequence p
											-- between indexes start_pos and end_pos, or up to the first null character
											-- encountered if a_stop_at_null, where invalid UTF-8 sequences are escaped.
											-- start_pos and end_pos are indexes into p, with 0 <= start_pos,
											-- start_pos <= end_pos + 1 and end_pos < p.count.
											-- The postcondition re-encodes the characters of a_result located after its old
											-- count and compares each resulting character with the matching byte of p,
											-- counted from start_pos.
										require
											start_position_big_enough: start_pos >= 0
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos < p.count
										ensure
											instance_free: class
											roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as s and then ∀ c: s ¦
													c = p.read_natural_8 (start_pos + @c.target_index - 1).to_character_8

									utf_8_0_subpointer_to_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): STRING_32
											-- {STRING_32} object corresponding to the UTF-8 sequence p between indexes
											-- start_pos and end_pos, or up to the first null character encountered if
											-- a_stop_at_null, where invalid UTF-8 sequences are escaped.
											-- start_pos and end_pos are indexes into p, with 0 <= start_pos,
											-- start_pos <= end_pos + 1 and end_pos < p.count.
											-- The postcondition re-encodes Result and compares each resulting character
											-- with the matching byte of p, counted from start_pos.
										require
											start_position_big_enough: start_pos >= 0
											end_position_big_enough: start_pos <= end_pos + 1
											end_pos_small_enough: end_pos < p.count
										ensure
											instance_free: class
											roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (Result) as s and then ∀ c: s ¦
													c = p.read_natural_8 (start_pos + @c.target_index - 1).to_character_8

									utf_8_string_8_into_escaped_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
											-- Append to a_result the STRING_32 corresponding to the UTF-8 sequence s,
											-- where invalid UTF-8 sequences are escaped.
											-- The postcondition re-encodes only the characters of a_result located after its
											-- old count and expects them to give back s; no validity condition on s is stated.
										ensure
											instance_free: class
											roundtrip: escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

									utf_8_string_8_into_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
											-- Append to a_result the STRING_32 corresponding to the UTF-8 sequence s.
											-- The roundtrip back to s, computed from the characters of a_result located after
											-- its old count, is only guaranteed when is_valid_utf_8_string_8 (s) holds.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_8_string_8 (s) implies utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

									utf_8_string_8_to_escaped_string_32 (s: READABLE_STRING_8): STRING_32
											-- STRING_32 corresponding to the UTF-8 sequence s, where invalid UTF-8 sequences are escaped.
											-- The postcondition expects that re-encoding Result with
											-- escaped_utf_32_string_to_utf_8_string_8 gives back s; no validity condition
											-- on s is stated.
										ensure
											instance_free: class
											roundtrip: escaped_utf_32_string_to_utf_8_string_8 (Result).same_string (s)

									utf_8_string_8_to_string_32 (s: READABLE_STRING_8): STRING_32
											-- STRING_32 corresponding to the UTF-8 sequence s.
											-- The roundtrip back to s through utf_32_string_to_utf_8_string_8 is only
											-- guaranteed when is_valid_utf_8_string_8 (s) holds.
										ensure
											instance_free: class
											roundtrip: is_valid_utf_8_string_8 (s) implies utf_32_string_to_utf_8_string_8 (Result).same_string (s)
									
								invariant
										-- Both clauses listed here come from ANY; this view shows no
										-- invariant clause specific to UTF_CONVERTER.
										-- from ANY
									reflexive_equality: standard_is_equal (Current)
									reflexive_conformance: conforms_to (Current)

								note
									ca_ignore: "CA011", "CA011: too many arguments"
									copyright: "Copyright (c) 1984-2021, Eiffel Software and others"
									license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
									source: "[
										Eiffel Software
										5949 Hollister Ave., Goleta, CA 93117 USA
										Telephone 805-685-1006, Fax 805-685-6869
										Website http://www.eiffel.com
										Customer support http://support.eiffel.com
									]"

								end -- class UTF_CONVERTER

Classes
Clusters
Cluster hierarchy
Chart
Relations
Flat contracts
Go to:
-- Generated by Eiffel Studio --
For more details: eiffel.org