|  | @@ -0,0 +1,521 @@
 | 
	
		
			
				|  |  | +// Copyright 2005-2008 Google Inc. All Rights Reserved.
 | 
	
		
			
				|  |  | +// Author: jrm@google.com (Jim Meehan)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				#include <string.h>

				#include <google/protobuf/stubs/common.h>
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +namespace google {
 | 
	
		
			
				|  |  | +namespace protobuf {
 | 
	
		
			
				|  |  | +namespace internal {
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// These four-byte entries compactly encode how many bytes 0..255 to delete
 | 
	
		
			
				|  |  | +// in making a string replacement, how many bytes to add 0..255, and the offset
 | 
	
		
			
				|  |  | +// 0..64k-1 of the replacement string in remap_string.
 | 
	
		
			
				|  |  | +struct RemapEntry {
 | 
	
		
			
				|  |  | +  uint8 delete_bytes;
 | 
	
		
			
				|  |  | +  uint8 add_bytes;
 | 
	
		
			
				|  |  | +  uint16 bytes_offset;
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				// Exit type codes for state tables.
				//
				// A state-table entry is either a next-state number or one of these exit
				// codes. The scanner tells them apart by value: anything at or above
				// kExitIllegalStructure is an exit code, so the codes must stay contiguous
				// and end at kExitNone (255), the largest value a one-byte table entry can
				// hold. kExitDstSpaceFull sits just below that range: it is only generated
				// by executable code, never stored in a table.
				typedef enum {
				  kExitDstSpaceFull     = 239,  // Produced by code only, never a table entry.
				  kExitIllegalStructure = 240,  // First code that may appear in a table.
				  kExitOK               = 241,
				  kExitReject           = 242,
				  kExitReplace1         = 243,
				  kExitReplace2         = 244,
				  kExitReplace3         = 245,
				  kExitReplace21        = 246,
				  kExitReplace31        = 247,
				  kExitReplace32        = 248,
				  kExitReplaceOffset1   = 249,
				  kExitReplaceOffset2   = 250,
				  kExitReplace1S0       = 251,
				  kExitSpecial          = 252,
				  kExitDoAgain          = 253,
				  kExitRejectAlt        = 254,
				  kExitNone             = 255   // Upper end of the exit-code range.
				} ExitReason;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// This struct represents one entire state table. The three initialized byte
 | 
	
		
			
				|  |  | +// areas are state_table, remap_base, and remap_string. state0 and state0_size
 | 
	
		
			
				|  |  | +// give the byte offset and length within state_table of the initial state --
 | 
	
		
			
				|  |  | +// table lookups are expected to start and end in this state, but for
 | 
	
		
			
				|  |  | +// truncated UTF-8 strings, may end in a different state. These allow a quick
 | 
	
		
			
				|  |  | +// test for that condition. entry_shift is 8 for tables subscripted by a full
 | 
	
		
			
				|  |  | +// byte value and 6 for space-optimized tables subscripted by only six
 | 
	
		
			
				|  |  | +// significant bits in UTF-8 continuation bytes.
 | 
	
		
			
				|  |  | +typedef struct {
 | 
	
		
			
				|  |  | +  const uint32 state0;
 | 
	
		
			
				|  |  | +  const uint32 state0_size;
 | 
	
		
			
				|  |  | +  const uint32 total_size;
 | 
	
		
			
				|  |  | +  const int max_expand;
 | 
	
		
			
				|  |  | +  const int entry_shift;
 | 
	
		
			
				|  |  | +  const int bytes_per_entry;
 | 
	
		
			
				|  |  | +  const uint32 losub;
 | 
	
		
			
				|  |  | +  const uint32 hiadd;
 | 
	
		
			
				|  |  | +  const uint8* state_table;
 | 
	
		
			
				|  |  | +  const RemapEntry* remap_base;
 | 
	
		
			
				|  |  | +  const uint8* remap_string;
 | 
	
		
			
				|  |  | +  const uint8* fast_state;
 | 
	
		
			
				|  |  | +} UTF8StateMachineObj;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +typedef UTF8StateMachineObj UTF8ScanObj;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				// Three-character shorthands for the exit codes, so that a 16-entry row of
				// a state table fits on one source line. All of them are #undef'd again
				// once the tables have been defined.

				// Structural errors and rejections.
				#define X__ (kExitIllegalStructure)
				#define RJ_ (kExitReject)
				#define RJA (kExitRejectAlt)

				// Replacement exits.
				#define S1_ (kExitReplace1)
				#define S2_ (kExitReplace2)
				#define S3_ (kExitReplace3)
				#define S21 (kExitReplace21)
				#define S31 (kExitReplace31)
				#define S32 (kExitReplace32)
				#define S11 (kExitReplace1S0)
				#define T1_ (kExitReplaceOffset1)
				#define T2_ (kExitReplaceOffset2)

				// Control exits.
				#define SP_ (kExitSpecial)
				#define D__ (kExitDoAgain)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				// Scalar parameters of the utf8acceptnonsurrogates scanner.
				// The table consists of 9 state blocks of 256 one-byte entries each.

				// Offset of the initial state: it is the first block, state[0].
				static const unsigned int utf8acceptnonsurrogates_STATE0 = 0;
				// Size of the initial state; state[1] therefore starts at offset 256.
				static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256;
				// 9 blocks * 256 entries.
				static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 9 * 256;
				static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0;
				// Entries are subscripted by a full byte value (next state << 8).
				static const unsigned int utf8acceptnonsurrogates_SHIFT = 8;
				static const unsigned int utf8acceptnonsurrogates_BYTES = 1;
				// Fast-path range check: 0x20 is subtracted from every byte of a 4-byte
				// group, so any byte below 0x20 ends up with its high bit set and the
				// group is re-examined byte by byte.
				static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020;
				// Nothing is added on the high side; only bytes >= 0x80 trip that term.
				static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static const uint8 utf8acceptnonsurrogates[] = {
 | 
	
		
			
				|  |  | +// state[0] 0x000000 Byte 1
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  2,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   7,   3,   3,
 | 
	
		
			
				|  |  | +  4,   5,   5,   5,   6, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[1] 0x000080 Byte 2 of 2
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[2] 0x000000 Byte 2 of 3
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[3] 0x001000 Byte 2 of 3
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[4] 0x000000 Byte 2 of 4
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[5] 0x040000 Byte 2 of 4
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[6] 0x100000 Byte 2 of 4
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[7] 0x00d000 Byte 2 of 3
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
 | 
	
		
			
				|  |  | +  8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
 | 
	
		
			
				|  |  | +  8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// state[8] 0x00d800 Byte 3 of 3
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
 | 
	
		
			
				|  |  | +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
 | 
	
		
			
				|  |  | +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
 | 
	
		
			
				|  |  | +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// Remap base[0] = (del, add, string_offset)
 | 
	
		
			
				|  |  | +static const RemapEntry utf8acceptnonsurrogates_remap_base[] = {
 | 
	
		
			
				|  |  | +{0, 0, 0} };
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// Remap string[0]
 | 
	
		
			
				|  |  | +static const unsigned char utf8acceptnonsurrogates_remap_string[] = {
 | 
	
		
			
				|  |  | +0 };
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				// Per-byte table for the scanner's 8-byte fast path: 0 for bytes 00..7F,
				// 1 for bytes 80..FF. UTF8_FILLn(v) expands to n copies of v.
				#define UTF8_FILL16(v) v, v, v, v, v, v, v, v,  v, v, v, v, v, v, v, v
				#define UTF8_FILL64(v) \
				  UTF8_FILL16(v), UTF8_FILL16(v), UTF8_FILL16(v), UTF8_FILL16(v)
				#define UTF8_FILL128(v) UTF8_FILL64(v), UTF8_FILL64(v)

				static const unsigned char utf8acceptnonsurrogates_fast[256] = {
				  UTF8_FILL128(0),  // 00..7F
				  UTF8_FILL128(1)   // 80..FF
				};

				#undef UTF8_FILL128
				#undef UTF8_FILL64
				#undef UTF8_FILL16
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static const UTF8ScanObj utf8acceptnonsurrogates_obj = {
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_STATE0,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_STATE0_SIZE,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_TOTAL_SIZE,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_MAX_EXPAND_X4,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_SHIFT,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_BYTES,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_LOSUB,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_HIADD,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_remap_base,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_remap_string,
 | 
	
		
			
				|  |  | +  utf8acceptnonsurrogates_fast
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				// The table shorthands are only meant for the table definitions; drop
				// them so they cannot leak into the code that follows.
				#undef D__
				#undef RJA
				#undef RJ_
				#undef S11
				#undef S1_
				#undef S21
				#undef S2_
				#undef S31
				#undef S32
				#undef S3_
				#undef SP_
				#undef T1_
				#undef T2_
				#undef X__
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// Return true if current Tbl pointer is within state0 range
 | 
	
		
			
				|  |  | +// Note that unsigned compare checks both ends of range simultaneously
 | 
	
		
			
				|  |  | +static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
 | 
	
		
			
				|  |  | +  const uint8* Tbl0 = &st->state_table[st->state0];
 | 
	
		
			
				|  |  | +  return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +// Scan a UTF-8 string based on state table.
 | 
	
		
			
				|  |  | +// Always scan complete UTF-8 characters
 | 
	
		
			
				|  |  | +// Set number of bytes scanned. Return reason for exiting
 | 
	
		
			
				|  |  | +int UTF8GenericScan(const UTF8ScanObj* st,
 | 
	
		
			
				|  |  | +                    const char * str,
 | 
	
		
			
				|  |  | +                    int str_length,
 | 
	
		
			
				|  |  | +                    int* bytes_consumed) {
 | 
	
		
			
				|  |  | +  *bytes_consumed = 0;
 | 
	
		
			
				|  |  | +  if (str_length == 0) return kExitOK;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  int eshift = st->entry_shift;
 | 
	
		
			
				|  |  | +  const uint8* isrc = reinterpret_cast<const uint8*>(str);
 | 
	
		
			
				|  |  | +  const uint8* src = isrc;
 | 
	
		
			
				|  |  | +  const uint8* srclimit = isrc + str_length;
 | 
	
		
			
				|  |  | +  const uint8* srclimit8 = srclimit - 7;
 | 
	
		
			
				|  |  | +  const uint8* Tbl_0 = &st->state_table[st->state0];
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | + DoAgain:
 | 
	
		
			
				|  |  | +  // Do state-table scan
 | 
	
		
			
				|  |  | +  int e = 0;
 | 
	
		
			
				|  |  | +  uint8 c;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  // Do fast for groups of 8 identity bytes.
 | 
	
		
			
				|  |  | +  // This covers a lot of 7-bit ASCII ~8x faster then the 1-byte loop,
 | 
	
		
			
				|  |  | +  // including slowing slightly on cr/lf/ht
 | 
	
		
			
				|  |  | +  //----------------------------
 | 
	
		
			
				|  |  | +  const uint8* Tbl2 = &st->fast_state[0];
 | 
	
		
			
				|  |  | +  uint32 losub = st->losub;
 | 
	
		
			
				|  |  | +  uint32 hiadd = st->hiadd;
 | 
	
		
			
				|  |  | +  while (src < srclimit8) {
 | 
	
		
			
				|  |  | +    uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0];
 | 
	
		
			
				|  |  | +    uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1];
 | 
	
		
			
				|  |  | +    src += 8;
 | 
	
		
			
				|  |  | +    // This is a fast range check for all bytes in [lowsub..0x80-hiadd)
 | 
	
		
			
				|  |  | +    uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
 | 
	
		
			
				|  |  | +                  (s4567 - losub) | (s4567 + hiadd);
 | 
	
		
			
				|  |  | +    if ((temp & 0x80808080) != 0) {
 | 
	
		
			
				|  |  | +      // We typically end up here on cr/lf/ht; src was incremented
 | 
	
		
			
				|  |  | +      int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
 | 
	
		
			
				|  |  | +                  (Tbl2[src[-6]] | Tbl2[src[-5]]);
 | 
	
		
			
				|  |  | +      if (e0123 != 0) {
 | 
	
		
			
				|  |  | +        src -= 8;
 | 
	
		
			
				|  |  | +        break;
 | 
	
		
			
				|  |  | +      }    // Exit on Non-interchange
 | 
	
		
			
				|  |  | +      e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
 | 
	
		
			
				|  |  | +              (Tbl2[src[-2]] | Tbl2[src[-1]]);
 | 
	
		
			
				|  |  | +      if (e0123 != 0) {
 | 
	
		
			
				|  |  | +        src -= 4;
 | 
	
		
			
				|  |  | +        break;
 | 
	
		
			
				|  |  | +      }    // Exit on Non-interchange
 | 
	
		
			
				|  |  | +      // Else OK, go around again
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  //----------------------------
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  // Byte-at-a-time scan
 | 
	
		
			
				|  |  | +  //----------------------------
 | 
	
		
			
				|  |  | +  const uint8* Tbl = Tbl_0;
 | 
	
		
			
				|  |  | +  while (src < srclimit) {
 | 
	
		
			
				|  |  | +    c = *src;
 | 
	
		
			
				|  |  | +    e = Tbl[c];
 | 
	
		
			
				|  |  | +    src++;
 | 
	
		
			
				|  |  | +    if (e >= kExitIllegalStructure) {break;}
 | 
	
		
			
				|  |  | +    Tbl = &Tbl_0[e << eshift];
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +  //----------------------------
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  // Exit posibilities:
 | 
	
		
			
				|  |  | +  //  Some exit code, !state0, back up over last char
 | 
	
		
			
				|  |  | +  //  Some exit code, state0, back up one byte exactly
 | 
	
		
			
				|  |  | +  //  source consumed, !state0, back up over partial char
 | 
	
		
			
				|  |  | +  //  source consumed, state0, exit OK
 | 
	
		
			
				|  |  | +  // For illegal byte in state0, avoid backup up over PREVIOUS char
 | 
	
		
			
				|  |  | +  // For truncated last char, back up to beginning of it
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  if (e >= kExitIllegalStructure) {
 | 
	
		
			
				|  |  | +    // Back up over exactly one byte of rejected/illegal UTF-8 character
 | 
	
		
			
				|  |  | +    src--;
 | 
	
		
			
				|  |  | +    // Back up more if needed
 | 
	
		
			
				|  |  | +    if (!InStateZero(st, Tbl)) {
 | 
	
		
			
				|  |  | +      do {
 | 
	
		
			
				|  |  | +        src--;
 | 
	
		
			
				|  |  | +      } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  } else if (!InStateZero(st, Tbl)) {
 | 
	
		
			
				|  |  | +    // Back up over truncated UTF-8 character
 | 
	
		
			
				|  |  | +    e = kExitIllegalStructure;
 | 
	
		
			
				|  |  | +    do {
 | 
	
		
			
				|  |  | +      src--;
 | 
	
		
			
				|  |  | +    } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
 | 
	
		
			
				|  |  | +  } else {
 | 
	
		
			
				|  |  | +    // Normal termination, source fully consumed
 | 
	
		
			
				|  |  | +    e = kExitOK;
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  if (e == kExitDoAgain) {
 | 
	
		
			
				|  |  | +    // Loop back up to the fast scan
 | 
	
		
			
				|  |  | +    goto DoAgain;
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  *bytes_consumed = src - isrc;
 | 
	
		
			
				|  |  | +  return e;
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +int UTF8GenericScanFastAscii(const UTF8ScanObj* st,
 | 
	
		
			
				|  |  | +                    const char * str,
 | 
	
		
			
				|  |  | +                    int str_length,
 | 
	
		
			
				|  |  | +                    int* bytes_consumed) {
 | 
	
		
			
				|  |  | +  *bytes_consumed = 0;
 | 
	
		
			
				|  |  | +  if (str_length == 0) return kExitOK;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  const uint8* isrc =  reinterpret_cast<const uint8*>(str);
 | 
	
		
			
				|  |  | +  const uint8* src = isrc;
 | 
	
		
			
				|  |  | +  const uint8* srclimit = isrc + str_length;
 | 
	
		
			
				|  |  | +  const uint8* srclimit8 = srclimit - 7;
 | 
	
		
			
				|  |  | +  int n;
 | 
	
		
			
				|  |  | +  int rest_consumed;
 | 
	
		
			
				|  |  | +  int exit_reason;
 | 
	
		
			
				|  |  | +  do {
 | 
	
		
			
				|  |  | +    while ((src < srclimit8) &&
 | 
	
		
			
				|  |  | +           (((reinterpret_cast<const uint32*>(src)[0] |
 | 
	
		
			
				|  |  | +              reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) {
 | 
	
		
			
				|  |  | +      src += 8;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    while ((src < srclimit) && (src[0] < 0x80)) {
 | 
	
		
			
				|  |  | +      src++;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +    // Run state table on the rest
 | 
	
		
			
				|  |  | +    n = src - isrc;
 | 
	
		
			
				|  |  | +    exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed);
 | 
	
		
			
				|  |  | +    src += rest_consumed;
 | 
	
		
			
				|  |  | +  } while ( exit_reason == kExitDoAgain );
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  *bytes_consumed = src - isrc;
 | 
	
		
			
				|  |  | +  return exit_reason;
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				// Hack:  On some compilers the static tables are initialized at startup,
				//   and they cannot be used before that has happened.  Some Protocol
				//   Buffer parsing runs at static init time and may try to validate UTF-8
				//   strings.  Since UTF-8 validation is only used for debugging anyway,
				//   validation simply reports success until initialization has occurred.
				//
				//   init_detector is defined after the tables in this file; its constructor
				//   flips module_initialized_ to true when it runs.
				namespace {

				// True once init_detector has been constructed.
				bool module_initialized_ = false;

				struct InitDetector {
				  InitDetector();
				};

				InitDetector::InitDetector() {
				  module_initialized_ = true;
				}

				InitDetector init_detector;

				}  // namespace
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +bool IsStructurallyValidUTF8(const char* buf, int len) {
 | 
	
		
			
				|  |  | +  if (!module_initialized_) return true;
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +  int bytes_consumed = 0;
 | 
	
		
			
				|  |  | +  UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
 | 
	
		
			
				|  |  | +                           buf, len, &bytes_consumed);
 | 
	
		
			
				|  |  | +  return (bytes_consumed == len);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +}  // namespace internal
 | 
	
		
			
				|  |  | +}  // namespace protobuf
 | 
	
		
			
				|  |  | +}  // namespace google
 |