convert.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // -----------------------------------------------------------------------------
  31. // Ruby <-> upb data conversion functions.
  32. //
  33. // This file also contains a few other assorted algorithms on upb_msgval.
  34. //
  35. // None of the algorithms in this file require any access to the internal
  36. // representation of Ruby or upb objects.
  37. // -----------------------------------------------------------------------------
  38. #include "convert.h"
  39. #include "message.h"
  40. #include "protobuf.h"
  41. #include "third_party/wyhash/wyhash.h"
  42. static upb_strview Convert_StringData(VALUE str, upb_arena *arena) {
  43. upb_strview ret;
  44. if (arena) {
  45. char *ptr = upb_arena_malloc(arena, RSTRING_LEN(str));
  46. memcpy(ptr, RSTRING_PTR(str), RSTRING_LEN(str));
  47. ret.data = ptr;
  48. } else {
  49. // Data is only needed temporarily (within map lookup).
  50. ret.data = RSTRING_PTR(str);
  51. }
  52. ret.size = RSTRING_LEN(str);
  53. return ret;
  54. }
  55. static bool is_ruby_num(VALUE value) {
  56. return (TYPE(value) == T_FLOAT ||
  57. TYPE(value) == T_FIXNUM ||
  58. TYPE(value) == T_BIGNUM);
  59. }
  60. static void Convert_CheckInt(const char* name, upb_fieldtype_t type,
  61. VALUE val) {
  62. if (!is_ruby_num(val)) {
  63. rb_raise(cTypeError,
  64. "Expected number type for integral field '%s' (given %s).", name,
  65. rb_class2name(CLASS_OF(val)));
  66. }
  67. // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
  68. // bound; we just need to do precision checks (i.e., disallow rounding) and
  69. // check for < 0 on unsigned types.
  70. if (TYPE(val) == T_FLOAT) {
  71. double dbl_val = NUM2DBL(val);
  72. if (floor(dbl_val) != dbl_val) {
  73. rb_raise(rb_eRangeError,
  74. "Non-integral floating point value assigned to integer field "
  75. "'%s' (given %s).",
  76. name, rb_class2name(CLASS_OF(val)));
  77. }
  78. }
  79. if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) {
  80. if (NUM2DBL(val) < 0) {
  81. rb_raise(
  82. rb_eRangeError,
  83. "Assigning negative value to unsigned integer field '%s' (given %s).",
  84. name, rb_class2name(CLASS_OF(val)));
  85. }
  86. }
  87. }
  88. static int32_t Convert_ToEnum(VALUE value, const char* name,
  89. const upb_enumdef* e) {
  90. int32_t val;
  91. switch (TYPE(value)) {
  92. case T_FLOAT:
  93. case T_FIXNUM:
  94. case T_BIGNUM:
  95. Convert_CheckInt(name, UPB_TYPE_INT32, value);
  96. val = NUM2INT(value);
  97. break;
  98. case T_STRING:
  99. if (!upb_enumdef_ntoi(e, RSTRING_PTR(value), RSTRING_LEN(value), &val)) {
  100. goto unknownval;
  101. }
  102. break;
  103. case T_SYMBOL:
  104. if (!upb_enumdef_ntoiz(e, rb_id2name(SYM2ID(value)), &val)) {
  105. goto unknownval;
  106. }
  107. break;
  108. default:
  109. rb_raise(cTypeError,
  110. "Expected number or symbol type for enum field '%s'.", name);
  111. }
  112. return val;
  113. unknownval:
  114. rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name);
  115. }
  116. upb_msgval Convert_RubyToUpb(VALUE value, const char* name, TypeInfo type_info,
  117. upb_arena* arena) {
  118. upb_msgval ret;
  119. switch (type_info.type) {
  120. case UPB_TYPE_FLOAT:
  121. if (!is_ruby_num(value)) {
  122. rb_raise(cTypeError, "Expected number type for float field '%s' (given %s).",
  123. name, rb_class2name(CLASS_OF(value)));
  124. }
  125. ret.float_val = NUM2DBL(value);
  126. break;
  127. case UPB_TYPE_DOUBLE:
  128. if (!is_ruby_num(value)) {
  129. rb_raise(cTypeError, "Expected number type for double field '%s' (given %s).",
  130. name, rb_class2name(CLASS_OF(value)));
  131. }
  132. ret.double_val = NUM2DBL(value);
  133. break;
  134. case UPB_TYPE_BOOL: {
  135. if (value == Qtrue) {
  136. ret.bool_val = 1;
  137. } else if (value == Qfalse) {
  138. ret.bool_val = 0;
  139. } else {
  140. rb_raise(cTypeError, "Invalid argument for boolean field '%s' (given %s).",
  141. name, rb_class2name(CLASS_OF(value)));
  142. }
  143. break;
  144. }
  145. case UPB_TYPE_STRING: {
  146. VALUE utf8 = rb_enc_from_encoding(rb_utf8_encoding());
  147. if (CLASS_OF(value) == rb_cSymbol) {
  148. value = rb_funcall(value, rb_intern("to_s"), 0);
  149. } else if (CLASS_OF(value) != rb_cString) {
  150. rb_raise(cTypeError, "Invalid argument for string field '%s' (given %s).",
  151. name, rb_class2name(CLASS_OF(value)));
  152. }
  153. if (rb_obj_encoding(value) != utf8) {
  154. // Note: this will not duplicate underlying string data unless necessary.
  155. value = rb_str_encode(value, utf8, 0, Qnil);
  156. if (rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
  157. rb_raise(rb_eEncodingError, "String is invalid UTF-8");
  158. }
  159. }
  160. ret.str_val = Convert_StringData(value, arena);
  161. break;
  162. }
  163. case UPB_TYPE_BYTES: {
  164. VALUE bytes = rb_enc_from_encoding(rb_ascii8bit_encoding());
  165. if (CLASS_OF(value) != rb_cString) {
  166. rb_raise(cTypeError, "Invalid argument for bytes field '%s' (given %s).",
  167. name, rb_class2name(CLASS_OF(value)));
  168. }
  169. if (rb_obj_encoding(value) != bytes) {
  170. // Note: this will not duplicate underlying string data unless necessary.
  171. // TODO(haberman): is this really necessary to get raw bytes?
  172. value = rb_str_encode(value, bytes, 0, Qnil);
  173. }
  174. ret.str_val = Convert_StringData(value, arena);
  175. break;
  176. }
  177. case UPB_TYPE_MESSAGE:
  178. ret.msg_val =
  179. Message_GetUpbMessage(value, type_info.def.msgdef, name, arena);
  180. break;
  181. case UPB_TYPE_ENUM:
  182. ret.int32_val = Convert_ToEnum(value, name, type_info.def.enumdef);
  183. break;
  184. case UPB_TYPE_INT32:
  185. case UPB_TYPE_INT64:
  186. case UPB_TYPE_UINT32:
  187. case UPB_TYPE_UINT64:
  188. Convert_CheckInt(name, type_info.type, value);
  189. switch (type_info.type) {
  190. case UPB_TYPE_INT32:
  191. ret.int32_val = NUM2INT(value);
  192. break;
  193. case UPB_TYPE_INT64:
  194. ret.int64_val = NUM2LL(value);
  195. break;
  196. case UPB_TYPE_UINT32:
  197. ret.uint32_val = NUM2UINT(value);
  198. break;
  199. case UPB_TYPE_UINT64:
  200. ret.uint64_val = NUM2ULL(value);
  201. break;
  202. default:
  203. break;
  204. }
  205. break;
  206. default:
  207. break;
  208. }
  209. return ret;
  210. }
  211. VALUE Convert_UpbToRuby(upb_msgval upb_val, TypeInfo type_info, VALUE arena) {
  212. switch (type_info.type) {
  213. case UPB_TYPE_FLOAT:
  214. return DBL2NUM(upb_val.float_val);
  215. case UPB_TYPE_DOUBLE:
  216. return DBL2NUM(upb_val.double_val);
  217. case UPB_TYPE_BOOL:
  218. return upb_val.bool_val ? Qtrue : Qfalse;
  219. case UPB_TYPE_INT32:
  220. return INT2NUM(upb_val.int32_val);
  221. case UPB_TYPE_INT64:
  222. return LL2NUM(upb_val.int64_val);
  223. case UPB_TYPE_UINT32:
  224. return UINT2NUM(upb_val.uint32_val);
  225. case UPB_TYPE_UINT64:
  226. return ULL2NUM(upb_val.int64_val);
  227. case UPB_TYPE_ENUM: {
  228. const char* name =
  229. upb_enumdef_iton(type_info.def.enumdef, upb_val.int32_val);
  230. if (name) {
  231. return ID2SYM(rb_intern(name));
  232. } else {
  233. return INT2NUM(upb_val.int32_val);
  234. }
  235. }
  236. case UPB_TYPE_STRING: {
  237. VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
  238. rb_enc_associate(str_rb, rb_utf8_encoding());
  239. rb_obj_freeze(str_rb);
  240. return str_rb;
  241. }
  242. case UPB_TYPE_BYTES: {
  243. VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
  244. rb_enc_associate(str_rb, rb_ascii8bit_encoding());
  245. rb_obj_freeze(str_rb);
  246. return str_rb;
  247. }
  248. case UPB_TYPE_MESSAGE:
  249. return Message_GetRubyWrapper((upb_msg*)upb_val.msg_val,
  250. type_info.def.msgdef, arena);
  251. default:
  252. rb_raise(rb_eRuntimeError, "Convert_UpbToRuby(): Unexpected type %d",
  253. (int)type_info.type);
  254. }
  255. }
  256. upb_msgval Msgval_DeepCopy(upb_msgval msgval, TypeInfo type_info,
  257. upb_arena* arena) {
  258. upb_msgval new_msgval;
  259. switch (type_info.type) {
  260. default:
  261. memcpy(&new_msgval, &msgval, sizeof(msgval));
  262. break;
  263. case UPB_TYPE_STRING:
  264. case UPB_TYPE_BYTES: {
  265. size_t n = msgval.str_val.size;
  266. char *mem = upb_arena_malloc(arena, n);
  267. new_msgval.str_val.data = mem;
  268. new_msgval.str_val.size = n;
  269. memcpy(mem, msgval.str_val.data, n);
  270. break;
  271. }
  272. case UPB_TYPE_MESSAGE:
  273. new_msgval.msg_val =
  274. Message_deep_copy(msgval.msg_val, type_info.def.msgdef, arena);
  275. break;
  276. }
  277. return new_msgval;
  278. }
  279. bool Msgval_IsEqual(upb_msgval val1, upb_msgval val2, TypeInfo type_info) {
  280. switch (type_info.type) {
  281. case UPB_TYPE_BOOL:
  282. return memcmp(&val1, &val2, 1) == 0;
  283. case UPB_TYPE_FLOAT:
  284. case UPB_TYPE_INT32:
  285. case UPB_TYPE_UINT32:
  286. case UPB_TYPE_ENUM:
  287. return memcmp(&val1, &val2, 4) == 0;
  288. case UPB_TYPE_DOUBLE:
  289. case UPB_TYPE_INT64:
  290. case UPB_TYPE_UINT64:
  291. return memcmp(&val1, &val2, 8) == 0;
  292. case UPB_TYPE_STRING:
  293. case UPB_TYPE_BYTES:
  294. return val1.str_val.size == val2.str_val.size &&
  295. memcmp(val1.str_val.data, val2.str_val.data,
  296. val1.str_val.size) == 0;
  297. case UPB_TYPE_MESSAGE:
  298. return Message_Equal(val1.msg_val, val2.msg_val, type_info.def.msgdef);
  299. default:
  300. rb_raise(rb_eRuntimeError, "Internal error, unexpected type");
  301. }
  302. }
  303. uint64_t Msgval_GetHash(upb_msgval val, TypeInfo type_info, uint64_t seed) {
  304. switch (type_info.type) {
  305. case UPB_TYPE_BOOL:
  306. return wyhash(&val, 1, seed, _wyp);
  307. case UPB_TYPE_FLOAT:
  308. case UPB_TYPE_INT32:
  309. case UPB_TYPE_UINT32:
  310. case UPB_TYPE_ENUM:
  311. return wyhash(&val, 4, seed, _wyp);
  312. case UPB_TYPE_DOUBLE:
  313. case UPB_TYPE_INT64:
  314. case UPB_TYPE_UINT64:
  315. return wyhash(&val, 8, seed, _wyp);
  316. case UPB_TYPE_STRING:
  317. case UPB_TYPE_BYTES:
  318. return wyhash(val.str_val.data, val.str_val.size, seed, _wyp);
  319. case UPB_TYPE_MESSAGE:
  320. return Message_Hash(val.msg_val, type_info.def.msgdef, seed);
  321. default:
  322. rb_raise(rb_eRuntimeError, "Internal error, unexpected type");
  323. }
  324. }