Program.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // http://github.com/jskeet/dotnet-protobufs/
  4. // Original C++/Java/Python code:
  5. // http://code.google.com/p/protobuf/
  6. //
  7. // Redistribution and use in source and binary forms, with or without
  8. // modification, are permitted provided that the following conditions are
  9. // met:
  10. //
  11. // * Redistributions of source code must retain the above copyright
  12. // notice, this list of conditions and the following disclaimer.
  13. // * Redistributions in binary form must reproduce the above
  14. // copyright notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other materials provided with the
  16. // distribution.
  17. // * Neither the name of Google Inc. nor the names of its
  18. // contributors may be used to endorse or promote products derived from
  19. // this software without specific prior written permission.
  20. //
  21. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. using System;
  33. using System.Collections;
  34. using System.Collections.Generic;
  35. using System.IO;
  36. using Google.ProtocolBuffers.Descriptors;
  37. namespace Google.ProtocolBuffers.ProtoMunge
  38. {
  /// <summary>
  /// Utility console application which takes a message descriptor and a corresponding message,
  /// and produces a new message with similar but random data. The data is the same length
  /// as the original, but with random values within appropriate bands. (For instance, a compressed
  /// integer in the range 0-127 will end up as another integer in the same range, to keep the length
  /// the same.)
  /// TODO(jonskeet): Potentially refactor to use an instance instead, making it simpler to
  /// be thread-safe for external use.
  /// </summary>
  public sealed class Program {

    /// <summary>
    /// Single source of randomness shared by every munging helper in this class.
    /// </summary>
    static readonly Random rng = new Random();

    /// <summary>
    /// Entry point: loads the default message for the type named in args[0], parses the
    /// file args[1] into it, munges the result and writes the munged bytes to args[2].
    /// </summary>
    /// <param name="args">Exactly three values: descriptor type name, input file, output file.</param>
    /// <returns>0 on success; 1 on bad usage, an unknown type name, or any other failure.</returns>
    static int Main(string[] args) {
      if (args.Length != 3) {
        Console.Error.WriteLine("Usage: ProtoMunge <descriptor type name> <input data> <output file>");
        Console.Error.WriteLine("The descriptor type name is the fully-qualified message name, including assembly.");
        Console.Error.WriteLine("(At a future date it may be possible to do this without building the .NET assembly at all.)");
        return 1;
      }

      IMessage defaultMessage;
      try {
        defaultMessage = MessageUtil.GetDefaultMessage(args[0]);
      } catch (ArgumentException e) {
        Console.Error.WriteLine(e.Message);
        return 1;
      }

      try {
        IBuilder builder = defaultMessage.WeakCreateBuilderForType();
        byte[] inputData = File.ReadAllBytes(args[1]);
        builder.WeakMergeFrom(ByteString.CopyFrom(inputData));
        IMessage original = builder.WeakBuild();
        IMessage munged = Munge(original);
        // The whole point of the tool is a same-sized result, so refuse to write anything else.
        if (original.SerializedSize != munged.SerializedSize) {
          throw new InvalidOperationException("Serialized sizes don't match");
        }
        File.WriteAllBytes(args[2], munged.ToByteArray());
        return 0;
      } catch (Exception e) {
        // Top-level handler of a console tool: report everything and signal failure via exit code.
        Console.Error.WriteLine("Error: {0}", e.Message);
        Console.Error.WriteLine();
        Console.Error.WriteLine("Detailed exception information: {0}", e);
        return 1;
      }
    }

    /// <summary>
    /// Munges a message recursively.
    /// </summary>
    /// <param name="message">The message whose set fields are to be replaced with random data.</param>
    /// <returns>A new message of the same type as the original message,
    /// but munged so that all the data is desensitised.</returns>
    private static IMessage Munge(IMessage message) {
      IBuilder builder = message.WeakCreateBuilderForType();
      foreach (var pair in message.AllFields) {
        if (pair.Key.IsRepeated) {
          // Repeated fields are munged element by element so each element keeps its own size.
          foreach (object singleValue in (IEnumerable) pair.Value) {
            builder.WeakAddRepeatedField(pair.Key, CheckedMungeValue(pair.Key, singleValue));
          }
        } else {
          builder[pair.Key] = CheckedMungeValue(pair.Key, pair.Value);
        }
      }
      IMessage munged = builder.WeakBuild();
      // Diagnostic only: a mismatch here is reported but not treated as fatal at this level.
      if (message.SerializedSize != munged.SerializedSize) {
        Console.WriteLine("Sub message sizes: {0}/{1}", message.SerializedSize, munged.SerializedSize);
      }
      return munged;
    }

    /// <summary>
    /// Munges a single value and checks that the length ends up the same as it was before.
    /// </summary>
    /// <param name="fieldDescriptor">Descriptor of the field the value belongs to.</param>
    /// <param name="value">A single (non-repeated) value of that field.</param>
    /// <returns>The munged replacement value.</returns>
    /// <exception cref="InvalidOperationException">The munged value does not have the same
    /// computed field size as the original value.</exception>
    private static object CheckedMungeValue(FieldDescriptor fieldDescriptor, object value) {
      int currentSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber, value);
      object mungedValue = MungeValue(fieldDescriptor, value);
      int mungedSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber, mungedValue);
      // Exceptions log more easily than assertions
      if (currentSize != mungedSize) {
        throw new InvalidOperationException("Munged value had wrong size. Field type: " + fieldDescriptor.FieldType
            + "; old value: " + value + "; new value: " + mungedValue);
      }
      return mungedValue;
    }

    /// <summary>
    /// Munges a single value of the specified field descriptor. (i.e. if the field is
    /// actually a repeated int, this method receives a single int value to munge, and
    /// is called multiple times).
    /// </summary>
    /// <param name="fieldDescriptor">Descriptor of the field; its FieldType selects the strategy.</param>
    /// <param name="value">The boxed original value.</param>
    /// <returns>A boxed random value of the same runtime type as the original.</returns>
    /// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
    private static object MungeValue(FieldDescriptor fieldDescriptor, object value) {
      switch (fieldDescriptor.FieldType) {
        case FieldType.SInt64:
          // sint64 is ZigZag-encoded on the wire, so the size band has to be chosen on the
          // encoded value and the result decoded back to a signed number.
          return DecodeZigZag64(MungeVarint64(EncodeZigZag64((long) value)));
        case FieldType.Int64:
          // Plain reinterpretation: a negative int64 is written as a full-width varint.
          return unchecked((long) MungeVarint64((ulong) (long) value));
        case FieldType.UInt64:
          return MungeVarint64((ulong) value);
        case FieldType.SInt32:
          // Same reasoning as SInt64, on 32 bits.
          return DecodeZigZag32(MungeVarint32(EncodeZigZag32((int) value)));
        case FieldType.Int32:
          return MungeInt32((int) value);
        case FieldType.UInt32:
          return MungeVarint32((uint) value);
        case FieldType.Double:
          return rng.NextDouble();
        case FieldType.Float:
          return (float) rng.NextDouble();
        case FieldType.Fixed64: {
          byte[] data = new byte[8];
          rng.NextBytes(data);
          return BitConverter.ToUInt64(data, 0);
        }
        case FieldType.Fixed32: {
          byte[] data = new byte[4];
          rng.NextBytes(data);
          return BitConverter.ToUInt32(data, 0);
        }
        case FieldType.Bool:
          return rng.Next(2) == 1;
        case FieldType.String:
          return MungeString((string) value);
        case FieldType.Group:
        case FieldType.Message:
          return Munge((IMessage) value);
        case FieldType.Bytes:
          return MungeByteString((ByteString) value);
        case FieldType.SFixed64: {
          byte[] data = new byte[8];
          rng.NextBytes(data);
          return BitConverter.ToInt64(data, 0);
        }
        case FieldType.SFixed32: {
          byte[] data = new byte[4];
          rng.NextBytes(data);
          return BitConverter.ToInt32(data, 0);
        }
        case FieldType.Enum:
          return MungeEnum(fieldDescriptor, (EnumValueDescriptor) value);
        default:
          // TODO(jonskeet): Different exception?
          throw new ArgumentException("Invalid field descriptor");
      }
    }

    /// <summary>
    /// ZigZag-encodes a signed 32-bit value (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...).
    /// </summary>
    private static uint EncodeZigZag32(int n) {
      // n >> 31 is an arithmetic shift: all ones for negative n, zero otherwise.
      return unchecked((uint) ((n << 1) ^ (n >> 31)));
    }

    /// <summary>
    /// Reverses <see cref="EncodeZigZag32"/>.
    /// </summary>
    private static int DecodeZigZag32(uint n) {
      return unchecked((int) (n >> 1) ^ -(int) (n & 1));
    }

    /// <summary>
    /// ZigZag-encodes a signed 64-bit value (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...).
    /// </summary>
    private static ulong EncodeZigZag64(long n) {
      // n >> 63 is an arithmetic shift: all ones for negative n, zero otherwise.
      return unchecked((ulong) ((n << 1) ^ (n >> 63)));
    }

    /// <summary>
    /// Reverses <see cref="EncodeZigZag64"/>.
    /// </summary>
    private static long DecodeZigZag64(ulong n) {
      return unchecked((long) (n >> 1) ^ -(long) (n & 1));
    }

    /// <summary>
    /// Replaces a string with random printable ASCII (characters 32 to 126) occupying the
    /// same number of UTF-8 bytes as the original. For a pure-ASCII original that is the
    /// same number of characters; for other text the replacement has one ASCII character
    /// per original UTF-8 byte. The caller re-checks the resulting field size.
    /// </summary>
    /// <param name="original">The string to replace.</param>
    /// <returns>The random replacement string.</returns>
    private static object MungeString(string original) {
      int byteCount = System.Text.Encoding.UTF8.GetByteCount(original);
      char[] chars = new char[byteCount];
      // Pure ASCII - no control characters. The upper bound of Next is exclusive.
      for (int i = 0; i < chars.Length; i++) {
        chars[i] = (char) rng.Next(32, 127);
      }
      return new string(chars);
    }

    /// <summary>
    /// Int32 fields are slightly strange - we need to keep the sign the same way it is:
    /// negative numbers can munge to any other negative number (it'll always take
    /// 10 bytes) but positive numbers have to stay positive, so we can't use the
    /// full range of 32 bits.
    /// </summary>
    /// <param name="value">The original value.</param>
    /// <returns>A random value with the same sign class and the same varint length.</returns>
    private static int MungeInt32(int value) {
      if (value < 0) {
        // Upper bound is exclusive, so the result is always strictly negative.
        return rng.Next(int.MinValue, 0);
      }
      int length = CodedOutputStream.ComputeRawVarint32Size((uint) value);
      // Each varint byte carries 7 payload bits; the band for a given length is
      // [2^(7*(length-1)), 2^(7*length) - 1], except that the one-byte band starts at 0.
      uint min = length == 1 ? 0 : 1U << ((length - 1) * 7);
      // Capped at int.MaxValue so that the result stays non-negative.
      uint max = length == 5 ? int.MaxValue : (1U << (length * 7)) - 1;
      return (int) NextRandomUInt64(min, max);
    }

    /// <summary>
    /// Picks a random 32-bit value whose raw varint encoding is as long as the original's.
    /// </summary>
    private static uint MungeVarint32(uint original) {
      int length = CodedOutputStream.ComputeRawVarint32Size(original);
      uint min = length == 1 ? 0 : 1U << ((length - 1) * 7);
      // A 5-byte varint covers everything up to the top of the 32-bit range.
      uint max = length == 5 ? uint.MaxValue : (1U << (length * 7)) - 1;
      return (uint) NextRandomUInt64(min, max);
    }

    /// <summary>
    /// Picks a random 64-bit value whose raw varint encoding is as long as the original's.
    /// </summary>
    private static ulong MungeVarint64(ulong original) {
      int length = CodedOutputStream.ComputeRawVarint64Size(original);
      ulong min = length == 1 ? 0 : 1UL << ((length - 1) * 7);
      // A 10-byte varint covers everything up to the top of the 64-bit range.
      ulong max = length == 10 ? ulong.MaxValue : (1UL << (length * 7)) - 1;
      return NextRandomUInt64(min, max);
    }

    /// <summary>
    /// Returns a random number in the range [min, max] (both inclusive).
    /// </summary>
    /// <exception cref="ArgumentException">min is greater than max.</exception>
    private static ulong NextRandomUInt64(ulong min, ulong max) {
      if (min > max) {
        throw new ArgumentException("min must be <= max; min=" + min + "; max = " + max);
      }
      ulong range = max - min;
      // This isn't actually terribly good at very large ranges - but it doesn't really matter for the sake
      // of this program.
      // Scale by (range + 1) so that max itself can be produced; the addition is done in
      // double so that it cannot overflow when range is ulong.MaxValue.
      ulong offset = (ulong) (((double) range + 1.0) * rng.NextDouble());
      // Double rounding may land just past the top of a very large range; clamp to stay inclusive.
      if (offset > range) {
        offset = range;
      }
      return min + offset;
    }

    /// <summary>
    /// Picks a random value of the field's enum type whose number encodes to the same
    /// varint length as the original value's number.
    /// </summary>
    /// <param name="fieldDescriptor">Descriptor of the enum field; supplies the candidate values.</param>
    /// <param name="original">The original enum value.</param>
    /// <returns>One of the same-sized enum value descriptors, chosen at random.</returns>
    private static object MungeEnum(FieldDescriptor fieldDescriptor, EnumValueDescriptor original) {
      // Find all the values which get encoded to the same size as the current value, and pick one at random
      int originalSize = CodedOutputStream.ComputeRawVarint32Size((uint) original.Number);
      List<EnumValueDescriptor> sameSizeValues = new List<EnumValueDescriptor>();
      foreach (EnumValueDescriptor candidate in fieldDescriptor.EnumType.Values) {
        if (CodedOutputStream.ComputeRawVarint32Size((uint) candidate.Number) == originalSize) {
          sameSizeValues.Add(candidate);
        }
      }
      return sameSizeValues[rng.Next(sameSizeValues.Count)];
    }

    /// <summary>
    /// Replaces a byte string with the same number of random bytes.
    /// </summary>
    private static object MungeByteString(ByteString byteString) {
      byte[] data = new byte[byteString.Length];
      rng.NextBytes(data);
      return ByteString.CopyFrom(data);
    }
  }
  245. }