ソースを参照

Add C# codegen changes to enum value names (mostly C++)

Overview of changes:
- A new C#-specific command-line option, legacy_enum_values to revert to the old behavior
- When legacy_enum_values isn't specified, we strip the enum name as a prefix, and PascalCase the value name
- A new attribute within the C# code so that we can always tell the original in-proto name

Regenerating the C# code with legacy_enum_values leads to code which still compiles and works - but
there's more still to do.
Jon Skeet 9 年 前
コミット
75626ed79c

+ 1 - 0
Makefile.am

@@ -154,6 +154,7 @@ csharp_EXTRA_DIST=                                                           \
   csharp/src/Google.Protobuf/Reflection/MethodDescriptor.cs                  \
   csharp/src/Google.Protobuf/Reflection/OneofAccessor.cs                     \
   csharp/src/Google.Protobuf/Reflection/OneofDescriptor.cs                   \
+  csharp/src/Google.Protobuf/Reflection/OriginalNameAttribute.cs             \
   csharp/src/Google.Protobuf/Reflection/PackageDescriptor.cs                 \
   csharp/src/Google.Protobuf/Reflection/PartialClasses.cs                    \
   csharp/src/Google.Protobuf/Reflection/ReflectionUtil.cs                    \

+ 1 - 0
csharp/src/Google.Protobuf/Google.Protobuf.csproj

@@ -117,6 +117,7 @@
     <Compile Include="Reflection\MethodDescriptor.cs" />
     <Compile Include="Reflection\OneofAccessor.cs" />
     <Compile Include="Reflection\OneofDescriptor.cs" />
+    <Compile Include="Reflection\OriginalNameAttribute.cs" />
     <Compile Include="Reflection\PackageDescriptor.cs" />
     <Compile Include="Reflection\PartialClasses.cs" />
     <Compile Include="Reflection\ReflectionUtil.cs" />

+ 58 - 0
csharp/src/Google.Protobuf/Reflection/OriginalNameAttribute.cs

@@ -0,0 +1,58 @@
+#region Copyright notice and license
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endregion
+
+using System;
+
+namespace Google.Protobuf.Reflection
+{
+    /// <summary>
+    /// Specifies the original name (in the .proto file) of a named element,
+    /// such as an enum value.
+    /// </summary>
+    [AttributeUsage(AttributeTargets.Field)]
+    public class OriginalNameAttribute : Attribute
+    {
+        /// <summary>
+        /// The name of the element in the .proto file.
+        /// </summary>
+        public string Name { get; set; }
+
+        /// <summary>
+        /// Constructs a new attribute instance for the given name.
+        /// </summary>
+        /// <param name="name">The name of the element in the .proto file.</param>
+        public OriginalNameAttribute(string name)
+        {
+            Name = ProtoPreconditions.CheckNotNull(name, nameof(name));
+        }
+    }
+}

+ 17 - 3
src/google/protobuf/compiler/csharp/csharp_enum.cc

@@ -41,6 +41,7 @@
 #include <google/protobuf/compiler/csharp/csharp_doc_comment.h>
 #include <google/protobuf/compiler/csharp/csharp_enum.h>
 #include <google/protobuf/compiler/csharp/csharp_helpers.h>
+#include <google/protobuf/compiler/csharp/csharp_options.h>
 
 using google::protobuf::internal::scoped_ptr;
 
@@ -64,11 +65,24 @@ void EnumGenerator::Generate(io::Printer* printer) {
                  "access_level", class_access_level(),
                  "name", descriptor_->name());
   printer->Indent();
+  std::set<string> used_names;
   for (int i = 0; i < descriptor_->value_count(); i++) {
       WriteEnumValueDocComment(printer, descriptor_->value(i));
-      printer->Print("$name$ = $number$,\n",
-                   "name", descriptor_->value(i)->name(),
-                   "number", SimpleItoa(descriptor_->value(i)->number()));
+      string original_name = descriptor_->value(i)->name();
+      string name = options()->legacy_enum_values
+          ? descriptor_->value(i)->name()
+          : GetEnumValueName(descriptor_->name(), descriptor_->value(i)->name());
+      // Make sure we don't get any duplicate names due to prefix removal.
+      while (!used_names.insert(name).second) {
+        // It's possible we'll end up giving this warning multiple times, but that's better than not at all.
+        GOOGLE_LOG(WARNING) << "Duplicate enum value " << name << " (originally " << original_name
+          << ") in " << descriptor_->name() << "; adding underscore to distinguish";
+        name += "_";
+      }
+      printer->Print("[pbr::OriginalName(\"$original_name$\")] $name$ = $number$,\n",
+         "original_name", original_name,
+         "name", name,
+         "number", SimpleItoa(descriptor_->value(i)->number()));         
   }
   printer->Outdent();
   printer->Print("}\n");

+ 3 - 0
src/google/protobuf/compiler/csharp/csharp_generator.cc

@@ -83,6 +83,9 @@ bool Generator::Generate(
       cli_options.base_namespace_specified = true;
     } else if (options[i].first == "internal_access") {
       cli_options.internal_access = true;
+    } else if (options[i].first == "legacy_enum_values") {
+      // TODO: Remove this before final release
+      cli_options.legacy_enum_values = true;
     } else {
       *error = "Unknown generator option: " + options[i].first;
       return false;

+ 18 - 2
src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc

@@ -30,8 +30,8 @@
 
 #include <memory>
 
-#include <google/protobuf/compiler/ruby/ruby_generator.h>
 #include <google/protobuf/compiler/command_line_interface.h>
+#include <google/protobuf/compiler/csharp/csharp_helpers.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/io/printer.h>
 
@@ -45,7 +45,23 @@ namespace compiler {
 namespace csharp {
 namespace {
 
-// TODO(jtattermusch): add some tests.
+TEST(CSharpEnumValue, PascalCasedPrefixStripping) {
+  EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR"));
+  EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ"));
+  EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO_BAR"));
+  EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO__BAR"));
+  EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "FOO_BAR_BAZ"));
+  EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "Foo_BarBaz"));
+  EXPECT_EQ("Bar", GetEnumValueName("FO_O", "FOO_BAR"));
+  EXPECT_EQ("Bar", GetEnumValueName("FOO", "F_O_O_BAR"));
+  EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR"));
+  EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ"));
+  EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO"));
+  EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO___"));
+  // Identifiers can't start with digits
+  EXPECT_EQ("_2Bar", GetEnumValueName("Foo", "FOO_2_BAR"));
+  EXPECT_EQ("_2", GetEnumValueName("Foo", "FOO___2"));
+}
 
 }  // namespace
 }  // namespace csharp

+ 93 - 0
src/google/protobuf/compiler/csharp/csharp_helpers.cc

@@ -178,6 +178,99 @@ std::string UnderscoresToPascalCase(const std::string& input) {
   return UnderscoresToCamelCase(input, true);
 }
 
+// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
+// into a PascalCase string. Precise rules implemented:
+
+// Previous input character      Current character         Case
+// Any                           Non-alphanumeric          Skipped
+// None - first char of input    Alphanumeric              Upper
+// Non-letter (e.g. _ or 1)      Alphanumeric              Upper
+// Numeric                       Alphanumeric              Upper
+// Lower letter                  Alphanumeric              Same as current
+// Upper letter                  Alphanumeric              Lower
+std::string ShoutyToPascalCase(const std::string& input) {
+  string result;
+  // Simple way of implementing "always start with upper"
+  char previous = '_';
+  for (int i = 0; i < input.size(); i++) {
+    char current = input[i];
+    if (!ascii_isalnum(current)) {
+      previous = current;
+      continue;      
+    }
+    if (!ascii_isalnum(previous)) {
+      result += ascii_toupper(current);
+    } else if (ascii_isdigit(previous)) {
+      result += ascii_toupper(current);
+    } else if (ascii_islower(previous)) {
+      result += current;
+    } else {
+      result += ascii_tolower(current);
+    }
+    previous = current;
+  }
+  return result;
+}
+
+// Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
+// (foo, foo_bar) => bar - underscore after prefix is skipped
+// (FOO, foo_bar) => bar - casing is ignored
+// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
+// (foobar, foo_barbaz) => baz - underscore in value is ignored
+// (foo, bar) => bar - prefix isn't matched; return original value
+std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
+  // First normalize to a lower-case no-underscores prefix to match against
+  std::string prefix_to_match = "";
+  for (size_t i = 0; i < prefix.size(); i++) {
+    if (prefix[i] != '_') {
+      prefix_to_match += ascii_tolower(prefix[i]);
+    }
+  }
+  
+  // This keeps track of how much of value we've consumed
+  size_t prefix_index, value_index;
+  for (prefix_index = 0, value_index = 0;
+      prefix_index < prefix_to_match.size() && value_index < value.size();
+      value_index++) {
+    // Skip over underscores in the value
+    if (value[value_index] == '_') {
+      continue;
+    }
+    if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
+      // Failed to match the prefix - bail out early.
+      return value;
+    }
+  }
+
+  // If we didn't finish looking through the prefix, we can't strip it.
+  if (prefix_index < prefix_to_match.size()) {
+    return value;
+  }
+
+  // Step over any underscores after the prefix
+  while (value_index < value.size() && value[value_index] == '_') {
+    value_index++;
+  }
+
+  // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
+  if (value_index == value.size()) {
+    return value;
+  }
+
+  return value.substr(value_index);
+}
+
+std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
+  std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
+  std::string result = ShoutyToPascalCase(stripped);
+  // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
+  // string is a valid identifier.
+  if (ascii_isdigit(result[0])) {
+    result = "_" + result;
+  }
+  return result;
+}
+
 std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
   std::string result = GetFileNamespace(file);
   if (result != "") {

+ 2 - 0
src/google/protobuf/compiler/csharp/csharp_helpers.h

@@ -93,6 +93,8 @@ inline std::string UnderscoresToCamelCase(const std::string& input, bool cap_nex
 
 std::string UnderscoresToPascalCase(const std::string& input);
 
+std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name);
+
 // TODO(jtattermusch): perhaps we could move this to strutil
 std::string StringToBase64(const std::string& input);
 

+ 8 - 1
src/google/protobuf/compiler/csharp/csharp_options.h

@@ -45,7 +45,8 @@ struct Options {
       file_extension(".cs"),
       base_namespace(""),
       base_namespace_specified(false),
-      internal_access(false) {
+      internal_access(false),
+      legacy_enum_values(false) {
   }
   // Extension of the generated file. Defaults to ".cs"
   string file_extension;
@@ -68,6 +69,12 @@ struct Options {
   // Whether the generated classes should have accessibility level of "internal".
   // Defaults to false that generates "public" classes.
   bool internal_access;
+  // By default, C# codegen now uses PascalCased enum values names, after
+  // removing the enum type name as a prefix (if it *is* a prefix of the value).
+  // Setting this option reverts to the previous behavior of just copying the
+  // value name specified in the .proto file, allowing gradual migration.
+  // This option will be removed before final release.
+  bool legacy_enum_values;
 };
 
 }  // namespace csharp