Pārlūkot izejas kodu

Merge pull request #4013 from laszlocsomor/io_win32

io_win32: support non-ASCII paths
Jisi Liu 7 gadi atpakaļ
vecāks
revīzija
1b5b3b2839

+ 108 - 57
src/google/protobuf/stubs/io_win32.cc

@@ -30,10 +30,11 @@
 
 // Author: laszlocsomor@google.com (Laszlo Csomor)
 //
-// Implementation for long-path-aware open/mkdir/etc. on Windows.
+// Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well
+// as for the supporting utility functions.
 //
 // These functions convert the input path to an absolute Windows path
-// with "\\?\" prefix if necessary, then pass that to _wopen/_wmkdir/etc.
+// with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc.
 // (declared in <io.h>) respectively. This allows working with files/directories
 // whose paths are longer than MAX_PATH (260 chars).
 //
@@ -59,7 +60,6 @@
 #include <google/protobuf/stubs/io_win32.h>
 #include <google/protobuf/stubs/scoped_ptr.h>
 
-#include <cassert>
 #include <memory>
 #include <sstream>
 #include <string>
@@ -89,6 +89,11 @@ struct CharTraits<wchar_t> {
   static bool is_alpha(wchar_t ch) { return iswalpha(ch); }
 };
 
+template <typename char_type>
+bool null_or_empty(const char_type* s) {
+  return s == nullptr || *s == 0;
+}
+
 // Returns true if the path starts with a drive letter, e.g. "c:".
 // Note that this won't check for the "\" after the drive letter, so this also
 // returns true for "c:foo" (which is "c:\${PWD}\foo").
@@ -121,16 +126,7 @@ bool is_drive_relative(const char_type* path) {
   return has_drive_letter(path) && (path[2] == 0 || !is_separator(path[2]));
 }
 
-template <typename char_type>
-void replace_directory_separators(char_type* p) {
-  for (; *p; ++p) {
-    if (*p == '/') {
-      *p = '\\';
-    }
-  }
-}
-
-string join_paths(const string& path1, const string& path2) {
+wstring join_paths(const wstring& path1, const wstring& path2) {
   if (path1.empty() || is_path_absolute(path2.c_str()) ||
       has_longpath_prefix(path2.c_str())) {
     return path2;
@@ -144,23 +140,23 @@ string join_paths(const string& path1, const string& path2) {
                                        : (path1 + path2);
   } else {
     return is_separator(path2[0]) ? (path1 + path2)
-                                       : (path1 + '\\' + path2);
+                                       : (path1 + L'\\' + path2);
   }
 }
 
-string normalize(string path) {
+wstring normalize(wstring path) {
   if (has_longpath_prefix(path.c_str())) {
     path = path.substr(4);
   }
 
-  static const string dot(".");
-  static const string dotdot("..");
+  static const wstring dot(L".");
+  static const wstring dotdot(L"..");
 
-  std::vector<string> segments;
+  std::vector<wstring> segments;
   int segment_start = -1;
   // Find the path segments in `path` (separated by "/").
   for (int i = 0;; ++i) {
-    if (!is_separator(path[i]) && path[i] != '\0') {
+    if (!is_separator(path[i]) && path[i] != L'\0') {
       // The current character does not end a segment, so start one unless it's
       // already started.
       if (segment_start < 0) {
@@ -169,7 +165,7 @@ string normalize(string path) {
     } else if (segment_start >= 0 && i > segment_start) {
       // The current character is "/" or "\0", so this ends a segment.
       // Add that to `segments` if there's anything to add; handle "." and "..".
-      string segment(path, segment_start, i - segment_start);
+      wstring segment(path, segment_start, i - segment_start);
       segment_start = -1;
       if (segment == dotdot) {
         if (!segments.empty() &&
@@ -180,7 +176,7 @@ string normalize(string path) {
         segments.push_back(segment);
       }
     }
-    if (path[i] == '\0') {
+    if (path[i] == L'\0') {
       break;
     }
   }
@@ -189,64 +185,62 @@ string normalize(string path) {
   // form of it, e.g. "c:\..").
   if (segments.size() == 1 && segments[0].size() == 2 &&
       has_drive_letter(segments[0].c_str())) {
-    return segments[0] + '\\';
+    return segments[0] + L'\\';
   }
 
   // Join all segments.
   bool first = true;
-  std::ostringstream result;
+  std::wstringstream result;
   for (int i = 0; i < segments.size(); ++i) {
     if (!first) {
-      result << '\\';
+      result << L'\\';
     }
     first = false;
     result << segments[i];
   }
   // Preserve trailing separator if the input contained it.
   if (!path.empty() && is_separator(path[path.size() - 1])) {
-    result << '\\';
+    result << L'\\';
   }
   return result.str();
 }
 
-WCHAR* as_wstring(const string& s) {
-  int len = ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), NULL, 0);
-  WCHAR* result = new WCHAR[len + 1];
-  ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), result, len + 1);
-  result[len] = 0;
-  return result;
-}
-
-void as_wchar_path(const string& path, wstring* wchar_path) {
-  scoped_array<WCHAR> wbuf(as_wstring(path));
-  replace_directory_separators(wbuf.get());
-  wchar_path->assign(wbuf.get());
-}
-
-bool as_windows_path(const string& path, wstring* result) {
-  if (path.empty()) {
+bool as_windows_path(const char* path, wstring* result) {
+  if (null_or_empty(path)) {
     result->clear();
     return true;
   }
-  if (is_separator(path[0]) || is_drive_relative(path.c_str())) {
+  wstring wpath;
+  if (!strings::utf8_to_wcs(path, &wpath)) {
+    return false;
+  }
+  if (has_longpath_prefix(wpath.c_str())) {
+    *result = wpath;
+    return true;
+  }
+  if (is_separator(path[0]) || is_drive_relative(path)) {
     return false;
   }
 
-  string mutable_path = path;
-  if (!is_path_absolute(mutable_path.c_str()) &&
-      !has_longpath_prefix(mutable_path.c_str())) {
-    char cwd[MAX_PATH];
-    ::GetCurrentDirectoryA(MAX_PATH, cwd);
-    mutable_path = join_paths(cwd, mutable_path);
+
+  if (!is_path_absolute(wpath.c_str())) {
+    int size = ::GetCurrentDirectoryW(0, NULL);
+    if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+      return false;
+    }
+    scoped_array<WCHAR> wcwd(new WCHAR[size]);
+    ::GetCurrentDirectoryW(size, wcwd.get());
+    wpath = join_paths(wcwd.get(), wpath);
   }
-  as_wchar_path(normalize(mutable_path), result);
-  if (!has_longpath_prefix(result->c_str())) {
+  wpath = normalize(wpath);
+  if (!has_longpath_prefix(wpath.c_str())) {
     // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API
     // from processing the path and "helpfully" removing trailing dots from the
     // path, for example.
     // See https://github.com/bazelbuild/bazel/issues/2935
-    *result = wstring(L"\\\\?\\") + *result;
+    wpath = wstring(L"\\\\?\\") + wpath;
   }
+  *result = wpath;
   return true;
 }
 
@@ -319,13 +313,21 @@ int stat(const char* path, struct _stat* buffer) {
 
 FILE* fopen(const char* path, const char* mode) {
 #ifdef SUPPORT_LONGPATHS
+  if (null_or_empty(path)) {
+    errno = EINVAL;
+    return NULL;
+  }
   wstring wpath;
   if (!as_windows_path(path, &wpath)) {
     errno = ENOENT;
     return NULL;
   }
-  scoped_array<WCHAR> wmode(as_wstring(mode));
-  return ::_wfopen(wpath.c_str(), wmode.get());
+  wstring wmode;
+  if (!strings::utf8_to_wcs(mode, &wmode)) {
+    errno = EINVAL;
+    return NULL;
+  }
+  return ::_wfopen(wpath.c_str(), wmode.c_str());
 #else
   return ::fopen(path, mode);
 #endif
@@ -347,16 +349,65 @@ int write(int fd, const void* buffer, size_t size) {
   return ::_write(fd, buffer, size);
 }
 
-wstring testonly_path_to_winpath(const string& path) {
+wstring testonly_utf8_to_winpath(const char* path) {
   wstring wpath;
-  as_windows_path(path, &wpath);
-  return wpath;
+  return as_windows_path(path, &wpath) ? wpath : wstring();
+}
+
+namespace strings {
+
+bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) {
+  if (null_or_empty(s)) {
+    out->clear();
+    return true;
+  }
+  BOOL usedDefaultChar = FALSE;
+  SetLastError(0);
+  int size = WideCharToMultiByte(
+      outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0, NULL,
+      outUtf8 ? NULL : &usedDefaultChar);
+  if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+      || usedDefaultChar) {
+    return false;
+  }
+  scoped_array<CHAR> astr(new CHAR[size]);
+  WideCharToMultiByte(
+      outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, NULL, NULL);
+  out->assign(astr.get());
+  return true;
+}
+
+bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) {
+  if (null_or_empty(s)) {
+    out->clear();
+    return true;
+  }
+
+  SetLastError(0);
+  int size =
+      MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0);
+  if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+    return false;
+  }
+  scoped_array<WCHAR> wstr(new WCHAR[size]);
+  MultiByteToWideChar(
+      inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1);
+  out->assign(wstr.get());
+  return true;
 }
 
+bool utf8_to_wcs(const char* input, wstring* out) {
+  return mbs_to_wcs(input, out, true);
+}
+
+bool wcs_to_utf8(const wchar_t* input, string* out) {
+  return wcs_to_mbs(input, out, true);
+}
+
+}  // namespace strings
 }  // namespace win32
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google
 
 #endif  // defined(_WIN32)
-

+ 19 - 2
src/google/protobuf/stubs/io_win32.h

@@ -69,8 +69,25 @@ LIBPROTOBUF_EXPORT int read(int fd, void* buffer, size_t size);
 LIBPROTOBUF_EXPORT int setmode(int fd, int mode);
 LIBPROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer);
 LIBPROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size);
-LIBPROTOBUF_EXPORT std::wstring testonly_path_to_winpath(
-    const std::string& path);
+LIBPROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path);
+
+namespace strings {
+
+// Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text.
+LIBPROTOBUF_EXPORT bool wcs_to_mbs(
+    const wchar_t* s, std::string* out, bool outUtf8);
+
+// Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16.
+LIBPROTOBUF_EXPORT bool mbs_to_wcs(
+    const char* s, std::wstring* out, bool inUtf8);
+
+// Convert from UTF-8-encoded text to UTF-16.
+LIBPROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out);
+
+// Convert from UTF-16-encoded text to UTF-8.
+LIBPROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out);
+
+}  // namespace strings
 
 }  // namespace win32
 }  // namespace internal

+ 147 - 67
src/google/protobuf/stubs/io_win32_unittest.cc

@@ -30,7 +30,8 @@
 
 // Author: laszlocsomor@google.com (Laszlo Csomor)
 //
-// Unit tests for long-path-aware open/mkdir/access on Windows.
+// Unit tests for long-path-aware open/mkdir/access/etc. on Windows, as well as
+// for the supporting utility functions.
 //
 // This file is only used on Windows, it's empty on other platforms.
 
@@ -48,7 +49,6 @@
 
 #include <google/protobuf/stubs/io_win32.h>
 #include <google/protobuf/stubs/scoped_ptr.h>
-#include <google/protobuf/testing/googletest.h>
 #include <gtest/gtest.h>
 
 #include <memory>
@@ -61,6 +61,27 @@ namespace internal {
 namespace win32 {
 namespace {
 
+const char kUtf8Text[] = {
+    'h', 'i', ' ',
+    // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F
+    0xd0, 0x9f,
+    // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440
+    0xd1, 0x80,
+    // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438
+    0xd0, 0xb8,
+    // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432
+    0xd0, 0xb2,
+    // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435
+    0xd0, 0xb5,
+    // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442
+    0xd1, 0x82, 0
+};
+
+const wchar_t kUtf16Text[] = {
+  L'h', L'i', L' ',
+  L'\x41f', L'\x440', L'\x438', L'\x432', L'\x435', L'\x442', 0
+};
+
 using std::string;
 using std::wstring;
 
@@ -73,6 +94,7 @@ class IoWin32Test : public ::testing::Test {
   bool CreateAllUnder(wstring path);
   bool DeleteAllUnder(wstring path);
 
+  WCHAR working_directory[MAX_PATH];
   string test_tmpdir;
   wstring wtest_tmpdir;
 };
@@ -89,71 +111,88 @@ void StripTrailingSlashes(string* str) {
   for (; i >= 0 && ((*str)[i] == '/' || (*str)[i] == '\\'); --i) {}
   str->resize(i+1);
 }
+
+bool GetEnvVarAsUtf8(const WCHAR* name, string* result) {
+  DWORD size = ::GetEnvironmentVariableW(name, NULL, 0);
+  if (size > 0 && GetLastError() != ERROR_ENVVAR_NOT_FOUND) {
+    scoped_array<WCHAR> wcs(new WCHAR[size]);
+    ::GetEnvironmentVariableW(name, wcs.get(), size);
+    // GetEnvironmentVariableA retrieves an Active-Code-Page-encoded text which
+    // we'd first need to convert to UTF-16 then to UTF-8, because there seems
+    // to be no API function to do that conversion directly.
+    // GetEnvironmentVariableW retrieves an UTF-16-encoded text, which we need
+    // to convert to UTF-8.
+    return strings::wcs_to_utf8(wcs.get(), result);
+  } else {
+    return false;
+  }
+}
+
+bool GetCwdAsUtf8(string* result) {
+  DWORD size = ::GetCurrentDirectoryW(0, NULL);
+  if (size > 0) {
+    scoped_array<WCHAR> wcs(new WCHAR[size]);
+    ::GetCurrentDirectoryW(size, wcs.get());
+    // GetCurrentDirectoryA retrieves an Active-Code-Page-encoded text which
+    // we'd first need to convert to UTF-16 then to UTF-8, because there seems
+    // to be no API function to do that conversion directly.
+    // GetCurrentDirectoryW retrieves an UTF-16-encoded text, which we need
+    // to convert to UTF-8.
+    return strings::wcs_to_utf8(wcs.get(), result);
+  } else {
+    return false;
+  }
+}
+
 }  // namespace
 
 void IoWin32Test::SetUp() {
-  test_tmpdir = string(TestTempDir());
+  test_tmpdir.clear();
   wtest_tmpdir.clear();
-  if (test_tmpdir.empty()) {
-    const char* test_tmpdir_env = getenv("TEST_TMPDIR");
-    if (test_tmpdir_env != NULL && *test_tmpdir_env) {
-      test_tmpdir = string(test_tmpdir_env);
-    }
-
-    // Only Bazel defines TEST_TMPDIR, CMake does not, so look for other
-    // suitable environment variables.
-    if (test_tmpdir.empty()) {
-      static const char* names[] = {"TEMP", "TMP"};
-      for (int i = 0; i < sizeof(names)/sizeof(names[0]); ++i) {
-        const char* name = names[i];
-        test_tmpdir_env = getenv(name);
-        if (test_tmpdir_env != NULL && *test_tmpdir_env) {
-          test_tmpdir = string(test_tmpdir_env);
-          break;
-        }
-      }
-    }
+  EXPECT_GT(::GetCurrentDirectoryW(MAX_PATH, working_directory), 0);
 
-    // No other temp directory was found. Use the current director
-    if (test_tmpdir.empty()) {
-      char buffer[MAX_PATH];
-      // Use GetCurrentDirectoryA instead of GetCurrentDirectoryW, because the
-      // current working directory must always be shorter than MAX_PATH, even
-      // with
-      // "\\?\" prefix (except on Windows 10 version 1607 and beyond, after
-      // opting in to long paths by default [1]).
-      //
-      // [1] https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
-      DWORD result = ::GetCurrentDirectoryA(MAX_PATH, buffer);
-      if (result > 0) {
-        test_tmpdir = string(buffer);
-      } else {
-        // Using assertions in SetUp/TearDown seems to confuse the test
-        // framework, so just leave the member variables empty in case of
-        // failure.
-        GOOGLE_CHECK_OK(false);
-        return;
-      }
-    }
+  string tmp;
+  bool ok = false;
+  if (!ok) {
+    // Bazel sets this environment variable when it runs tests.
+    ok = GetEnvVarAsUtf8(L"TEST_TMPDIR", &tmp);
   }
-
-  StripTrailingSlashes(&test_tmpdir);
-  test_tmpdir += "\\io_win32_unittest.tmp";
-
-  // CreateDirectoryA's limit is 248 chars, see MSDN.
-  // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363855(v=vs.85).aspx
-  wtest_tmpdir = testonly_path_to_winpath(test_tmpdir);
-  if (!DeleteAllUnder(wtest_tmpdir) || !CreateAllUnder(wtest_tmpdir)) {
-    GOOGLE_CHECK_OK(false);
-    test_tmpdir.clear();
-    wtest_tmpdir.clear();
+  if (!ok) {
+    // Bazel 0.8.0 sets this environment for every build and test action.
+    ok = GetEnvVarAsUtf8(L"TEMP", &tmp);
   }
+  if (!ok) {
+    // Bazel 0.8.0 sets this environment for every build and test action.
+    ok = GetEnvVarAsUtf8(L"TMP", &tmp);
+  }
+  if (!ok) {
+    // Fall back to using the current directory.
+    ok = GetCwdAsUtf8(&tmp);
+  }
+  if (!ok || tmp.empty()) {
+    FAIL() << "Cannot find a temp directory.";
+  }
+
+  StripTrailingSlashes(&tmp);
+  std::stringstream result;
+  // Deleting files and directories is asynchronous on Windows, and if TearDown
+  // just deleted the previous temp directory, sometimes we cannot recreate the
+  // same directory.
+  // Use a counter so every test method gets its own temp directory.
+  static unsigned int counter = 0;
+  result << tmp << "\\w32tst" << counter++ << ".tmp";
+  test_tmpdir = result.str();
+  wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str());
+  ASSERT_FALSE(wtest_tmpdir.empty());
+  ASSERT_TRUE(DeleteAllUnder(wtest_tmpdir));
+  ASSERT_TRUE(CreateAllUnder(wtest_tmpdir));
 }
 
 void IoWin32Test::TearDown() {
   if (!wtest_tmpdir.empty()) {
     DeleteAllUnder(wtest_tmpdir);
   }
+  ::SetCurrentDirectoryW(working_directory);
 }
 
 bool IoWin32Test::CreateAllUnder(wstring path) {
@@ -191,8 +230,8 @@ bool IoWin32Test::DeleteAllUnder(wstring path) {
     path = wstring(L"\\\\?\\") + path;
   }
   // Append "\" if necessary.
-  if (path[path.size() - 1] != '\\') {
-    path.push_back('\\');
+  if (path[path.size() - 1] != L'\\') {
+    path.push_back(L'\\');
   }
 
   WIN32_FIND_DATAW metadata;
@@ -309,13 +348,24 @@ TEST_F(IoWin32Test, MkdirTest) {
   ASSERT_EQ(errno, ENOENT);
 }
 
+TEST_F(IoWin32Test, MkdirTestNonAscii) {
+  ASSERT_INITIALIZED;
+
+  // Create a non-ASCII path.
+  // Ensure that we can create the directory using SetCurrentDirectoryW.
+  EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1").c_str(), NULL));
+  EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1\\" + kUtf16Text).c_str(), NULL));
+  // Ensure that we can create a very similarly named directory using mkdir.
+  // We don't attemp to delete and recreate the same directory, because on
+  // Windows, deleting files and directories seems to be asynchronous.
+  EXPECT_EQ(mkdir((test_tmpdir + "\\2").c_str(), 0644), 0);
+  EXPECT_EQ(mkdir((test_tmpdir + "\\2\\" + kUtf8Text).c_str(), 0644), 0);
+}
+
 TEST_F(IoWin32Test, ChdirTest) {
-  char owd[MAX_PATH];
-  EXPECT_GT(::GetCurrentDirectoryA(MAX_PATH, owd), 0);
   string path("C:\\");
   EXPECT_EQ(access(path.c_str(), F_OK), 0);
   ASSERT_EQ(chdir(path.c_str()), 0);
-  EXPECT_TRUE(::SetCurrentDirectoryA(owd));
 
   // Do not try to chdir into the test_tmpdir, it may already contain directory
   // names with trailing dots.
@@ -330,17 +380,37 @@ TEST_F(IoWin32Test, ChdirTest) {
   ASSERT_NE(chdir(path.c_str()), 0);
 }
 
+TEST_F(IoWin32Test, ChdirTestNonAscii) {
+  ASSERT_INITIALIZED;
+
+  // Create a directory with a non-ASCII path and ensure we can cd into it.
+  wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text);
+  string nonAscii;
+  EXPECT_TRUE(strings::wcs_to_utf8(wNonAscii.c_str(), &nonAscii));
+  EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), NULL));
+  WCHAR cwd[MAX_PATH];
+  EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd));
+  // Ensure that we can cd into the path using SetCurrentDirectoryW.
+  EXPECT_TRUE(SetCurrentDirectoryW(wNonAscii.c_str()));
+  EXPECT_TRUE(SetCurrentDirectoryW(cwd));
+  // Ensure that we can cd into the path using chdir.
+  ASSERT_EQ(chdir(nonAscii.c_str()), 0);
+  // Ensure that the GetCurrentDirectoryW returns the desired path.
+  EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd));
+  ASSERT_EQ(wNonAscii, cwd);
+}
+
 TEST_F(IoWin32Test, AsWindowsPathTest) {
   DWORD size = GetCurrentDirectoryW(0, NULL);
   scoped_array<wchar_t> cwd_str(new wchar_t[size]);
   EXPECT_GT(GetCurrentDirectoryW(size, cwd_str.get()), 0);
   wstring cwd = wstring(L"\\\\?\\") + cwd_str.get();
 
-  ASSERT_EQ(testonly_path_to_winpath("relative_mkdirtest"),
+  ASSERT_EQ(testonly_utf8_to_winpath("relative_mkdirtest"),
             cwd + L"\\relative_mkdirtest");
-  ASSERT_EQ(testonly_path_to_winpath("preserve//\\trailing///"),
+  ASSERT_EQ(testonly_utf8_to_winpath("preserve//\\trailing///"),
             cwd + L"\\preserve\\trailing\\");
-  ASSERT_EQ(testonly_path_to_winpath("./normalize_me\\/../blah"),
+  ASSERT_EQ(testonly_utf8_to_winpath("./normalize_me\\/../blah"),
             cwd + L"\\blah");
   std::ostringstream relpath;
   for (wchar_t* p = cwd_str.get(); *p; ++p) {
@@ -349,18 +419,28 @@ TEST_F(IoWin32Test, AsWindowsPathTest) {
     }
   }
   relpath << ".\\/../\\./beyond-toplevel";
-  ASSERT_EQ(testonly_path_to_winpath(relpath.str()),
+  ASSERT_EQ(testonly_utf8_to_winpath(relpath.str().c_str()),
             wstring(L"\\\\?\\") + cwd_str.get()[0] + L":\\beyond-toplevel");
 
   // Absolute unix paths lack drive letters, driveless absolute windows paths
   // do too. Neither can be converted to a drive-specifying absolute Windows
   // path.
-  ASSERT_EQ(testonly_path_to_winpath("/absolute/unix/path"), L"");
+  ASSERT_EQ(testonly_utf8_to_winpath("/absolute/unix/path"), L"");
   // Though valid on Windows, we also don't support UNC paths (\\UNC\\blah).
-  ASSERT_EQ(testonly_path_to_winpath("\\driveless\\absolute"), L"");
+  ASSERT_EQ(testonly_utf8_to_winpath("\\driveless\\absolute"), L"");
   // Though valid in cmd.exe, drive-relative paths are not supported.
-  ASSERT_EQ(testonly_path_to_winpath("c:foo"), L"");
-  ASSERT_EQ(testonly_path_to_winpath("c:/foo"), L"\\\\?\\c:\\foo");
+  ASSERT_EQ(testonly_utf8_to_winpath("c:foo"), L"");
+  ASSERT_EQ(testonly_utf8_to_winpath("c:/foo"), L"\\\\?\\c:\\foo");
+  ASSERT_EQ(testonly_utf8_to_winpath("\\\\?\\C:\\foo"), L"\\\\?\\C:\\foo");
+}
+
+TEST_F(IoWin32Test, Utf8Utf16ConversionTest) {
+  string mbs;
+  wstring wcs;
+  ASSERT_TRUE(strings::utf8_to_wcs(kUtf8Text, &wcs));
+  ASSERT_TRUE(strings::wcs_to_utf8(kUtf16Text, &mbs));
+  ASSERT_EQ(wcs, kUtf16Text);
+  ASSERT_EQ(mbs, kUtf8Text);
 }
 
 }  // namespace