[m-rev.] for review: unicode paths on windows

Peter Wang novalazy at gmail.com
Wed Dec 14 13:04:33 AEDT 2011


Branches: main, 11.07

Support Unicode file paths on Windows with C back-ends.

This works by calling the wide-character versions of file functions,
e.g. _wfopen instead of fopen.  To do so we have to convert between the
internal string representation (UTF-8) and that used by the Windows API
(UTF-16).

library/io.m:
	Add ML_utf8_to_wide and ML_wide_to_utf8 to convert path strings
	between UTF-8 and UTF-16 on Windows.

	Call the Unicode-aware _wfopen in mercury_open on Windows, which
	affects io.open_input, etc.

	Call Unicode-aware functions in io.file_modification_time,
	io.file_type, io.check_file_accessibility, io.file_id, io.make_temp,
	io.remove_file, io.rename_file.

library/dir.m:
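	Call Unicode-aware functions in dir.make_single_directory and in the
	directory opening and entry reading code used by the fold predicates.

	Pass the first file name to make_win32_dir_open_result_ok as a string
	and delete copy_c_string, which is no longer needed.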

diff --git a/library/dir.m b/library/dir.m
index 984fb1d..3c4a4a0 100644
--- a/library/dir.m
+++ b/library/dir.m
@@ -1092,7 +1092,7 @@ dir.make_single_directory(DirName, Result, !IO) :-
         may_not_duplicate],
 "
 #if defined(MR_WIN32)
-    if (CreateDirectory(DirName, NULL)) {
+    if (CreateDirectoryW(ML_utf8_to_wide(DirName), NULL)) {
         Result = ML_make_mkdir_res_ok();
     } else {
         int error;
@@ -1627,16 +1627,17 @@ dir.open(DirName, Res, !IO) :-
         terminates, will_not_modify_trail, does_not_affect_liveness],
 "
 #if defined(MR_WIN32)
-    WIN32_FIND_DATA file_data;
-    ML_DIR_STREAM Dir;
-    LPTSTR FirstFileName;
-    char *dir_pattern;
-    MR_Integer is_readable;
+    WIN32_FIND_DATAW    file_data;
+    ML_DIR_STREAM       Dir;
+    LPTSTR              FirstFileName;
+    char                *dir_pattern;
+    MR_Integer          is_readable;
+    char                *filename;
 
     ML_check_dir_readable(DirName, &is_readable, &Result);
     if (is_readable) {
         dir_pattern = ML_make_path_name(DirName, MR_make_string_const(""*""));
-        Dir = FindFirstFile(dir_pattern, &file_data);
+        Dir = FindFirstFileW(ML_utf8_to_wide(dir_pattern), &file_data);
         if (Dir == INVALID_HANDLE_VALUE) {
             int error = GetLastError();
             if (error == ERROR_NO_MORE_FILES) {
@@ -1645,8 +1646,8 @@ dir.open(DirName, Res, !IO) :-
                 ML_make_dir_open_result_error(error, &Result);
             }
         } else {
-            ML_make_win32_dir_open_result_ok(Dir,
-                (MR_Word) file_data.cFileName, &Result);
+            filename = ML_wide_to_utf8(file_data.cFileName, MR_ALLOC_ID);
+            ML_make_win32_dir_open_result_ok(Dir, filename, &Result);
         }
     }
 
@@ -1768,14 +1769,13 @@ dir.check_dir_readable(DirName, IsReadable, Result, !IO) :-
 dir.read_first_entry(Dir, Result, !IO) :-
     dir.read_entry(Dir, Result, !IO).
 
-:- pred make_win32_dir_open_result_ok(dir.stream::in, c_pointer::in,
+:- pred make_win32_dir_open_result_ok(dir.stream::in, string::in,
     io.result({dir.stream, string})::out, io::di, io::uo) is det.
 :- pragma foreign_export("C",
     make_win32_dir_open_result_ok(in, in, out, di, uo),
     "ML_make_win32_dir_open_result_ok").
 
-make_win32_dir_open_result_ok(Dir, FirstFilePtr, Result, !IO) :-
-    FirstFile0 = copy_c_string(FirstFilePtr),
+make_win32_dir_open_result_ok(Dir, FirstFile0, Result, !IO) :-
     (
         ( FirstFile0 = dir.this_directory
         ; FirstFile0 = dir.parent_directory
@@ -1800,27 +1800,6 @@ make_win32_dir_open_result_ok(Dir, FirstFilePtr, Result, !IO) :-
         Result = ok({Dir, FirstFile0})
     ).
 
-    % This is needed because the heap pointer is not valid in the
-    % `may_call_mercury' foreign proc for dir.open_2. Instead, we pass it
-    % as a c_pointer to copy_c_string, which doesn't call Mercury, so the
-    % heap pointer is valid. Passing it as a c_pointer avoids having the
-    % accurate garbage collector attempt to copy a potentially unaligned
-    % string.
-    %
-:- func copy_c_string(c_pointer) = string.
-
-copy_c_string(_) = _ :-
-    error("dir.copy_c_string should only be called " ++
-        "by code generated by C backends").
-
-:- pragma foreign_proc("C",
-    copy_c_string(Ptr::in) = (Str::out),
-    [will_not_call_mercury, promise_pure, thread_safe,
-        will_not_modify_trail, does_not_affect_liveness],
-"
-    MR_make_aligned_string_copy_msg(Str, (char *) Ptr, MR_ALLOC_ID);
-").
-
 :- func make_dir_open_result_eof = io.result({dir.stream, string}).
 :- pragma foreign_export("C", (make_dir_open_result_eof = out),
     "ML_make_dir_open_result_eof").
@@ -1945,13 +1924,12 @@ dir.read_entry(Dir0, Res, !IO) :-
         will_not_modify_trail, does_not_affect_liveness],
 "
 #if defined(MR_WIN32)
-    WIN32_FIND_DATA file_data;
+    WIN32_FIND_DATAW file_data;
 
     Dir = Dir0;
-    if (FindNextFile(Dir, &file_data)) {
+    if (FindNextFileW(Dir, &file_data)) {
         Status = 1;
-        MR_make_aligned_string_copy_msg(FileName, file_data.cFileName,
-            MR_ALLOC_ID);
+        FileName = ML_wide_to_utf8(file_data.cFileName, MR_ALLOC_ID);
     } else {
         Error = GetLastError();
         Status = (Error == ERROR_NO_MORE_FILES ? -1 : 0);
diff --git a/library/io.m b/library/io.m
index 8d63129..8efa737 100644
--- a/library/io.m
+++ b/library/io.m
@@ -2836,8 +2836,15 @@ io.file_modification_time(File, Result, !IO) :-
         does_not_affect_liveness, no_sharing],
 "
 #ifdef MR_HAVE_STAT
+  #ifdef MR_WIN32
+    struct _stat s;
+    int stat_result = _wstat(ML_utf8_to_wide(FileName), &s);
+  #else
     struct stat s;
-    if (stat(FileName, &s) == 0) {
+    int stat_result = stat(FileName, &s);
+  #endif
+
+    if (stat_result == 0) {
         Time = ML_construct_time_t(s.st_mtime);
         Msg = MR_string_const("""", 0);
         Status = 1;
@@ -2852,6 +2859,7 @@ io.file_modification_time(File, Result, !IO) :-
         ""io.file_modification_time not available on this platform"");
 #endif
 ").
+
 :- pragma foreign_proc("C#",
     io.file_modification_time_2(FileName::in, Status::out, Msg::out,
         Time::out, _IO0::di, _IO::uo),
@@ -2979,8 +2987,12 @@ file_type_implemented :-
         does_not_affect_liveness, no_sharing],
 "
 #ifdef MR_HAVE_STAT
+  #ifdef MR_WIN32
+    struct _stat s;
+    int stat_result = _wstat(ML_utf8_to_wide(FileName), &s);
+  #else
     struct stat s;
-    int         stat_result;
+    int stat_result;
 
     if (FollowSymLinks == 1) {
         stat_result = stat(FileName, &s);
@@ -2991,6 +3003,7 @@ file_type_implemented :-
             stat_result = stat(FileName, &s);
         #endif
     }
+  #endif
 
     if (stat_result == 0) {
         MR_Word type;
@@ -3363,7 +3376,12 @@ io.check_file_accessibility(FileName, AccessTypes, Result, !IO) :-
   #endif
     }
 
+  #ifdef MR_WIN32
+    access_result = _waccess(ML_utf8_to_wide(FileName), mode);
+  #else
     access_result = access(FileName, mode);
+  #endif
+
     if (access_result == 0) {
         Result = ML_make_io_res_0_ok();
     } else {
@@ -3915,9 +3933,15 @@ io.file_id(FileName, Result, !IO) :-
         does_not_affect_liveness, no_sharing],
 "
 #ifdef MR_HAVE_STAT
+  #ifdef MR_WIN32
+    struct _stat s;
+    int stat_result = _wstat(ML_utf8_to_wide(FileName), &s);
+  #else
     struct stat s;
+    int stat_result = stat(FileName, &s);
+  #endif
 
-    if (stat(FileName, &s) == 0) {
+    if (stat_result == 0) {
         FileId.device = s.st_dev;
         FileId.inode = s.st_ino;
         Msg = MR_string_const("""", 0);
@@ -5384,7 +5408,7 @@ io.progname_base(DefaultName, PrognameBase, !IO) :-
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
 
-% environment interface predicates
+% Environment interface predicates
 
 :- pragma promise_pure(io.get_environment_var/4).
 
@@ -5684,6 +5708,12 @@ void            mercury_print_string(MercuryFilePtr mf, const char *s);
 int             mercury_get_byte(MercuryFilePtr mf);
 void            mercury_close(MercuryFilePtr mf);
 int             ML_fprintf(MercuryFilePtr mf, const char *format, ...);
+
+#ifdef MR_WIN32
+    wchar_t     *ML_utf8_to_wide(const char *s);
+    char        *ML_wide_to_utf8(const wchar_t *ws,
+                    MR_AllocSiteInfoPtr alloc_id);
+#endif
 ").
 
 :- pragma foreign_code("C#", "
@@ -6908,7 +6938,12 @@ mercury_open(const char *filename, const char *openmode,
     MercuryFilePtr  mf;
     FILE            *f;
 
+#ifdef MR_WIN32
+    f = _wfopen(ML_utf8_to_wide(filename), ML_utf8_to_wide(openmode));
+#else
     f = fopen(filename, openmode);
+#endif
+
     if (f == NULL) {
         return NULL;
     }
@@ -7403,6 +7438,50 @@ ML_fprintf(MercuryFilePtr mf, const char *format, ...)
 }
 ").
 
+:- pragma foreign_code("C", "
+#ifdef MR_WIN32
+
+/*
+** Accessing Unicode file names on Windows requires that we use the functions
+** taking wide character strings.
+*/
+wchar_t *
+ML_utf8_to_wide(const char *s)
+{
+    int     wslen;
+    wchar_t *ws;
+
+    wslen = MultiByteToWideChar(CP_UTF8, 0, s, -1, NULL, 0);
+    if (wslen == 0) {
+        MR_fatal_error(""ML_utf8_to_wide: MultiByteToWideChar failed"");
+    }
+    ws = MR_GC_NEW_ARRAY(wchar_t, wslen);
+    if (0 == MultiByteToWideChar(CP_UTF8, 0, s, -1, ws, wslen)) {
+        MR_fatal_error(""ML_utf8_to_wide: MultiByteToWideChar failed"");
+    }
+    return ws;
+}
+
+char *
+ML_wide_to_utf8(const wchar_t *ws, MR_AllocSiteInfoPtr alloc_id)
+{
+    char    *s;
+    int     bytes;
+
+    bytes = WideCharToMultiByte(CP_UTF8, 0, ws, -1, NULL, 0, NULL, NULL);
+    if (bytes == 0) {
+        MR_fatal_error(""ML_wide_to_utf8: WideCharToMultiByte failed"");
+    }
+    MR_allocate_aligned_string_msg(s, bytes, alloc_id);
+    if (0 == WideCharToMultiByte(CP_UTF8, 0, ws, -1, s, bytes, NULL, NULL)) {
+        MR_fatal_error(""ML_wide_to_utf8: WideCharToMultiByte failed"");
+    }
+    return s;
+}
+
+#endif /* MR_WIN32 */
+").
+
 %----------------------------------------------------------------------------%
 %
 % Input predicates
@@ -9941,7 +10020,7 @@ command_line_argument(_, "") :-
     }
 ").
 
-/*---------------------------------------------------------------------------*/
+%-----------------------------------------------------------------------------%
 
 % io.getenv and io.setenv.
 
@@ -10100,7 +10179,7 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
         true
     ).
 
-/*---------------------------------------------------------------------------*/
+%-----------------------------------------------------------------------------%
 
 :- pred io.do_make_temp(string::in, string::in, string::in,
     string::out, int::out, string::out, io::di, io::uo) is det.
@@ -10164,6 +10243,7 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
     int     len, err, fd, num_tries;
     char    countstr[256];
     MR_Word filename_word;
+    int     flags;
 
     len = strlen(Dir) + 1 + 5 + 3 + 1 + 3 + 1;
     /* Dir + / + Prefix + counter_high + . + counter_low + \\0 */
@@ -10183,8 +10263,13 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
         strncat(FileName, countstr, 3);
         strcat(FileName, ""."");
         strncat(FileName, countstr + 3, 3);
+        flags = O_WRONLY | O_CREAT | O_EXCL;
         do {
-            fd = open(FileName, O_WRONLY | O_CREAT | O_EXCL, 0600);
+            #ifdef MR_WIN32
+                fd = _wopen(ML_utf8_to_wide(FileName), flags, 0600);
+            #else
+                fd = open(FileName, flags, 0600);
+            #endif
         } while (fd == -1 && MR_is_eintr(errno));
         num_tries++;
         ML_io_tempnam_counter += (1 << num_tries);
@@ -10349,7 +10434,7 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
         end.
 ").
 
-/*---------------------------------------------------------------------------*/
+%-----------------------------------------------------------------------------%
 
 :- pragma foreign_decl("C", "
 
@@ -10488,7 +10573,11 @@ io.remove_file(FileName, Result, !IO) :-
     [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
         does_not_affect_liveness, no_sharing],
 "
+#ifdef MR_WIN32
+    RetVal = _wremove(ML_utf8_to_wide(FileName));
+#else
     RetVal = remove(FileName);
+#endif
     ML_maybe_make_err_msg(RetVal != 0, errno, ""remove failed: "",
         MR_ALLOC_ID, MR_TRUE, RetStr);
 ").
@@ -10626,7 +10715,12 @@ io.rename_file(OldFileName, NewFileName, Result, IO0, IO) :-
     [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
         does_not_affect_liveness, no_sharing],
 "
+#ifdef MR_WIN32
+    RetVal = _wrename(ML_utf8_to_wide(OldFileName),
+        ML_utf8_to_wide(NewFileName));
+#else
     RetVal = rename(OldFileName, NewFileName);
+#endif
     ML_maybe_make_err_msg(RetVal != 0, errno, ""rename failed: "",
         MR_ALLOC_ID, MR_TRUE, RetStr);
 ").

--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list