[m-rev.] for review: unicode paths on windows
Peter Wang
novalazy at gmail.com
Wed Dec 14 13:04:33 AEDT 2011
Branches: main, 11.07
Support Unicode file paths on Windows with C back-ends.
This works by calling the wide-character versions of file functions,
e.g. _wfopen instead of fopen. To do so we have to convert between the
internal string representation (UTF-8) and that used by the Windows API
(UTF-16).
library/io.m:
Add functions ML_utf8_to_wide and ML_wide_to_utf8 to convert path strings
between UTF-8 and the wide-character (UTF-16) representation on Windows.
Call Unicode-aware functions in io.open_input, etc.
Call Unicode-aware functions in io.file_modification_time,
io.file_type, io.check_file_accessibility, io.file_id, io.make_temp,
io.remove_file, io.rename_file.
library/dir.m:
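Call Unicode-aware functions in dir.make_single_directory, and in dir.open
and dir.read_entry, which are used by the fold predicates.
Pass the first file name to make_win32_dir_open_result_ok as a string,
and delete copy_c_string, which is no longer needed.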
diff --git a/library/dir.m b/library/dir.m
index 984fb1d..3c4a4a0 100644
--- a/library/dir.m
+++ b/library/dir.m
@@ -1092,7 +1092,7 @@ dir.make_single_directory(DirName, Result, !IO) :-
may_not_duplicate],
"
#if defined(MR_WIN32)
- if (CreateDirectory(DirName, NULL)) {
+ if (CreateDirectoryW(ML_utf8_to_wide(DirName), NULL)) {
Result = ML_make_mkdir_res_ok();
} else {
int error;
@@ -1627,16 +1627,17 @@ dir.open(DirName, Res, !IO) :-
terminates, will_not_modify_trail, does_not_affect_liveness],
"
#if defined(MR_WIN32)
- WIN32_FIND_DATA file_data;
- ML_DIR_STREAM Dir;
- LPTSTR FirstFileName;
- char *dir_pattern;
- MR_Integer is_readable;
+ WIN32_FIND_DATAW file_data;
+ ML_DIR_STREAM Dir;
+ LPTSTR FirstFileName;
+ char *dir_pattern;
+ MR_Integer is_readable;
+ char *filename;
ML_check_dir_readable(DirName, &is_readable, &Result);
if (is_readable) {
dir_pattern = ML_make_path_name(DirName, MR_make_string_const(""*""));
- Dir = FindFirstFile(dir_pattern, &file_data);
+ Dir = FindFirstFileW(ML_utf8_to_wide(dir_pattern), &file_data);
if (Dir == INVALID_HANDLE_VALUE) {
int error = GetLastError();
if (error == ERROR_NO_MORE_FILES) {
@@ -1645,8 +1646,8 @@ dir.open(DirName, Res, !IO) :-
ML_make_dir_open_result_error(error, &Result);
}
} else {
- ML_make_win32_dir_open_result_ok(Dir,
- (MR_Word) file_data.cFileName, &Result);
+ filename = ML_wide_to_utf8(file_data.cFileName, MR_ALLOC_ID);
+ ML_make_win32_dir_open_result_ok(Dir, filename, &Result);
}
}
@@ -1768,14 +1769,13 @@ dir.check_dir_readable(DirName, IsReadable, Result, !IO) :-
dir.read_first_entry(Dir, Result, !IO) :-
dir.read_entry(Dir, Result, !IO).
-:- pred make_win32_dir_open_result_ok(dir.stream::in, c_pointer::in,
+:- pred make_win32_dir_open_result_ok(dir.stream::in, string::in,
io.result({dir.stream, string})::out, io::di, io::uo) is det.
:- pragma foreign_export("C",
make_win32_dir_open_result_ok(in, in, out, di, uo),
"ML_make_win32_dir_open_result_ok").
-make_win32_dir_open_result_ok(Dir, FirstFilePtr, Result, !IO) :-
- FirstFile0 = copy_c_string(FirstFilePtr),
+make_win32_dir_open_result_ok(Dir, FirstFile0, Result, !IO) :-
(
( FirstFile0 = dir.this_directory
; FirstFile0 = dir.parent_directory
@@ -1800,27 +1800,6 @@ make_win32_dir_open_result_ok(Dir, FirstFilePtr, Result, !IO) :-
Result = ok({Dir, FirstFile0})
).
- % This is needed because the heap pointer is not valid in the
- % `may_call_mercury' foreign proc for dir.open_2. Instead, we pass it
- % as a c_pointer to copy_c_string, which doesn't call Mercury, so the
- % heap pointer is valid. Passing it as a c_pointer avoids having the
- % accurate garbage collector attempt to copy a potentially unaligned
- % string.
- %
-:- func copy_c_string(c_pointer) = string.
-
-copy_c_string(_) = _ :-
- error("dir.copy_c_string should only be called " ++
- "by code generated by C backends").
-
-:- pragma foreign_proc("C",
- copy_c_string(Ptr::in) = (Str::out),
- [will_not_call_mercury, promise_pure, thread_safe,
- will_not_modify_trail, does_not_affect_liveness],
-"
- MR_make_aligned_string_copy_msg(Str, (char *) Ptr, MR_ALLOC_ID);
-").
-
:- func make_dir_open_result_eof = io.result({dir.stream, string}).
:- pragma foreign_export("C", (make_dir_open_result_eof = out),
"ML_make_dir_open_result_eof").
@@ -1945,13 +1924,12 @@ dir.read_entry(Dir0, Res, !IO) :-
will_not_modify_trail, does_not_affect_liveness],
"
#if defined(MR_WIN32)
- WIN32_FIND_DATA file_data;
+ WIN32_FIND_DATAW file_data;
Dir = Dir0;
- if (FindNextFile(Dir, &file_data)) {
+ if (FindNextFileW(Dir, &file_data)) {
Status = 1;
- MR_make_aligned_string_copy_msg(FileName, file_data.cFileName,
- MR_ALLOC_ID);
+ FileName = ML_wide_to_utf8(file_data.cFileName, MR_ALLOC_ID);
} else {
Error = GetLastError();
Status = (Error == ERROR_NO_MORE_FILES ? -1 : 0);
diff --git a/library/io.m b/library/io.m
index 8d63129..8efa737 100644
--- a/library/io.m
+++ b/library/io.m
@@ -2836,8 +2836,15 @@ io.file_modification_time(File, Result, !IO) :-
does_not_affect_liveness, no_sharing],
"
#ifdef MR_HAVE_STAT
+ #ifdef MR_WIN32
+ struct _stat s;
+ int stat_result = _wstat(ML_utf8_to_wide(FileName), &s);
+ #else
struct stat s;
- if (stat(FileName, &s) == 0) {
+ int stat_result = stat(FileName, &s);
+ #endif
+
+ if (stat_result == 0) {
Time = ML_construct_time_t(s.st_mtime);
Msg = MR_string_const("""", 0);
Status = 1;
@@ -2852,6 +2859,7 @@ io.file_modification_time(File, Result, !IO) :-
""io.file_modification_time not available on this platform"");
#endif
").
+
:- pragma foreign_proc("C#",
io.file_modification_time_2(FileName::in, Status::out, Msg::out,
Time::out, _IO0::di, _IO::uo),
@@ -2979,8 +2987,12 @@ file_type_implemented :-
does_not_affect_liveness, no_sharing],
"
#ifdef MR_HAVE_STAT
+ #ifdef MR_WIN32
+ struct _stat s;
+ int stat_result = _wstat(ML_utf8_to_wide(FileName), &s);
+ #else
struct stat s;
- int stat_result;
+ int stat_result;
if (FollowSymLinks == 1) {
stat_result = stat(FileName, &s);
@@ -2991,6 +3003,7 @@ file_type_implemented :-
stat_result = stat(FileName, &s);
#endif
}
+ #endif
if (stat_result == 0) {
MR_Word type;
@@ -3363,7 +3376,12 @@ io.check_file_accessibility(FileName, AccessTypes, Result, !IO) :-
#endif
}
+ #ifdef MR_WIN32
+ access_result = _waccess(ML_utf8_to_wide(FileName), mode);
+ #else
access_result = access(FileName, mode);
+ #endif
+
if (access_result == 0) {
Result = ML_make_io_res_0_ok();
} else {
@@ -3915,9 +3933,15 @@ io.file_id(FileName, Result, !IO) :-
does_not_affect_liveness, no_sharing],
"
#ifdef MR_HAVE_STAT
+ #ifdef MR_WIN32
+ struct _stat s;
+ int stat_result = _wstat(ML_utf8_to_wide(FileName), &s);
+ #else
struct stat s;
+ int stat_result = stat(FileName, &s);
+ #endif
- if (stat(FileName, &s) == 0) {
+ if (stat_result == 0) {
FileId.device = s.st_dev;
FileId.inode = s.st_ino;
Msg = MR_string_const("""", 0);
@@ -5384,7 +5408,7 @@ io.progname_base(DefaultName, PrognameBase, !IO) :-
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
-% environment interface predicates
+% Environment interface predicates
:- pragma promise_pure(io.get_environment_var/4).
@@ -5684,6 +5708,12 @@ void mercury_print_string(MercuryFilePtr mf, const char *s);
int mercury_get_byte(MercuryFilePtr mf);
void mercury_close(MercuryFilePtr mf);
int ML_fprintf(MercuryFilePtr mf, const char *format, ...);
+
+#ifdef MR_WIN32
+ wchar_t *ML_utf8_to_wide(const char *s);
+ char *ML_wide_to_utf8(const wchar_t *ws,
+ MR_AllocSiteInfoPtr alloc_id);
+#endif
").
:- pragma foreign_code("C#", "
@@ -6908,7 +6938,12 @@ mercury_open(const char *filename, const char *openmode,
MercuryFilePtr mf;
FILE *f;
+#ifdef MR_WIN32
+ f = _wfopen(ML_utf8_to_wide(filename), ML_utf8_to_wide(openmode));
+#else
f = fopen(filename, openmode);
+#endif
+
if (f == NULL) {
return NULL;
}
@@ -7403,6 +7438,50 @@ ML_fprintf(MercuryFilePtr mf, const char *format, ...)
}
").
+:- pragma foreign_code("C", "
+#ifdef MR_WIN32
+
+/*
+** Accessing Unicode file names on Windows requires that we use the functions
+** taking wide character strings.
+**
+** ML_utf8_to_wide converts the NUL-terminated UTF-8 string s into a newly
+** allocated wide character string, for passing to the wide character
+** variants of the Windows API and C runtime functions.
+**
+** MultiByteToWideChar is called twice: first with no output buffer, to
+** obtain the required length in wide characters, and then again to do the
+** conversion into a buffer of that length. If either call returns zero
+** the failure is reported with MR_fatal_error.
+**
+** The result is allocated with MR_GC_NEW_ARRAY, and the callers visible in
+** this change never free it explicitly.
+** NOTE(review): presumably the buffer is reclaimed by the collector; confirm
+** for grades that do not use the conservative collector.
+*/
+wchar_t *
+ML_utf8_to_wide(const char *s)
+{
+ int wslen;
+ wchar_t *ws;
+
+ wslen = MultiByteToWideChar(CP_UTF8, 0, s, -1, NULL, 0);
+ if (wslen == 0) {
+ MR_fatal_error(""ML_utf8_to_wide: MultiByteToWideChar failed"");
+ }
+ ws = MR_GC_NEW_ARRAY(wchar_t, wslen);
+ if (0 == MultiByteToWideChar(CP_UTF8, 0, s, -1, ws, wslen)) {
+ MR_fatal_error(""ML_utf8_to_wide: MultiByteToWideChar failed"");
+ }
+ return ws;
+}
+
+/*
+** ML_wide_to_utf8 converts the NUL-terminated wide character string ws
+** (e.g. a file name returned by the Windows API) into a UTF-8 string
+** allocated with MR_allocate_aligned_string_msg, attributing the
+** allocation to alloc_id.
+**
+** WideCharToMultiByte is called twice: first with no output buffer, to
+** obtain the required size in bytes, and then again to do the conversion.
+** If either call returns zero the failure is reported with MR_fatal_error.
+**
+** NOTE(review): because the input length is passed as -1, the byte count
+** should already include the terminating NUL according to the Win32
+** documentation; confirm whether MR_allocate_aligned_string_msg expects
+** a length with or without the terminator.
+*/
+char *
+ML_wide_to_utf8(const wchar_t *ws, MR_AllocSiteInfoPtr alloc_id)
+{
+ char *s;
+ int bytes;
+
+ bytes = WideCharToMultiByte(CP_UTF8, 0, ws, -1, NULL, 0, NULL, NULL);
+ if (bytes == 0) {
+ MR_fatal_error(""ML_wide_to_utf8: WideCharToMultiByte failed"");
+ }
+ MR_allocate_aligned_string_msg(s, bytes, alloc_id);
+ if (0 == WideCharToMultiByte(CP_UTF8, 0, ws, -1, s, bytes, NULL, NULL)) {
+ MR_fatal_error(""ML_wide_to_utf8: WideCharToMultiByte failed"");
+ }
+ return s;
+}
+
+#endif /* MR_WIN32 */
+").
+
%----------------------------------------------------------------------------%
%
% Input predicates
@@ -9941,7 +10020,7 @@ command_line_argument(_, "") :-
}
").
-/*---------------------------------------------------------------------------*/
+%-----------------------------------------------------------------------------%
% io.getenv and io.setenv.
@@ -10100,7 +10179,7 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
true
).
-/*---------------------------------------------------------------------------*/
+%-----------------------------------------------------------------------------%
:- pred io.do_make_temp(string::in, string::in, string::in,
string::out, int::out, string::out, io::di, io::uo) is det.
@@ -10164,6 +10243,7 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
int len, err, fd, num_tries;
char countstr[256];
MR_Word filename_word;
+ int flags;
len = strlen(Dir) + 1 + 5 + 3 + 1 + 3 + 1;
/* Dir + / + Prefix + counter_high + . + counter_low + \\0 */
@@ -10183,8 +10263,13 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
strncat(FileName, countstr, 3);
strcat(FileName, ""."");
strncat(FileName, countstr + 3, 3);
+ flags = O_WRONLY | O_CREAT | O_EXCL;
do {
- fd = open(FileName, O_WRONLY | O_CREAT | O_EXCL, 0600);
+ #ifdef MR_WIN32
+ fd = _wopen(ML_utf8_to_wide(FileName), flags, 0600);
+ #else
+ fd = open(FileName, flags, 0600);
+ #endif
} while (fd == -1 && MR_is_eintr(errno));
num_tries++;
ML_io_tempnam_counter += (1 << num_tries);
@@ -10349,7 +10434,7 @@ io.make_temp(Dir, Prefix, Name, !IO) :-
end.
").
-/*---------------------------------------------------------------------------*/
+%-----------------------------------------------------------------------------%
:- pragma foreign_decl("C", "
@@ -10488,7 +10573,11 @@ io.remove_file(FileName, Result, !IO) :-
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
+#ifdef MR_WIN32
+ RetVal = _wremove(ML_utf8_to_wide(FileName));
+#else
RetVal = remove(FileName);
+#endif
ML_maybe_make_err_msg(RetVal != 0, errno, ""remove failed: "",
MR_ALLOC_ID, MR_TRUE, RetStr);
").
@@ -10626,7 +10715,12 @@ io.rename_file(OldFileName, NewFileName, Result, IO0, IO) :-
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
+#ifdef MR_WIN32
+ RetVal = _wrename(ML_utf8_to_wide(OldFileName),
+ ML_utf8_to_wide(NewFileName));
+#else
RetVal = rename(OldFileName, NewFileName);
+#endif
ML_maybe_make_err_msg(RetVal != 0, errno, ""rename failed: "",
MR_ALLOC_ID, MR_TRUE, RetStr);
").
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews
mailing list