Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Make sure that dafny run always requests UTF-8 encoded output #3049

Merged
merged 6 commits into from
Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Source/DafnyCore/Compilers/Compiler-Csharp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3284,6 +3284,7 @@ public override bool RunTargetProgram(string dafnyProgramName, string targetProg
throw new Exception("Cannot call run target on a compilation whose assembly has no entry.");
}
try {
Console.OutputEncoding = System.Text.Encoding.UTF8; // Force UTF-8 output in dafny run (#2999)
object[] parameters = entry.GetParameters().Length == 0 ? new object[] { } : new object[] { DafnyOptions.O.MainArgs.ToArray() };
entry.Invoke(null, parameters);
return true;
Expand Down
3 changes: 2 additions & 1 deletion Source/DafnyCore/Compilers/Compiler-java.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2285,7 +2285,8 @@ public override bool CompileTargetProgram(string dafnyProgramName, string target
public override bool RunTargetProgram(string dafnyProgramName, string targetProgramText, string callToMain, string /*?*/ targetFilename,
ReadOnlyCollection<string> otherFileNames, object compilationResult, TextWriter outputWriter) {
var psi = PrepareProcessStartInfo("java",
args: DafnyOptions.O.MainArgs.Prepend(Path.GetFileNameWithoutExtension(targetFilename)));
new List<string> { "-Dfile.encoding=UTF-8", Path.GetFileNameWithoutExtension(targetFilename) }
.Concat(DafnyOptions.O.MainArgs));
psi.WorkingDirectory = Path.GetFullPath(Path.GetDirectoryName(targetFilename));
psi.EnvironmentVariables["CLASSPATH"] = GetClassPath(targetFilename);
return 0 == RunProcess(psi, outputWriter);
Expand Down
1 change: 1 addition & 0 deletions Source/DafnyCore/Compilers/Compiler-js.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,7 @@ bool SendToNewNodeProcess(string dafnyProgramName, string targetProgramText, str
}
nodeProcess.StandardInput.Write(targetProgramText);
if (callToMain != null && DafnyOptions.O.RunAfterCompile) {
nodeProcess.StandardInput.WriteLine("require('process').stdout.setEncoding(\"utf-8\");");
nodeProcess.StandardInput.WriteLine("require('process').argv = [\"node\",\"stdin\", " + string.Join(",", DafnyOptions.O.MainArgs.Select(ToStringLiteral)) + "];");
nodeProcess.StandardInput.Write(callToMain);
}
Expand Down
1 change: 1 addition & 0 deletions Source/DafnyCore/Compilers/Compiler-python.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1745,6 +1745,7 @@ public override bool RunTargetProgram(string dafnyProgramName, string targetProg
string targetFilename, ReadOnlyCollection<string> otherFileNames, object compilationResult, TextWriter outputWriter) {
Contract.Requires(targetFilename != null || otherFileNames.Count == 0);
var psi = PrepareProcessStartInfo("python3", DafnyOptions.O.MainArgs.Prepend(targetFilename));
psi.EnvironmentVariables["PYTHONIOENCODING"] = "utf8";
return 0 == RunProcess(psi, outputWriter);
}
}
Expand Down
2 changes: 2 additions & 0 deletions Source/DafnyDriver/DafnyDriver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ public static bool CompileDafnyProgram(Dafny.Program dafnyProgram, string dafnyP
Contract.Requires(dafnyProgram != null);
Contract.Assert(dafnyProgramName != null);

// TODO: `outputWriter` seems to always be passed in as `null`. Remove it?
if (outputWriter == null) {
outputWriter = Console.Out;
}
Expand Down Expand Up @@ -702,6 +703,7 @@ public static bool CompileDafnyProgram(Dafny.Program dafnyProgram, string dafnyP
outputWriter.WriteLine("Running...");
outputWriter.WriteLine();
}

compiledCorrectly = compiler.RunTargetProgram(dafnyProgramName, targetProgramText, callToMain, paths.Filename, otherFileNames, compilationResult, outputWriter);
} else {
// make sure to give some feedback to the user
Expand Down
2 changes: 1 addition & 1 deletion Source/DafnyRuntime/DafnyRuntime.go
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ func (seq Seq) UniqueElements() Set {
func (seq Seq) String() string {
if seq.isString {
s := ""
// Note this doesn't produce the right string in UTF-8,
// FIXME: Note this doesn't produce the right string in UTF-8,
// since it converts surrogates independently.
for _, c := range seq.contents {
s += c.(Char).String()
Expand Down
12 changes: 9 additions & 3 deletions Source/XUnitExtensions/Lit/ShellLitCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,17 @@ public ShellLitCommand(string shellCommand, IEnumerable<string> arguments, IEnum
// codepage. That's a bug there, and in the meantime the best hack we
// have is to override that codepage by setting `OutputEncoding`.

Console.OutputEncoding = Encoding.UTF8;
process.StartInfo.CreateNoWindow = false;
Console.OutputEncoding = System.Text.Encoding.UTF8;

// Finally, for Java + Ubuntu, we make sure to set LANG:
process.StartInfo.EnvironmentVariables.Add("LANG", "C.UTF-8");
process.StartInfo.EnvironmentVariables["LANG"] = "C.UTF-8";
// … and for Python + Windows, we set PYTHONIOENCODING:
process.StartInfo.EnvironmentVariables.Add("PYTHONIOENCODING", "UTF-8");
process.StartInfo.EnvironmentVariables["PYTHONIOENCODING"] = "UTF-8";

// Note that all of this, except the Console.OutputEncoding part, is necessary only if we run compiled Dafny
// artifacts directly, since `dafny run` already enforces UTF-8 output. The Console.OutputEncoding part is still
// needed because that is also what C# will use to decode the output of `process`.

process.Start();
if (inputReader != null) {
Expand All @@ -104,6 +108,8 @@ public ShellLitCommand(string shellCommand, IEnumerable<string> arguments, IEnum
process.StandardInput.Write(input);
process.StandardInput.Close();
}

// FIXME the code below will deadlock if process fills the stderr buffer.
string output = process.StandardOutput.ReadToEnd();
outputWriter?.Write(output);
outputWriter?.Close();
Expand Down
14 changes: 10 additions & 4 deletions Test/metatests/OutputEncoding.dfy
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
// RUN: %baredafny verify %args "%s" > "%t"
// RUN: %baredafny run --no-verify --target=cs %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=js %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=go %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=py %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=cs %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=js %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=go %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=py %args "%s" >> "%t"
// RUN: %baredafny run --no-verify --target=java %args "%s" >> "%t"
// RUN: %diff "%s.expect" "%t"

// Entry point of the output-encoding metatest. It prints one line containing a
// non-ASCII character; the RUN lines at the top of this file run the program
// with several `--target` backends, append each run's output to the same file,
// and diff that file against the `.expect` file.
method Main() {
// This works fine in all languages because € is a single UTF-16 code unit.
// The string is built by concatenating a literal with a one-element character
// sequence whose element is given numerically (0x20AC).
print "Euro sign: " + [0x20AC as char], "\n"; // €

// Unfortunately, the following does *not* work in all languages: some of our
// compilers don't correctly handle paired UTF-16 code units (e.g. Go)
// The surrogate-pair case below is therefore left commented out.
// print "Emoji: " + [0xD83D as char, 0xDE14 as char], "\n"; // 😔
}
3 changes: 3 additions & 0 deletions Test/metatests/OutputEncoding.dfy.expect
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ Euro sign: €

Dafny program verifier did not attempt verification
Euro sign: €

Dafny program verifier did not attempt verification
Euro sign: €
18 changes: 17 additions & 1 deletion docs/DafnyRef/Statements.md
Original file line number Diff line number Diff line change
Expand Up @@ -1713,14 +1713,30 @@ explicitly invoke this conversion.
One can always write an explicit function to convert a data value to a string
and then call it explicitly in a `print` statement or elsewhere.

Dafny does not keep track of print effects. `print` statements are allowed
By default, Dafny does not keep track of print effects, but this can be changed
using the `-trackPrintEffects` command line flag. `print` statements are allowed
only in non-ghost contexts and not in expressions, with one exception.
The exception is that a function-by-method may contain `print` statements,
whose effect may be observed as part of the run-time evaluation of such functions.

The verifier checks that each expression is well-defined, but otherwise
ignores the `print` statement.

<a id="print-encoding"></a>

**Note:** `print` writes to standard output. To improve compatibility with
native code and external libraries, the process of encoding Dafny strings passed
to `print` into standard-output byte strings is left to the runtime of the
language that the Dafny code is compiled to (some language runtimes use UTF-8 in
all cases; others obey the current locale or console encoding).

In most cases, the standard-output encoding can be set before running the
compiled program using language-specific flags or environment variables
(e.g. `-Dfile.encoding=` for Java). This is in fact how `dafny run` operates:
it uses language-specific flags and variables to enforce UTF-8 output regardless
of the target language (but note that the C++ and Go backends currently have
limited support for UTF-16 surrogates).
Comment on lines +1735 to +1738
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great but I feel it should also be in documentation specifically for the dafny run command and not just for the print statement. Perhaps http://dafny.org/dafny/DafnyRef/DafnyRef#256-using-dafny-from-the-command-line?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.


## 20.20. Reveal Statement {#sec-reveal-statement}
````grammar
RevealStmt =
Expand Down
2 changes: 2 additions & 0 deletions docs/DafnyRef/UserGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ If building a `.dfy` file containing the `Main` method requires additional
the `--input` option for each extra file or use `dafny build` to build the executable and then run it in an
additional step.

**Note:** `dafny run` will typically produce the same results as the executables produced by `dafny build`. The only expected differences are in performance (`dafny run` may not optimize as much as `dafny build`) and in target-language-specific configuration (e.g. encoding: `dafny run` sets language-specific flags to request UTF-8 output for the [`print`](#print-encoding) statement in all languages, whereas `dafny build` leaves language-specific runtime configuration to the user).

The command-line also expects the following:
- Files are designated by absolute paths or paths relative to the current
working directory. A command-line argument not matching a known option is considered a filepath, and likely one
Expand Down
2 changes: 2 additions & 0 deletions docs/dev/news/3049.feat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
`dafny run` now consistently requests UTF-8 output from compiled code.
Use `chcp 65001` if you see garbled output on Windows.