Skip to content

Commit

Permalink
[IO-670] refine IOUtils.contentEquals(Reader, Reader)
Browse files Browse the repository at this point in the history
  • Loading branch information
XenoAmess committed Jan 31, 2025
1 parent e014cb5 commit 2b92efc
Show file tree
Hide file tree
Showing 4 changed files with 911 additions and 8 deletions.
96 changes: 88 additions & 8 deletions src/main/java/org/apache/commons/io/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import org.apache.commons.io.function.IOTriFunction;
import org.apache.commons.io.input.CharSequenceReader;
import org.apache.commons.io.input.QueueInputStream;
import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader;
import org.apache.commons.io.output.AppendableWriter;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.io.output.NullOutputStream;
Expand Down Expand Up @@ -1029,14 +1030,46 @@ private static boolean contentEquals(final Stream<?> stream1, final Stream<?> st
}

// TODO Consider making public
private static boolean contentEqualsIgnoreEOL(final BufferedReader reader1, final BufferedReader reader2) {
if (reader1 == reader2) {
return true;
}
if (reader1 == null || reader2 == null) {
return false;
/**
 * Compares the content of two readers character by character, tolerating one
 * trailing line terminator on the longer input.
 * <p>
 * Each iteration peeks one character from both readers. The method returns
 * {@code true} when both are at EOF, {@code false} as soon as the peeked
 * characters differ, and otherwise consumes both characters and continues.
 * When exactly one reader is at EOF, the other is accepted only if the
 * previously compared character was not {@code '\n'} and
 * {@code inputOnlyHaveCRLForEOF} accepts the remaining input.
 * </p>
 * <p>
 * NOTE(review): this assumes {@code peek()} returns the next character without
 * consuming it, {@code eat()} consumes it, and the reader presents every line
 * ending as {@code '\n'} - confirm against LineEndUnifiedBufferedReader.
 * </p>
 *
 * @param bufferedInput1 the first reader; not null-checked here
 * @param bufferedInput2 the second reader; not null-checked here
 * @return {@code true} if the contents are considered equal, {@code false} otherwise
 * @throws UncheckedIOException as declared; presumably propagated from the reader operations - confirm
 */
private static boolean contentEqualsIgnoreEOL(
final LineEndUnifiedBufferedReader bufferedInput1,
final LineEndUnifiedBufferedReader bufferedInput2
) throws UncheckedIOException {
/*
 * Tracks whether the most recently compared character was '\n'.
 * Intent: "a" and "a\n" are treated as equal (one trailing line end is ignored),
 * but "\n" and "\n\n" are not (the extra line end follows another line end).
 * It starts as true, so an empty input never equals a non-empty one.
 */
boolean justNewLine = true;

int currentChar1;
int currentChar2;

while (true) {
// Look at the next character of each input before deciding whether to consume it.
currentChar1 = bufferedInput1.peek();
currentChar2 = bufferedInput2.peek();

if (currentChar1 == EOF) {
if (currentChar2 == EOF) {
// Both inputs ended together with no mismatch.
return true;
} else {
// Only input 1 ended: input 2 may still hold a single trailing line end.
if (!justNewLine) {
return inputOnlyHaveCRLForEOF(bufferedInput2, currentChar2);
}
return false;
}
} else if (currentChar2 == EOF) {
// Only input 2 ended: mirror of the case above, applied to input 1.
if (!justNewLine) {
return inputOnlyHaveCRLForEOF(bufferedInput1, currentChar1);
}
return false;
}
if (currentChar1 != currentChar2) {
return false;
}
// Characters match: remember whether it was a line end, then consume both.
justNewLine = currentChar1 == '\n';
bufferedInput1.eat();
bufferedInput2.eat();
}
// NOTE(review): this statement is unreachable (the loop above has no break) and
// refers to reader1/reader2, which are not parameters of this overload. It looks
// like a removed line carried over from the diff view - confirm and delete.
return contentEquals(reader1.lines(), reader2.lines());
}

/**
Expand All @@ -1062,7 +1095,54 @@ public static boolean contentEqualsIgnoreEOL(final Reader reader1, final Reader
if (reader1 == null || reader2 == null) {
return false;
}
return contentEqualsIgnoreEOL(toBufferedReader(reader1), toBufferedReader(reader2));

final LineEndUnifiedBufferedReader bufferedInput1;
if (reader1 instanceof LineEndUnifiedBufferedReader) {
bufferedInput1 = (LineEndUnifiedBufferedReader) reader1;
} else {
bufferedInput1 = new LineEndUnifiedBufferedReader(reader1);
}

final LineEndUnifiedBufferedReader bufferedInput2;
if (reader2 instanceof LineEndUnifiedBufferedReader) {
bufferedInput2 = (LineEndUnifiedBufferedReader) reader2;
} else {
bufferedInput2 = new LineEndUnifiedBufferedReader(reader2);
}
return contentEqualsIgnoreEOL(bufferedInput1, bufferedInput2);
}

/**
 * Checks whether the rest of {@code input} consists of a single line end followed by EOF.
 * <p>
 * Private helper used only by {@code contentEqualsIgnoreEOL}, which calls it for the
 * reader that still has content after the other reader has reached EOF.
 * </p>
 *
 * @param input the reader to inspect; one character is consumed and one more is read
 *        when {@code currentChar} is {@code '\n'}
 * @param currentChar the character most recently peeked from {@code input}
 * @return {@code true} if {@code currentChar} is {@code '\n'} and the read that follows
 *         it yields EOF; {@code false} otherwise
 * @throws UncheckedIOException as declared; presumably propagated from the reader
 *         operations rather than raised here - confirm
 * @see #contentEqualsIgnoreEOL(Reader, Reader)
 */
private static boolean inputOnlyHaveCRLForEOF(LineEndUnifiedBufferedReader input, int currentChar) throws UncheckedIOException {
    /*
     * There is deliberately no separate "currentChar == EOF" case.
     * contentEqualsIgnoreEOL only calls this helper for the reader whose peeked
     * character was not EOF, so that case cannot occur on the existing call paths.
     */
    if (currentChar != '\n') {
        // Anything other than a line end means real content is left over.
        return false;
    }

    // Drop the peeked line end (presumably what eat() does - confirm), then make
    // sure nothing else follows it.
    input.eat();
    final int following = input.read();
    return following == EOF;
}

/**
Expand Down
Loading

0 comments on commit 2b92efc

Please sign in to comment.