Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use lowercase for charsets #11741 #12347

Merged
merged 12 commits into from
Oct 16, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.time.Duration;
import java.util.List;
import java.util.Locale;
Expand All @@ -28,6 +29,7 @@
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpStatus;
import org.eclipse.jetty.http.HttpURI;
import org.eclipse.jetty.http.MimeTypes;
import org.eclipse.jetty.http.Trailers;
import org.eclipse.jetty.io.Content;
import org.eclipse.jetty.server.Context;
Expand Down Expand Up @@ -118,6 +120,8 @@ public boolean handle(Request request, Response response, Callback callback) thr
// Gets the request Content-Type.
// Replaces:
// - servletRequest.getContentType()
MimeTypes.Type mimeType = Request.getContentMimeType(request);
Charset charset = Request.getCharset(request);
String contentType = request.getHeaders().get(HttpHeader.CONTENT_TYPE);

// Gets the request Content-Length.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.Response;
import org.eclipse.jetty.util.Callback;
import org.hamcrest.Matchers;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand Down Expand Up @@ -114,7 +116,7 @@ public boolean handle(Request request, Response response, Callback callback) thr
assertEquals(200, response.getStatus());
assertEquals(content, response.getContentAsString());
assertEquals(mediaType, response.getMediaType());
assertEquals(encoding, response.getEncoding());
assertThat(response.getEncoding(), Matchers.equalToIgnoringCase(encoding));
}

@ParameterizedTest
Expand Down Expand Up @@ -144,6 +146,6 @@ public boolean handle(Request request, Response response, Callback callback) thr
assertEquals(200, response.getStatus());
assertEquals(content, response.getContentAsString());
assertEquals(mediaType, response.getMediaType());
assertEquals(encoding, response.getEncoding());
assertThat(response.getEncoding(), Matchers.equalToIgnoringCase(encoding));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.Response;
import org.eclipse.jetty.util.Fields;
import org.hamcrest.Matchers;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;

Expand Down Expand Up @@ -96,7 +97,7 @@ public void testFormContentProviderWithDifferentContentType(Scenario scenario) t
protected void service(Request request, Response response) throws Throwable
{
assertEquals("POST", request.getMethod());
assertEquals(contentType, request.getHeaders().get(HttpHeader.CONTENT_TYPE));
assertThat(request.getHeaders().get(HttpHeader.CONTENT_TYPE), Matchers.equalToIgnoringCase(contentType));
assertEquals(content, Content.Source.asString(request));
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.eclipse.jetty.http.HttpTokens.EndOfContent;
Expand Down Expand Up @@ -138,25 +137,16 @@ public class HttpParser
.with(new HttpField(HttpHeader.EXPIRES, "Fri, 01 Jan 1990 00:00:00 GMT"))
.withAll(() ->
{
Map<String, HttpField> map = new LinkedHashMap<>();
// Add common Content types as fields
for (String type : new String[]{
"text/plain", "text/html", "text/xml", "text/json", "application/json", "application/x-www-form-urlencoded"
})
Map<String, HttpField> map = new LinkedHashMap<>();
for (MimeTypes.Type mimetype : MimeTypes.Type.values())
{
HttpField field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type);
map.put(field.toString(), field);

for (String charset : new String[]{"utf-8", "iso-8859-1"})
MimeTypes.ContentTypeField contentTypeField = mimetype.getContentTypeField();
map.put(contentTypeField.toString(), contentTypeField);
if (contentTypeField.getValue().contains(";charset="))
{
PreEncodedHttpField field1 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + ";charset=" + charset);
map.put(field1.toString(), field1);
PreEncodedHttpField field2 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + "; charset=" + charset);
map.put(field2.toString(), field2);
PreEncodedHttpField field3 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + ";charset=" + charset.toUpperCase(Locale.ENGLISH));
map.put(field3.toString(), field3);
PreEncodedHttpField field4 = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, type + "; charset=" + charset.toUpperCase(Locale.ENGLISH));
map.put(field4.toString(), field4);
HttpField contentTypeFieldWithSpace = new MimeTypes.ContentTypeField(contentTypeField.getMimeType(), contentTypeField.getValue().replace(";charset=", "; charset="));
map.put(contentTypeFieldWithSpace.toString(), contentTypeFieldWithSpace);
}
}
return map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public HttpField getContentTypeField(Charset charset)
private final Charset _charset;
private final String _charsetString;
private final boolean _assumedCharset;
private final HttpField _field;
private final ContentTypeField _field;

Type(String name)
{
Expand All @@ -133,18 +133,18 @@ public HttpField getContentTypeField(Charset charset)
_charset = null;
_charsetString = null;
_assumedCharset = false;
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
_field = new ContentTypeField(this);
}

Type(String name, Type base)
{
_string = name;
_base = base;
_base = Objects.requireNonNull(base);
int i = name.indexOf(";charset=");
_charset = Charset.forName(name.substring(i + 9));
_charsetString = _charset.toString().toLowerCase(Locale.ENGLISH);
_assumedCharset = false;
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
_field = new ContentTypeField(this);
}

Type(String name, Charset cs)
Expand All @@ -154,9 +154,12 @@ public HttpField getContentTypeField(Charset charset)
_charset = cs;
_charsetString = _charset == null ? null : _charset.toString().toLowerCase(Locale.ENGLISH);
_assumedCharset = true;
_field = new PreEncodedHttpField(HttpHeader.CONTENT_TYPE, _string);
_field = new ContentTypeField(this);
}

/**
* @return The {@link Charset} for this type or {@code null} if it is not known
*/
public Charset getCharset()
{
return _charset;
Expand All @@ -167,6 +170,11 @@ public String getCharsetString()
return _charsetString;
}

/**
* Check if this type is equal to the type passed as a string
* @param type The type to compare to
* @return {@code true} if this is the same type
*/
public boolean is(String type)
{
return _string.equalsIgnoreCase(type);
Expand All @@ -183,12 +191,15 @@ public String toString()
return _string;
}

/**
* @return {@code true} If the {@link Charset} for this type is assumed rather than being explicitly declared.
*/
public boolean isCharsetAssumed()
{
return _assumedCharset;
}

public HttpField getContentTypeField()
public ContentTypeField getContentTypeField()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should not the method below public HttpField getContentTypeField(Charset charset) also have its return type changed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, because there are two aspect of ContentTypeField: it has a getMimeType method and it is a PreEncodedHttpField. For an arbitrary contentType returned from this method, it is not guaranteed that it will have a known mimeType and it is not known if it is worthwhile going to the effort of pre-encoding.

That is why I think this class belongs as a subtype of this class, as it is pretty special purpose for long held constants for known types.

{
return _field;
}
Expand All @@ -200,6 +211,10 @@ public HttpField getContentTypeField(Charset charset)
return new HttpField(HttpHeader.CONTENT_TYPE, getContentTypeWithoutCharset(_string) + ";charset=" + charset.name());
}

/**
* Get the base type of this type, which is the type without a charset specified
* @return The base type or this type if it is a base type
*/
public Type getBaseType()
{
return _base;
Expand Down Expand Up @@ -227,23 +242,34 @@ public Type getBaseType()
})
.build();

/**
* Get the base value, stripped of any parameters
* @param value The value
* @return A string with any semicolon separated parameters removed
*/
public static String getBase(String value)
{
int index = value.indexOf(';');
return index == -1 ? value : value.substring(0, index);
}

/**
* Get the base type of this type, which is the type without a charset specified
* @param contentType The mimetype as a string
* @return The base type or this type if it is a base type
*/
public static Type getBaseType(String contentType)
{
if (StringUtil.isEmpty(contentType))
return null;
Type type = CACHE.getBest(contentType);
if (type == null)
return null;
if (type.asString().length() == contentType.length())
return type.getBaseType();
if (contentType.charAt(type.asString().length()) == ';')
return type.getBaseType();
contentType = contentType.replace(" ", "");
if (type.asString().length() == contentType.length())
return type.getBaseType();
if (contentType.charAt(type.asString().length()) == ';')
return type.getBaseType();
return null;
{
type = CACHE.get(getBase(contentType));
if (type == null)
return null;
}
return type.getBaseType();
}

public static boolean isKnownLocale(Locale locale)
Expand Down Expand Up @@ -326,6 +352,23 @@ public MimeTypes(MimeTypes defaults)
}
}

/**
* Get the explicit, assumed, or inferred Charset for a HttpField containing a mime type value
* @param field HttpField with a mime type value (e.g. Content-Type)
* @return A {@link Charset} or null;
* @throws IllegalCharsetNameException
* If the given charset name is illegal
* @throws UnsupportedCharsetException
* If no support for the named charset is available
* in this instance of the Java virtual machine
*/
public Charset getCharset(HttpField field) throws IllegalCharsetNameException, UnsupportedCharsetException
{
if (field instanceof ContentTypeField contentTypeField)
return contentTypeField.getMimeType().getCharset();
return getCharset(field.getValue());
}

/**
* Get the explicit, assumed, or inferred Charset for a mime type
* @param mimeType String form or a mimeType
Expand Down Expand Up @@ -876,4 +919,29 @@ else if (' ' != b)
return value;
return builder.toString();
}

/**
* A {@link PreEncodedHttpField} for `Content-Type` that can hold a {@link MimeTypes.Type} field
* for later recovery.
*/
public static class ContentTypeField extends PreEncodedHttpField
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this make more sense as a standalone class?
After all we have all of the other specialized Fields as standalone classes, why is this one special?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only other extension of PreEncodedHttpField we have is ResponseHttpFields.PersesitentPreEncodedHttpField. Of the HttpField extensions, about half are sub-types.

So this is not unusual. It is a PreEncodedHttpField that is extended specifically to hold a MimeTypes.Type, so having it associated with MimeTypes seams reasonable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This ContentTypeField feels like the HostPortHttpField. (also a field used in HttpParser)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kind of agree that this would be better as standalone class, especially now that is the return value of a method: it's shorter to assign to a variable than MimeTypes.ContentTypeField.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As this is a rather special purpose class, I've just made is package protected and provided static methods to extract the info if needed. I could make it fully private if we want to give up on the optimized lookup of content-types with spaces in HttpParser

{
private final Type _type;

public ContentTypeField(MimeTypes.Type type)
{
this(type, type.toString());
}

public ContentTypeField(MimeTypes.Type type, String value)
{
super(HttpHeader.CONTENT_TYPE, value);
_type = type;
}

public Type getMimeType()
{
return _type;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2601,7 +2601,25 @@ public void testRequestMaxHeaderBytesCumulative(String eoln)
@ParameterizedTest
@ValueSource(strings = {"\r\n", "\n"})
@SuppressWarnings("ReferenceEquality")
public void testCachedField(String eoln)
public void testInsensitiveCachedField(String eoln)
{
ByteBuffer buffer = BufferUtil.toBuffer(
"GET / HTTP/1.1" + eoln +
"Content-Type: text/plain;Charset=UTF-8" + eoln +
eoln);

HttpParser.RequestHandler handler = new Handler();
HttpParser parser = new HttpParser(handler);
parseAll(parser, buffer);

HttpField field = _fields.get(0);
assertThat(field.getValue(), is("text/plain;charset=utf-8"));
}

@ParameterizedTest
@ValueSource(strings = {"\r\n", "\n"})
@SuppressWarnings("ReferenceEquality")
public void testDynamicCachedField(String eoln)
{
ByteBuffer buffer = BufferUtil.toBuffer(
"GET / HTTP/1.1" + eoln +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.nio.charset.StandardCharsets;
import java.util.concurrent.CompletableFuture;

import org.eclipse.jetty.http.HttpField;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.MimeTypes;
import org.eclipse.jetty.io.Content;
Expand Down Expand Up @@ -51,7 +52,18 @@ public static Charset getFormEncodedCharset(Request request)
if (!config.getFormEncodedMethods().contains(request.getMethod()))
return null;

String contentType = request.getHeaders().get(HttpHeader.CONTENT_TYPE);
HttpField contentTypeField = request.getHeaders().getField(HttpHeader.CONTENT_TYPE);
if (contentTypeField == null)
return null;

if (contentTypeField instanceof MimeTypes.ContentTypeField contentMimeTypeField)
{
MimeTypes.Type type = contentMimeTypeField.getMimeType();
if (type != null && type.getCharset() != null)
return type.getCharset();
}

String contentType = contentTypeField.getValue();
if (request.getLength() == 0 || StringUtil.isBlank(contentType))
return null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

import org.eclipse.jetty.http.ComplianceViolation;
import org.eclipse.jetty.http.HttpCookie;
import org.eclipse.jetty.http.HttpField;
import org.eclipse.jetty.http.HttpFields;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpScheme;
Expand Down Expand Up @@ -535,6 +536,21 @@ static InputStream asInputStream(Request request)
return Content.Source.asInputStream(request);
}

/**
* Get a known {@link MimeTypes.Type} from the request {@link HttpHeader#CONTENT_TYPE}, if any.
* @param request The request.
* @return A {@link MimeTypes} or {@code null} if the {@code Content-Type} is not set or not known.
*/
static MimeTypes.Type getContentMimeType(Request request)
{
HttpField contentType = request.getHeaders().getField(HttpHeader.CONTENT_TYPE);
if (contentType instanceof MimeTypes.ContentTypeField contentTypeField)
return contentTypeField.getMimeType();
if (contentType == null)
return null;
return MimeTypes.CACHE.get(contentType.getValue());
}

/**
* Get a {@link Charset} from the request {@link HttpHeader#CONTENT_TYPE}, if any.
* @param request The request.
Expand All @@ -544,7 +560,9 @@ static InputStream asInputStream(Request request)
*/
static Charset getCharset(Request request) throws IllegalCharsetNameException, UnsupportedCharsetException
{
String contentType = request.getHeaders().get(HttpHeader.CONTENT_TYPE);
HttpField contentType = request.getHeaders().getField(HttpHeader.CONTENT_TYPE);
if (contentType == null)
return null;
return Objects.requireNonNullElse(request.getContext().getMimeTypes(), MimeTypes.DEFAULTS).getCharset(contentType);
}

Expand Down
Loading
Loading