Skip to content

Commit

Permalink
Use actual CSV parser to correctly handle quoted values (#16)
Browse files Browse the repository at this point in the history
This causes quoted CSV values with embedded commas to be parsed correctly as a single value, e.g. "Testing 1, 2, 3" is now parsed as one value instead of being split at each comma.
  • Loading branch information
Dan Torrey authored and bernd committed Apr 11, 2019
1 parent fa3f199 commit 0983497
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
*/
package org.graylog.integrations.inputs.paloalto;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
Expand All @@ -27,6 +30,11 @@

import javax.annotation.Nullable;
import javax.validation.constraints.NotNull;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -75,8 +83,8 @@ public PaloAltoMessageBase parse(@NotNull String raw) {
String fieldsString = matcher.group(3);

DateTime timestamp = DateTime.parse(timestampString);
ImmutableList<String> fields = ImmutableList.copyOf(Splitter.on(",").split(fieldsString));
return PaloAltoMessageBase.create(source, timestamp, fieldsString, fields.get(3), fields);

return buildPaloAltoMessageBase(timestamp, fieldsString, source);
} else {
LOG.error("Cannot parse malformed Panorama message: {}", raw);
return null;
Expand All @@ -93,12 +101,11 @@ public PaloAltoMessageBase parse(@NotNull String raw) {
// Remove two spaces in one digit day number "Apr 8 01:47:32"
// This solution feels terrible. Sorry.
String dateWithoutYear = matcher.group(1).replaceFirst(DOUBLE_SPACE, SINGLE_SPACE);
DateTime timestamp = SYSLOG_TIMESTAMP_FORMATTER.parseDateTime(dateWithoutYear + SINGLE_SPACE + DateTime.now().getYear());
DateTime timestamp = SYSLOG_TIMESTAMP_FORMATTER.parseDateTime(dateWithoutYear + SINGLE_SPACE + DateTime.now().getYear());
String source = matcher.group(2);
String panData = matcher.group(3);

ImmutableList<String> fields = ImmutableList.copyOf(Splitter.on(",").split(panData));
return PaloAltoMessageBase.create(source, timestamp, panData, fields.get(3), fields);
return buildPaloAltoMessageBase(timestamp, panData, source);
} else {
LOG.error("Cannot parse malformed Syslog message: {}", raw);
return null;
Expand All @@ -114,18 +121,63 @@ public PaloAltoMessageBase parse(@NotNull String raw) {
DateTime timestamp = SYSLOG_TIMESTAMP_FORMATTER.parseDateTime(dateWithoutYear + SINGLE_SPACE + DateTime.now().getYear());
String panData = matcher.group(2);

ImmutableList<String> fields = ImmutableList.copyOf(Splitter.on(",").split(panData));

// No source (host)
return PaloAltoMessageBase.create("", timestamp, panData, fields.get(3), fields);
// No source is supplied, so use a blank one
return buildPaloAltoMessageBase(timestamp, panData, "");
} else {
LOG.error("Cannot parse malformed Syslog message: {}", raw);
return null;
}
}
}

LOG.error("Cannot parse malformed PAN message [unrecognized format]: {}", raw);
return null;
}

/**
 * Builds a {@link PaloAltoMessageBase} from the parts already extracted from a raw message.
 * The payload is split into its CSV fields, and the fourth field (index 3) is handed to
 * {@link PaloAltoMessageBase#create} alongside the complete field list. In the sample messages
 * that field holds the log type, eg. SYSTEM or TRAFFIC.
 *
 * @param timestamp      The message timestamp.
 * @param messagePayload The CSV message payload, i.e. the comma-separated field data that follows the
 *                       syslog/Panorama header. eg. 1,2018/08/22...
 * @param source         The message source. May be an empty string when the message carries no source.
 * @return The PaloAltoMessageBase, which contains all data needed to build the message, or {@code null}
 *         if the payload could not be parsed as a single CSV record or has fewer than four fields.
 */
private PaloAltoMessageBase buildPaloAltoMessageBase(DateTime timestamp, String messagePayload, String source) {

    // Attempt to parse CSV fields. A null result means the parser has already logged the failure.
    ImmutableList<String> fields = parseCSVFields(messagePayload);
    if (fields == null) {
        return null;
    }

    // The field at index 3 is read below. Treat a truncated payload like any other malformed message
    // (log and return null) instead of letting fields.get(3) throw an IndexOutOfBoundsException.
    if (fields.size() < 4) {
        LOG.error("Cannot parse malformed PAN message [too few fields]: {}", messagePayload);
        return null;
    }

    return PaloAltoMessageBase.create(source, timestamp, messagePayload, fields.get(3), fields);
}

/**
 * Use a CSV parser to ensure that quoted text with embedded commas works correctly.
 * This logic used to split just on the comma, which produced malformed parsing results when commas were embedded
 * within quoted CSV values. eg. "testing 1, 2, 3"
 *
 * @param messagePayload The CSV message payload, i.e. the comma-separated field data that follows the
 *                       syslog/Panorama header. eg. 1,2018/08/22...
 * @return An immutable list of strings containing the individual field values.
 * All quoted values will be properly extracted (quotes will be removed, and only the inner value will be returned).
 * Returns {@code null} (after logging the reason) if the payload cannot be read or does not consist of
 * exactly one CSV record.
 */
private ImmutableList<String> parseCSVFields(String messagePayload) {

    final List<CSVRecord> csvRecords;
    // try-with-resources closes the parser and its reader on every path, including parse failures.
    try (Reader stringReader = new StringReader(messagePayload);
         CSVParser csvParser = new CSVParser(stringReader, CSVFormat.DEFAULT)) {
        csvRecords = csvParser.getRecords();
    } catch (IOException e) {
        LOG.error("Cannot parse CSV PAN message: {}", messagePayload, e);
        return null;
    }

    // Exactly one record is expected: zero means an empty payload, more than one means the payload
    // contained unquoted line breaks.
    if (csvRecords.size() != 1) {
        LOG.error("Cannot parse malformed/multiline Syslog message: {}", messagePayload);
        return null;
    }

    // Copy the single row's values straight into an immutable list; no intermediate list is needed.
    return ImmutableList.copyOf(csvRecords.get(0).iterator());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ public class PaloAltoCodecTest {

// These messages are in Panorama format. Panorama is Palo Alto's log management system.
// Messages forwarded from Panorama will have the - - - - delimiter.
private final static String PANORAMA_TRAFFIC_MESSAGE = "<14>1 2018-09-19T11:50:32-05:00 Panorama--2 - - - - 1,2018/09/19 11:50:32,007255000045717,TRAFFIC,end,2049,2018/09/19 11:50:32,10.20.30.40,10.20.30.40,10.20.30.40,10.20.30.40,HTTPS-strict,,,incomplete,vsys1,Public,Public,ethernet1/1,ethernet1/1,ALK Logging,2018/09/19 11:50:32,205742,1,64575,443,41304,443,0x400070,tcp,allow,412,272,140,6,2018/09/19 11:50:15,0,any,0,54196730,0x8000000000000000,10.20.30.40-10.20.30.40,10.20.30.40-10.20.30.40,0,4,2,tcp-fin,13,16,0,0,,Prod--2,from-policy,,,0,,0,,N/A,0,0,0,0";
private final static String PANORAMA_SYSTEM_MESSAGE = "<14>1 2018-09-19T11:50:35-05:00 Panorama-1 - - - - 1,2018/09/19 11:50:35,000710000506,SYSTEM,general,0,2018/09/19 11:50:35,,general,,0,0,general,informational,\"Deviating device: Prod--2, Serial: 007255000045717, Object: N/A, Metric: mp-cpu, Value: 34\",1163103,0x0,0,0,0,0,,Panorama-1";
private final static String PANORAMA_TRAFFIC_MESSAGE = "<14>1 2018-09-19T11:50:32-05:00 Panorama--2 - - - - 1,2018/09/19 11:50:32,453524335,TRAFFIC,end,2049,2018/09/19 11:50:32,10.20.30.40,10.20.30.40,10.20.30.40,10.20.30.40,HTTPS-strict,,,incomplete,vsys1,Public,Public,ethernet1/1,ethernet1/1,ALK Logging,2018/09/19 11:50:32,205742,1,64575,443,41304,443,0x400070,tcp,allow,412,272,140,6,2018/09/19 11:50:15,0,any,0,54196730,0x8000000000000000,10.20.30.40-10.20.30.40,10.20.30.40-10.20.30.40,0,4,2,tcp-fin,13,16,0,0,,Prod--2,from-policy,,,0,,0,,N/A,0,0,0,0";
private final static String PANORAMA_SYSTEM_MESSAGE = "<14>1 2018-09-19T11:50:35-05:00 Panorama-1 - - - - 1,2018/09/19 11:50:35,000710000506,SYSTEM,general,0,2018/09/19 11:50:35,,general,,0,0,general,informational,\"Deviating device: Prod--2, Serial: 453524335, Object: N/A, Metric: mp-cpu, Value: 34\",1163103,0x0,0,0,0,0,,Panorama-1";
private final static String PANORAMA_THREAT_MESSAGE = "<14>1 2018-09-19T11:50:33-05:00 Panorama--1 - - - - 1,2018/09/19 11:50:33,007255000045716,THREAT,spyware,2049,2018/09/19 11:50:33,10.20.30.40,10.20.30.40,10.20.30.40,10.20.30.40,HTTPS-strict,,,ssl,vsys1,Public,Public,ethernet1/1,ethernet1/1,ALK Logging,2018/09/19 11:50:33,201360,1,21131,443,56756,443,0x80403000,tcp,alert,\"test.com/\",Suspicious TLS Evasion Found(14978),online_test.com,informational,client-to-server,1007133,0xa000000000000000,10.20.30.40-10.20.30.40,10.20.30.40-10.20.30.40,0,,1204440535977427988,,,0,,,,,,,,0,13,16,0,0,,Prod--1,,,,,0,,0,,N/A,spyware,AppThreat-8065-5006,0x0,0,4294967295";
private final static String PANORAMA_WITH_LINE_BREAK = "<14>1 2018-09-19T11:50:35-05:00 Panorama-1 - - - - 1,2018/09/19 11:50:35,000710000506,SYSTEM,general,0,2018/09/19 11:50:35,,general,,0,0,general,informational,\\\"Deviating device: Prod--2, Serial: 007255000045717, Object: N/A, Metric: mp-cpu, Value: 34\\\",1163103,0x0,0,0,0,0,,Panorama-1\n";
private final static String PANORAMA_WITH_LINE_BREAK = "<14>1 2018-09-19T11:50:35-05:00 Panorama-1 - - - - 1,2018/09/19 11:50:35,000710000506,SYSTEM,general,0,2018/09/19 11:50:35,,general,,0,0,general,informational,\\\"Deviating device: Prod--2, Serial: 453524335, Object: N/A, Metric: mp-cpu, Value: 34\\\",1163103,0x0,0,0,0,0,,Panorama-1\n";

// Raw PAN device messages.
// These help to test the various combinations that we might see.
Expand Down Expand Up @@ -124,16 +124,18 @@ public void valuesTest() {
Message message = codec.decode(new RawMessage(PANORAMA_SYSTEM_MESSAGE.getBytes()));
assertEquals("SYSTEM", message.getField("type"));
assertEquals(message.getField("module"), "general");
assertEquals(message.getField("description"), "\"Deviating device: Prod--2");

// Test quoted value with embedded commas.
assertEquals(message.getField("description"), "Deviating device: Prod--2, Serial: 453524335, Object: N/A, Metric: mp-cpu, Value: 34");
assertEquals(message.getField("serial_number"), "000710000506");
assertEquals(message.getField("source"), "Panorama-1");
assertEquals(message.getField("message"), "1,2018/09/19 11:50:35,000710000506,SYSTEM,general,0,2018/09/19 11:50:35,,general,,0,0,general,informational,\"Deviating device: Prod--2, Serial: 007255000045717, Object: N/A, Metric: mp-cpu, Value: 34\",1163103,0x0,0,0,0,0,,Panorama-1");
assertEquals(message.getField("message"), "1,2018/09/19 11:50:35,000710000506,SYSTEM,general,0,2018/09/19 11:50:35,,general,,0,0,general,informational,\"Deviating device: Prod--2, Serial: 453524335, Object: N/A, Metric: mp-cpu, Value: 34\",1163103,0x0,0,0,0,0,,Panorama-1");
assertEquals(message.getField("severity"), "informational");
assertEquals(message.getField("generated_time"), "2018/09/19 11:50:35");
assertEquals(message.getField("event_id"), "general");
assertEquals(message.getField("device_name"), "0");
assertEquals(message.getField("device_name"), "Panorama-1");
assertEquals(message.getField("content_threat_type"), "general");
assertEquals(message.getField("virtual_system_name"), "0");
assertEquals(message.getField("virtual_system_name"), null);
assertEquals(0, ((DateTime) message.getField("timestamp")).compareTo(new DateTime("2018-09-19T11:50:35.000-05:00")));

// Test Traffic message results
Expand All @@ -158,8 +160,8 @@ public void valuesTest() {
assertEquals(message.getField("rule_name"), "HTTPS-strict");
assertEquals(message.getField("nat_src_addr"), "10.20.30.40");
assertEquals(message.getField("session_id"), 205742L);
assertEquals(message.getField("serial_number"), "007255000045717");
assertEquals(message.getField("message"), "1,2018/09/19 11:50:32,007255000045717,TRAFFIC,end,2049,2018/09/19 11:50:32,10.20.30.40,10.20.30.40,10.20.30.40,10.20.30.40,HTTPS-strict,,,incomplete,vsys1,Public,Public,ethernet1/1,ethernet1/1,ALK Logging,2018/09/19 11:50:32,205742,1,64575,443,41304,443,0x400070,tcp,allow,412,272,140,6,2018/09/19 11:50:15,0,any,0,54196730,0x8000000000000000,10.20.30.40-10.20.30.40,10.20.30.40-10.20.30.40,0,4,2,tcp-fin,13,16,0,0,,Prod--2,from-policy,,,0,,0,,N/A,0,0,0,0");
assertEquals(message.getField("serial_number"), "453524335");
assertEquals(message.getField("message"), "1,2018/09/19 11:50:32,453524335,TRAFFIC,end,2049,2018/09/19 11:50:32,10.20.30.40,10.20.30.40,10.20.30.40,10.20.30.40,HTTPS-strict,,,incomplete,vsys1,Public,Public,ethernet1/1,ethernet1/1,ALK Logging,2018/09/19 11:50:32,205742,1,64575,443,41304,443,0x400070,tcp,allow,412,272,140,6,2018/09/19 11:50:15,0,any,0,54196730,0x8000000000000000,10.20.30.40-10.20.30.40,10.20.30.40-10.20.30.40,0,4,2,tcp-fin,13,16,0,0,,Prod--2,from-policy,,,0,,0,,N/A,0,0,0,0");
assertEquals(message.getField("bytes_sent"), 272L);
assertEquals(message.getField("dest_zone"), "Public");
assertEquals(message.getField("nat_src_port"), 41304L);
Expand All @@ -175,22 +177,6 @@ public void valuesTest() {
assertEquals(message.getField("nat_dest_addr"), "10.20.30.40");
assertEquals(message.getField("category"), "any");
assertEquals(message.getField("nat_dest_port"), 443L);

// TODO: Implement and test THREAT parsing.
// message = codec.decode(new RawMessage(THREAT_MESSAGE.getBytes()));
// assertEquals("THREAT", message.getField("pa_type"));
}

// TODO: Implement this test.
@Ignore
@Test
public void invalidPositionTest() {

    // Meant to verify that fields with invalid positions (ones that do not exist in the logs) are ignored.
    // For now it is only a placeholder: it decodes a Panorama SYSTEM message and checks the parsed type.
    final PaloAltoCodec paloAltoCodec = new PaloAltoCodec(Configuration.EMPTY_CONFIGURATION);
    final RawMessage rawSystemMessage = new RawMessage(PANORAMA_SYSTEM_MESSAGE.getBytes());

    final Message decoded = paloAltoCodec.decode(rawSystemMessage);
    assertEquals("SYSTEM", decoded.getField("type"));
}

/**
Expand Down

0 comments on commit 0983497

Please sign in to comment.