Skip to content

Commit

Permalink
commented CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
brunoczim committed Nov 12, 2020
1 parent 29ee086 commit 6e7b1f3
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 18 deletions.
8 changes: 8 additions & 0 deletions src/csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,20 @@ double csv_parse_double(

i = 0;

/* Skips any spaces that precede the number. */
while (string[i] == ' ') {
i++;
}

/* Do parse. */
value = strtod(string + i, &end);

/* Skips any spaces that follow the parsed number. */
while (*end == ' ') {
end++;
}

/* If we are not at the end of the string, the parse failed. */
if (*end != 0) {
error_string = moviedb_alloc(strlen(string) + 1, error);
if (error->code == error_none) {
Expand Down Expand Up @@ -80,18 +84,22 @@ void csv_parse_field(
while (!done) {
symbol = input_file_read(parser->file, error);
if (error->code == error_none) {
/* Updates line and column for errors. */
update_line_column(parser, symbol);
/* Makes the automaton's transition. */
transition(parser, symbol, out, error);
}

if (error->code != error_none) {
done = true;
} else if (parser->state == csv_error) {
/* Sets the error with line and column. */
error_set_code(error, error_csv);
error->data.csv.line = parser->line;
error->data.csv.column = parser->column;
done = true;
} else {
/* Otherwise, tests whether the field has reached its end. */
done = parser->state == csv_comma;
done = done || csv_is_row_boundary(parser);
done = done || error->code != error_none;
Expand Down
27 changes: 23 additions & 4 deletions src/csv/movie.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,30 @@ void movie_parser_init(
do {
csv_parse_field(&parser->csv_parser, buf, error);

/* Converts the string buffer into a C String. */
if (error->code == error_none) {
strbuf_make_cstr(buf, error);
}

if (error->code == error_none) {
if (strcmp(buf->ptr, "movieId") == 0) {
/* Registers movieId column number, does not allow repeat. */
if (found_id) {
error_set_code(error, error_csv_header);
} else {
found_id = true;
parser->id_column = column;
}
} else if (strcmp(buf->ptr, "title") == 0) {
/* Registers title column number, does not allow repeat. */
if (found_title) {
error_set_code(error, error_csv_header);
} else {
found_title = true;
parser->title_column = column;
}
} else if (strcmp(buf->ptr, "genres") == 0) {
/* Registers genres column number, does not allow repeat. */
if (found_genres) {
error_set_code(error, error_csv_header);
} else {
Expand All @@ -49,13 +53,16 @@ void movie_parser_init(
} else {
error_set_code(error, error_csv_header);
}
/* Advances to the next column number. */
column++;
/* We will stop at row boundary. */
row_boundary = csv_is_row_boundary(&parser->csv_parser);
}
} while (error->code == error_none && !row_boundary);

found_all = found_id && found_title && found_genres;

/* Sets an error if we did not find all of the columns. */
if (error->code == error_none && !found_all) {
error_set_code(error, error_csv_header);
}
Expand All @@ -75,43 +82,55 @@ bool movie_row_parse(
row_out->title = NULL;
row_out->genres = NULL;

/*
* Loops while minimum column number has not been reached (and no error and
* no eof).
*/
while (column < COLUMNS && error->code == error_none && !end_of_file) {
csv_parse_field(&parser->csv_parser, buf, error);
end_of_file = csv_is_end_of_file(&parser->csv_parser) && column == 0;
if (!end_of_file && error->code == error_none) {
row_boundary = csv_is_row_boundary(&parser->csv_parser);
if (column < COLUMNS - 1 && row_boundary) {
/* Error if the row boundary is found too early. */
error_set_code(error, error_movie);
error->data.csv_movie.line = parser->csv_parser.line - 1;
} else if (column == parser->id_column) {
/* Parses an ID. */
strbuf_make_cstr(buf, error);
if (error->code == error_none) {
row_out->id = moviedb_id_parse(buf->ptr, error);
}
} else if (column == parser->title_column) {
/* Copies the title. */
row_out->title = strbuf_copy_cstr(buf, error);
} else if (column == parser->genres_column) {
/* Copies the genres. */
row_out->genres= strbuf_copy_cstr(buf, error);
}
}
column++;
}

/* We must end the row with a row boundary (duh). */
if (error->code == error_none && !row_boundary) {
error_set_code(error, error_movie);
error->data.csv_movie.line = parser->csv_parser.line;
}

/* If error, destroys row data. */
if (error->code != error_none) {
movie_row_destroy(row_out);
if (error->code == error_id) {
/* Gets the line for an ID error. */
error->data.id.has_line = true;
if (csv_is_row_boundary(&parser->csv_parser)) {
error->data.id.line = parser->csv_parser.line - 1;
} else {
error->data.id.line = parser->csv_parser.line;
}
}
} else if (!csv_is_row_boundary(&parser->csv_parser)) {
error_set_code(error, error_movie);
error->data.csv_movie.line = parser->csv_parser.line;
}

return !end_of_file && error->code == error_none;
}

Expand Down
42 changes: 31 additions & 11 deletions src/csv/rating.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,41 @@ void rating_parser_init(
do {
csv_parse_field(&parser->csv_parser, buf, error);

/* Converts the string buffer into a C String. */
if (error->code == error_none) {
strbuf_make_cstr(buf, error);
}

if (error->code == error_none) {
if (strcmp(buf->ptr, "userId") == 0) {
/* Registers userId column number, does not allow repeat. */
if (found_userid) {
error_set_code(error, error_csv_header);
} else {
found_userid = true;
parser->userid_column = column;
}
} else if (strcmp(buf->ptr, "movieId") == 0) {
/* Registers movieId column number, does not allow repeat. */
if (found_movieid) {
error_set_code(error, error_csv_header);
} else {
found_movieid = true;
parser->movieid_column = column;
}
} else if (strcmp(buf->ptr, "rating") == 0) {
/* Registers rating column number, does not allow repeat. */
if (found_value) {
error_set_code(error, error_csv_header);
} else {
found_value = true;
parser->value_column = column;
}
} else if (strcmp(buf->ptr, "timestamp") == 0) {
/*
* Registers timestamp column number, does not allow repeat.
* Timestamp is not actually used.
*/
if (found_timestamp) {
error_set_code(error, error_csv_header);
} else {
Expand All @@ -59,13 +67,16 @@ void rating_parser_init(
} else {
error_set_code(error, error_csv_header);
}
/* Advances to the next column number. */
column++;
/* We will stop at row boundary. */
row_boundary = csv_is_row_boundary(&parser->csv_parser);
}
} while (error->code == error_none && !row_boundary);

found_all = found_userid && found_movieid && found_value && found_timestamp;

/* Sets an error if we did not find all of the columns. */
if (error->code == error_none && !found_all) {
error_set_code(error, error_csv_header);
}
Expand All @@ -85,12 +96,17 @@ bool rating_row_parse(
row_out->userid = 0;
row_out->value = 0.0;

/*
* Loops while minimum column number has not been reached (and no error and
* no eof).
*/
while (column < COLUMNS && error->code == error_none && !end_of_file) {
csv_parse_field(&parser->csv_parser, buf, error);
end_of_file = csv_is_end_of_file(&parser->csv_parser) && column == 0;
if (!end_of_file && error->code == error_none) {
row_boundary = csv_is_row_boundary(&parser->csv_parser);
if (column < COLUMNS - 1 && row_boundary) {
/* Error if the row boundary is found too early. */
error_set_code(error, error_rating);
error->data.csv_movie.line = parser->csv_parser.line - 1;
} else if (column != parser->timestamp_column) {
Expand All @@ -100,30 +116,34 @@ bool rating_row_parse(

if (error->code == error_none) {
if (column == parser->userid_column) {
/* Parses an ID. */
row_out->userid = moviedb_id_parse(buf->ptr, error);
} else if (column == parser->movieid_column) {
/* Parses an ID. */
row_out->movieid = moviedb_id_parse(buf->ptr, error);
} else if (column == parser->value_column) {
/* Parses a double. */
row_out->value = csv_parse_double(buf->ptr, error);
}
}
}
column++;
}

if (error->code != error_none) {
if (error->code == error_id) {
error->data.id.has_line = true;
if (csv_is_row_boundary(&parser->csv_parser)) {
error->data.id.line = parser->csv_parser.line - 1;
} else {
error->data.id.line = parser->csv_parser.line;
}
}
} else if (!csv_is_row_boundary(&parser->csv_parser)) {
error_set_code(error, error_rating);
/* We must end the row with a row boundary (duh). */
if (error->code == error_none && !row_boundary) {
error_set_code(error, error_movie);
error->data.csv_movie.line = parser->csv_parser.line;
}

/* Gets the line for an ID error. */
if (error->code == error_id) {
error->data.id.has_line = true;
if (csv_is_row_boundary(&parser->csv_parser)) {
error->data.id.line = parser->csv_parser.line - 1;
} else {
error->data.id.line = parser->csv_parser.line;
}
}
return !end_of_file && error->code == error_none;
}
32 changes: 29 additions & 3 deletions src/csv/tag.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,44 @@ void tag_parser_init(
do {
csv_parse_field(&parser->csv_parser, buf, error);

/* Converts the string buffer into a C String. */
if (error->code == error_none) {
strbuf_make_cstr(buf, error);
}

if (error->code == error_none) {
if (strcmp(buf->ptr, "userId") == 0) {
/*
* Registers userId column number, does not allow repeat.
* userId is not actually used.
*/
if (found_userid) {
error_set_code(error, error_csv_header);
} else {
found_userid = true;
parser->userid_column = column;
}
} else if (strcmp(buf->ptr, "movieId") == 0) {
/* Registers movieId column number, does not allow repeat. */
if (found_movieid) {
error_set_code(error, error_csv_header);
} else {
found_movieid = true;
parser->movieid_column = column;
}
} else if (strcmp(buf->ptr, "tag") == 0) {
/* Registers tag column number, does not allow repeat. */
if (found_name) {
error_set_code(error, error_csv_header);
} else {
found_name = true;
parser->name_column = column;
}
} else if (strcmp(buf->ptr, "timestamp") == 0) {
/*
* Registers timestamp column number, does not allow repeat.
* Timestamp is not actually used.
*/
if (found_timestamp) {
error_set_code(error, error_csv_header);
} else {
Expand All @@ -59,13 +70,16 @@ void tag_parser_init(
} else {
error_set_code(error, error_csv_header);
}
/* Advances to the next column number. */
column++;
/* We will stop at row boundary. */
row_boundary = csv_is_row_boundary(&parser->csv_parser);
}
} while (error->code == error_none && !row_boundary);

found_all = found_userid && found_movieid && found_name && found_timestamp;

/* Sets an error if we did not find all of the columns. */
if (error->code == error_none && !found_all) {
error_set_code(error, error_csv_header);
}
Expand All @@ -84,40 +98,52 @@ bool tag_row_parse(
row_out->name = NULL;
row_out->movieid = 0;

/*
* Loops while minimum column number has not been reached (and no error and
* no eof).
*/
while (column < COLUMNS && error->code == error_none && !end_of_file) {
csv_parse_field(&parser->csv_parser, buf, error);
end_of_file = csv_is_end_of_file(&parser->csv_parser) && column == 0;
if (!end_of_file && error->code == error_none) {
row_boundary = csv_is_row_boundary(&parser->csv_parser);
if (column < COLUMNS - 1 && row_boundary) {
/* Error if the row boundary is found too early. */
error_set_code(error, error_tag);
error->data.csv_tag.line = parser->csv_parser.line - 1;
} else if (column == parser->movieid_column) {
/* Parses an ID. */
strbuf_make_cstr(buf, error);
if (error->code == error_none) {
row_out->movieid = moviedb_id_parse(buf->ptr, error);
}
} else if (column == parser->name_column) {
/* Copies a tag name. */
row_out->name = strbuf_copy_cstr(buf, error);
}
/* userid and timestamp ignored */
}
column++;
}

/* We must end the row with a row boundary (duh). */
if (error->code == error_none && !row_boundary) {
error_set_code(error, error_movie);
error->data.csv_movie.line = parser->csv_parser.line;
}

/* If error, destroys row data. */
if (error->code != error_none) {
tag_row_destroy(row_out);
if (error->code == error_id) {
/* Gets the line for an ID error. */
error->data.id.has_line = true;
if (csv_is_row_boundary(&parser->csv_parser)) {
error->data.id.line = parser->csv_parser.line - 1;
} else {
error->data.id.line = parser->csv_parser.line;
}
}
} else if (!csv_is_row_boundary(&parser->csv_parser)) {
error_set_code(error, error_tag);
error->data.csv_tag.line = parser->csv_parser.line;
}

return !end_of_file && error->code == error_none;
Expand Down

0 comments on commit 6e7b1f3

Please sign in to comment.