Skip to content

Commit

Permalink
csv module utils: detect unicode BOM in content (#6662)
Browse files Browse the repository at this point in the history
* csv module utils: detect unicode BOM in content

* fix handling of py2

* fix comment

* add changelog frag

* add missing link

* simplification
  • Loading branch information
russoz authored Jun 15, 2023
1 parent 9395df1 commit bb21693
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 13 deletions.
2 changes: 2 additions & 0 deletions changelogs/fragments/6662-csv-bom.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bugfixes:
- csv module utils - detects and removes unicode BOM markers from incoming CSV content (https://github.com/ansible-collections/community.general/pull/6662).
4 changes: 3 additions & 1 deletion plugins/module_utils/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ class unix_dialect(csv.Dialect):


def read_csv(data, dialect, fieldnames=None):

BOM = to_native(u'\ufeff')
data = to_native(data, errors='surrogate_or_strict')
if data.startswith(BOM):
data = data[len(BOM):]

if PY3:
fake_fh = StringIO(data)
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/targets/read_csv/meta/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Copyright (c) Ansible Project
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

# Dependencies of the read_csv integration test target.
# NOTE(review): setup_remote_tmp_dir presumably supplies the `remote_tmp_dir`
# variable that the target's tasks use for their CSV file paths - confirm.
dependencies:
- setup_remote_tmp_dir
50 changes: 38 additions & 12 deletions tests/integration/targets/read_csv/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
# Create basic CSV file
- name: Create unique CSV file
copy:
content: |
content: &users_content |
name,uid,gid,gecos
dag,500,500,Dag Wieërs
jeroen,501,500,Jeroen Hoekx
dest: users_unique.csv
dest: "{{ remote_tmp_dir }}/users_unique.csv"

# Read a CSV file and access user 'dag'
- name: Read users from CSV file and return a dictionary
read_csv:
path: users_unique.csv
path: "{{ remote_tmp_dir }}/users_unique.csv"
key: name
register: users_unique

Expand All @@ -35,10 +35,10 @@
- users_unique.dict.jeroen.uid == '501'
- users_unique.dict.jeroen.gid == '500'

# Read a CSV file and access the first item
# Read a CSV file and access the first item
- name: Read users from CSV file and return a list
read_csv:
path: users_unique.csv
path: "{{ remote_tmp_dir }}/users_unique.csv"
register: users_unique

- assert:
Expand All @@ -61,12 +61,12 @@
dag;500;500;Dag Wieërs
jeroen;501;500;Jeroen Hoekx
dag;502;500;Dag Wieers
dest: users_nonunique.csv
dest: "{{ remote_tmp_dir }}/users_nonunique.csv"

# Read a CSV file and access user 'dag'
- name: Read users from CSV file and return a dictionary
read_csv:
path: users_nonunique.csv
path: "{{ remote_tmp_dir }}/users_nonunique.csv"
key: name
unique: false
delimiter: ';'
Expand All @@ -87,7 +87,7 @@
# Read a CSV file using a non-existing dialect
- name: Read users from CSV file and return a dictionary
read_csv:
path: users_nonunique.csv
path: "{{ remote_tmp_dir }}/users_nonunique.csv"
dialect: placebo
register: users_placebo
ignore_errors: true
Expand All @@ -104,12 +104,12 @@
content: |
dag,500,500,Dag Wieërs
jeroen,501,500,Jeroen Hoekx
dest: users_noheader.csv
dest: "{{ remote_tmp_dir }}/users_noheader.csv"

# Read a CSV file and access user 'dag'
- name: Read users from CSV file and return a dictionary
read_csv:
path: users_noheader.csv
path: "{{ remote_tmp_dir }}/users_noheader.csv"
key: name
fieldnames: name,uid,gid,gecos
register: users_noheader
Expand All @@ -133,12 +133,12 @@
name,uid,gid,gecos
dag,500,500,Dag Wieërs
jeroen,501,500,"Jeroen"Hoekx"
dest: users_broken.csv
dest: "{{ remote_tmp_dir }}/users_broken.csv"

# Read a broken CSV file using strict
- name: Read users from a broken CSV file
read_csv:
path: users_broken.csv
path: "{{ remote_tmp_dir }}/users_broken.csv"
key: name
strict: true
register: users_broken
Expand All @@ -148,3 +148,29 @@
that:
- users_broken is failed
- "'Unable to process file' in users_broken.msg"

# Create basic CSV file with BOM
# The file body is the same CSV text used for users_unique.csv (shared through
# the &users_content anchor) with a single U+FEFF character prepended, so the
# read_csv task that follows receives content that starts with a BOM.
- name: Create unique CSV file with BOM
copy:
content: "{{ bom + content }}"
dest: "{{ remote_tmp_dir }}/users_bom.csv"
vars:
# Alias of the block scalar anchored as &users_content earlier in this file.
content: *users_content
# The YAML double-quoted escape \ufeff becomes the actual BOM character before
# Jinja evaluates the surrounding string literal.
bom: "{{ '\ufeff' }}"

# Read a CSV file and access the first item
- name: Read users from CSV file and return a list
read_csv:
path: "{{ remote_tmp_dir }}/users_bom.csv"
register: users_bom

- assert:
that:
- users_bom.list.0.name == 'dag'
- users_bom.list.0.gecos == 'Dag Wieërs'
- users_bom.list.0.uid == '500'
- users_bom.list.0.gid == '500'
- users_bom.list.1.name == 'jeroen'
- users_bom.list.1.gecos == 'Jeroen Hoekx'
- users_bom.list.1.uid == '501'
- users_bom.list.1.gid == '500'

0 comments on commit bb21693

Please sign in to comment.