Skip to content

Commit

Permalink
Set encoding="utf-8" to avoid Windows default encoding issue
Browse files Browse the repository at this point in the history
  • Loading branch information
moshi4 committed Sep 26, 2024
1 parent d9e4756 commit c8a9b59
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/pycirclize/parser/bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def parse(bed_file: str | Path) -> list[BedRecord]:
BED records
"""
bed_records = []
- with open(bed_file) as f:
+ with open(bed_file, encoding="utf-8") as f:
reader = csv.reader(f, delimiter="\t")
for row in reader:
if row[0].startswith("#") or len(row) < 3:
Expand Down
16 changes: 10 additions & 6 deletions src/pycirclize/parser/genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def write_genome_fasta(self, outfile: str | Path) -> None:
outfile : str | Path
Output genome fasta file
"""
- with open(outfile, "w") as f:
+ with open(outfile, "w", encoding="utf-8") as f:
for seqid, seq in self.get_seqid2seq().items():
f.write(f">{seqid}\n{seq}\n")

Expand All @@ -406,19 +406,23 @@ def _parse_gbk_source(
list[SeqRecord]
Genbank SeqRecords
"""
- # Parse compressed file
+ # Parse file
if isinstance(gbk_source, (str, Path)):
if Path(gbk_source).suffix == ".gz":
- with gzip.open(gbk_source, mode="rt") as f:
+ with gzip.open(gbk_source, mode="rt", encoding="utf-8") as f:
return list(SeqIO.parse(f, "genbank"))
elif Path(gbk_source).suffix == ".bz2":
- with bz2.open(gbk_source, mode="rt") as f:
+ with bz2.open(gbk_source, mode="rt", encoding="utf-8") as f:
return list(SeqIO.parse(f, "genbank"))
elif Path(gbk_source).suffix == ".zip":
with zipfile.ZipFile(gbk_source) as zip:
with zip.open(zip.namelist()[0]) as f:
- return list(SeqIO.parse(TextIOWrapper(f), "genbank"))
- # Parse no compressed file or TextIOWrapper
+ io = TextIOWrapper(f, encoding="utf-8")
+ return list(SeqIO.parse(io, "genbank"))
+ else:
+ with open(gbk_source, encoding="utf-8") as f:
+ return list(SeqIO.parse(f, "genbank"))
+ # Parse TextIOWrapper
return list(SeqIO.parse(gbk_source, "genbank"))

def _is_straddle_feature(self, feature: SeqFeature) -> bool:
Expand Down
8 changes: 4 additions & 4 deletions src/pycirclize/parser/gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,18 +280,18 @@ def _parse_gff(
"""
gff_file = Path(gff_file)
if gff_file.suffix == ".gz":
- with gzip.open(gff_file, mode="rt") as f:
+ with gzip.open(gff_file, mode="rt", encoding="utf-8") as f:
gff_records, start, end = self._parse_gff_textio(f, target_seqid)
elif gff_file.suffix == ".bz2":
- with bz2.open(gff_file, mode="rt") as f:
+ with bz2.open(gff_file, mode="rt", encoding="utf-8") as f:
gff_records, start, end = self._parse_gff_textio(f, target_seqid)
elif gff_file.suffix == ".zip":
with zipfile.ZipFile(gff_file) as zip:
with zip.open(zip.namelist()[0]) as f:
- io = TextIOWrapper(f)
+ io = TextIOWrapper(f, encoding="utf-8")
gff_records, start, end = self._parse_gff_textio(io, target_seqid)
else:
- with open(gff_file) as f:
+ with open(gff_file, encoding="utf-8") as f:
gff_records, start, end = self._parse_gff_textio(f, target_seqid)

return gff_records, start, end
Expand Down
6 changes: 4 additions & 2 deletions src/pycirclize/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,12 @@ def load_tree(data: str | Path | Tree, format: str) -> Tree:
"""
if isinstance(data, str) and urlparse(data).scheme in ("http", "https"):
# Load tree file from URL
- return Phylo.read(io.StringIO(urlopen(data).read().decode()), format=format)
+ treeio = io.StringIO(urlopen(data).read().decode(encoding="utf-8"))
+ return Phylo.read(treeio, format=format)
elif isinstance(data, (str, Path)) and os.path.isfile(data):
# Load tree file
- return Phylo.read(data, format=format)
+ with open(data, encoding="utf-8") as f:
+ return Phylo.read(f, format=format)
elif isinstance(data, str):
# Load tree string
return Phylo.read(io.StringIO(data), format=format)
Expand Down
4 changes: 2 additions & 2 deletions src/pycirclize/utils/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def fetch_genbank_by_accid(
)
if gbk_outfile is not None:
gbk_text = gbk_fetch_data.read()
- with open(gbk_outfile, "w") as f:
+ with open(gbk_outfile, "w", encoding="utf-8") as f:
f.write(gbk_text)
gbk_fetch_data = StringIO(gbk_text)

Expand Down Expand Up @@ -258,7 +258,7 @@ def load(chr_link_file: str | Path) -> list[ChrLink]:
Chromosome link list
"""
chr_link_list = []
- with open(chr_link_file) as f:
+ with open(chr_link_file, encoding="utf-8") as f:
reader = csv.reader(f, delimiter="\t")
for row in reader:
qchr, qstart, qend = row[0], int(row[1]), int(row[2])
Expand Down

0 comments on commit c8a9b59

Please sign in to comment.