From f30d95d23fe21f16ee4665138a247066d9afc5ec Mon Sep 17 00:00:00 2001
From: Hoid <tylerlcheek@gmail.com>
Date: Thu, 3 Aug 2023 11:11:40 -0400
Subject: [PATCH 1/6] feat: Parse study name from GapExchange file if present

---
 src/dug/core/parsers/dbgap_parser.py | 31 ++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/dug/core/parsers/dbgap_parser.py b/src/dug/core/parsers/dbgap_parser.py
index b01432c4..bb6161c5 100644
--- a/src/dug/core/parsers/dbgap_parser.py
+++ b/src/dug/core/parsers/dbgap_parser.py
@@ -1,9 +1,10 @@
 import logging
-import re
+import re, os
 from typing import List
 from xml.etree import ElementTree as ET
 
 from dug import utils as utils
+from pathlib import Path
 from ._base import DugElement, FileParser, Indexable, InputFile
 
 logger = logging.getLogger('dug')
@@ -13,13 +14,29 @@ class DbGaPParser(FileParser):
     # Class for parsers DBGaP Data dictionary into a set of Dug Elements
 
     @staticmethod
-    def parse_study_name_from_filename(filename: str):
+    def parse_study_name_from_filename(filename: str) -> str:
         # Parse the study name from the xml filename if it exists. Return None if filename isn't right format to get id from
         dbgap_file_pattern = re.compile(r'.*/*phs[0-9]+\.v[0-9]+\.pht[0-9]+\.v[0-9]+\.(.+)\.data_dict.*')
         match = re.match(dbgap_file_pattern, filename)
         if match is not None:
             return match.group(1)
         return None
+    
+    @staticmethod
+    def parse_study_name_from_gap_exchange_file(filepath: Path) -> str:
+        # Parse the study name from the GapExchange file adjacent to the file passed in; None if there is no such file
+        parent_dir = filepath.parent.absolute()
+        gap_exchange_filename_str = "GapExchange_" + parent_dir.name
+        gap_exchange_filepath = None
+        for item in os.scandir(parent_dir):
+            if item.is_file() and gap_exchange_filename_str in item.name:  # is_file must be called; the bare method object is always truthy
+                gap_exchange_filepath = item.path
+        if gap_exchange_filepath is None:
+            return None
+        tree = ET.parse(gap_exchange_filepath, ET.XMLParser(encoding='iso-8859-5'))
+        tree_root = tree.getroot()
+        return tree_root.attrib['Configuration']['StudyNameEntrez'].text
+
 
     def _get_element_type(self):
         return "DbGaP"
@@ -28,12 +45,14 @@ def __call__(self, input_file: InputFile) -> List[Indexable]:
         logger.debug(input_file)
         tree = ET.parse(input_file, ET.XMLParser(encoding='iso-8859-5'))
         root = tree.getroot()
-        study_id = root.attrib['study_id']
+        study_id = root.attrib['study_id'].text
         participant_set = root.get('participant_set','0')
 
-        # Parse study name from file handle
-        study_name = self.parse_study_name_from_filename(str(input_file))
-
+        # Parse study name from GapExchange file, and if that fails try from file handle
+        # If still None, raise an error message
+        study_name = self.parse_study_name_from_gap_exchange_file(Path(input_file))
+        if study_name is None:
+            study_name = self.parse_study_name_from_filename(str(input_file))
         if study_name is None:
             err_msg = f"Unable to parse DbGaP study name from data dictionary: {input_file}!"
             logger.error(err_msg)

From 61904163a159b5b92b03a5327fae032c0f720d28 Mon Sep 17 00:00:00 2001
From: Hoid <tylerlcheek@gmail.com>
Date: Fri, 4 Aug 2023 09:40:36 -0400
Subject: [PATCH 2/6] fix: Fix small bug with .text

---
 src/dug/core/parsers/dbgap_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dug/core/parsers/dbgap_parser.py b/src/dug/core/parsers/dbgap_parser.py
index bb6161c5..7de924e5 100644
--- a/src/dug/core/parsers/dbgap_parser.py
+++ b/src/dug/core/parsers/dbgap_parser.py
@@ -35,7 +35,7 @@ def parse_study_name_from_gap_exchange_file(filepath: Path) -> str:
             return None
         tree = ET.parse(gap_exchange_filepath, ET.XMLParser(encoding='iso-8859-5'))
         tree_root = tree.getroot()
-        return tree_root.attrib['Configuration']['StudyNameEntrez'].text
+        return tree_root.attrib['Configuration']['StudyNameEntrez']
 
 
     def _get_element_type(self):
@@ -45,7 +45,7 @@ def __call__(self, input_file: InputFile) -> List[Indexable]:
         logger.debug(input_file)
         tree = ET.parse(input_file, ET.XMLParser(encoding='iso-8859-5'))
         root = tree.getroot()
-        study_id = root.attrib['study_id'].text
+        study_id = root.attrib['study_id']
         participant_set = root.get('participant_set','0')
 
         # Parse study name from GapExchange file, and if that fails try from file handle

From 4257f9c1183e97a112907a82e7c1dfd731f061fc Mon Sep 17 00:00:00 2001
From: Hoid <tylerlcheek@gmail.com>
Date: Fri, 4 Aug 2023 09:49:29 -0400
Subject: [PATCH 3/6] fix: Fix test_loaders.py to have the right number of
 files in test dir

---
 tests/integration/test_loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_loaders.py b/tests/integration/test_loaders.py
index 9287da44..8e67609b 100644
--- a/tests/integration/test_loaders.py
+++ b/tests/integration/test_loaders.py
@@ -18,7 +18,7 @@ def test_filesystem_loader():
         filepath=TEST_DATA_DIR,
     )
     files = list(targets)
-    assert len(files) == 10
+    assert len(files) == 12
 
     with pytest.raises(ValueError):
         targets = load_from_filesystem(

From 9c009398e4dfc68d93577c3dbc4735922074b121 Mon Sep 17 00:00:00 2001
From: Hoid <tylerlcheek@gmail.com>
Date: Fri, 4 Aug 2023 11:40:47 -0400
Subject: [PATCH 4/6] fix: Fix study name from gap exchange function

---
 src/dug/core/parsers/dbgap_parser.py          |   2 +-
 .../GapExchange_phs001252.v1.p1.xml           | 390 ++++++++++++++++++
 ...pht006366.v1.ECLIPSE_Subject.data_dict.xml |   2 +
 tests/integration/test_loaders.py             |   2 +-
 tests/integration/test_parsers.py             |   7 +
 5 files changed, 401 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration/data/phs001252.v1.p1/GapExchange_phs001252.v1.p1.xml
 create mode 100644 tests/integration/data/phs001252.v1.p1/phs001252.v1.pht006366.v1.ECLIPSE_Subject.data_dict.xml

diff --git a/src/dug/core/parsers/dbgap_parser.py b/src/dug/core/parsers/dbgap_parser.py
index 7de924e5..baa37c45 100644
--- a/src/dug/core/parsers/dbgap_parser.py
+++ b/src/dug/core/parsers/dbgap_parser.py
@@ -35,7 +35,7 @@ def parse_study_name_from_gap_exchange_file(filepath: Path) -> str:
             return None
         tree = ET.parse(gap_exchange_filepath, ET.XMLParser(encoding='iso-8859-5'))
         tree_root = tree.getroot()
-        return tree_root.attrib['Configuration']['StudyNameEntrez']
+        return tree_root.find("./Studies/Study/Configuration/StudyNameEntrez").text
 
 
     def _get_element_type(self):
diff --git a/tests/integration/data/phs001252.v1.p1/GapExchange_phs001252.v1.p1.xml b/tests/integration/data/phs001252.v1.p1/GapExchange_phs001252.v1.p1.xml
new file mode 100644
index 00000000..33f615cb
--- /dev/null
+++ b/tests/integration/data/phs001252.v1.p1/GapExchange_phs001252.v1.p1.xml
@@ -0,0 +1,390 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<GaPExchange xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="./dbGaPEx2.1.5.xsd">
+
+<MetaVariables>
+	<Submitter/>
+	<Method/>
+</MetaVariables>
+<MetaLinks/>
+
+<Projects>
+	<Project/>
+</Projects>
+
+<Studies>
+
+<Study source="dbGaP" accession="phs001252.v1.p1" parentStudy="phs001252.v1.p1" createDate="2016-11-17" modDate="2016-12-01">
+
+<Configuration>
+	<Data_Provider><![CDATA[
+		<table border="1">
+		<tr><th>Title</th><th>Name</th><th>Institute</th></tr>
+		<tr><td>Principal Investigator</td><td>Jorgen Vestbo, Professor</td><td>the University of Manchester, Manchester, UK</td></tr>
+		<tr><td>Principal Investigator</td><td>Edwin K. Silverman, MD, PhD</td><td>Brigham and Women &#39;s Hospital, Boston, MA</td></tr>
+		<tr><td>ECLIPSE Investigators</td><td>Y. Ivanov</td><td>Pleven, Bulgaria</td></tr>
+		<tr><td>ECLIPSE Investigators</td><td>K. Kostov</td><td>Sofia, Bulgaria</td></tr>
+		<tr><td>ECLIPSE Investigators</td><td>J. Bourbeau</td><td>Montreal, Canada</td></tr>
+		</table>
+	]]></Data_Provider>
+	<StudyNameEntrez>Evaluation of COPD Longitudinally to Identify Predictive Surrogate Endpoints (ECLIPSE)</StudyNameEntrez>
+	<StudyNameReportPage>Evaluation of COPD Longitudinally to Identify Predictive Surrogate Endpoints (ECLIPSE)</StudyNameReportPage>
+	<StudyTypes>
+		<StudyType>Case-Control</StudyType>
+		<StudyType>Longitudinal Cohort</StudyType>
+		<StudyType>Cohort</StudyType>
+	</StudyTypes>
+	<Description><![CDATA[
+<p>ECLIPSE was a longitudinal observational study of 2164 COPD subjects and a smaller number of smoking controls (337) and nonsmoking controls (245) followed regularly for three years, with three chest CT scans (at baseline, one year, and three years) (Vestbo, European Respiratory Journal 2008; 31: 869). Inclusion criteria included age 40-75, at least 10 pack-years of smoking, and spirometry in GOLD grades 2-4 (COPD cases) or normal spirometry with post-bronchodilator FEV1 &#62;85% predicted and FEV1/FVC&#62;0.7 (controls). Study visits were performed at enrollment, three months, and every six months thereafter with spirometry, questionnaires, and other clinical evaluations. The ECLIPSE CT scans have been analyzed with the VIDA software for emphysema and airway phenotypes. ECLIPSE has provided key insights into the clinical epidemiology of COPD, including COPD exacerbations (Hurst, NEJM 2010; 363: 1128) and lung function decline in COPD (Vestbo, NEJM 2011; 365: 1184). ECLIPSE has been used in a number of genetic studies of COPD susceptibility and protein biomarkers(Faner, Thorax 2014; 69: 666). Genome-wide gene expression microarray data are available in 147 induced sputum samples from COPD subjects and 248 peripheral blood samples from COPD and control subjects.</p>]]>
+	</Description>
+	<StudyInEx><![CDATA[
+<p>Inclusion criteria included age 40-75, at least 10 pack-years of smoking, and spirometry in GOLD grades 2-4 (COPD cases) or normal spirometry with post-bronchodilator FEV1 &#62;85% predicted and FEV1/FVC &#62;0.7 (controls). Exclusion criteria included respiratory disorders other than COPD, known severe &#945;1-antitrypsin deficiency, history of significant inflammatory disease other than COPD, a COPD exacerbation or blood transfusions within 4 weeks of enrollment, prior lung surgery, recent diagnosis of cancer, inability to walk, and therapy with oral corticosteroids at inclusion. </p>]]>
+	</StudyInEx>
+	<StudyProjects>
+		<Project/>
+	</StudyProjects>
+	<Publications>
+		<Publication>
+			<Pubmed pmid="18216052"/>
+		</Publication>
+		<Publication>
+			<Pubmed pmid="24310110"/>
+		</Publication>
+		<Publication>
+			<Pubmed pmid="24552242"/>
+		</Publication>
+	</Publications>
+	<Diseases>
+		<Disease vocab_source="MESH" vocab_term="Pulmonary Disease, Chronic Obstructive"/>
+		<Disease vocab_source="MESH" vocab_term="Pulmonary Emphysema"/>
+		<Disease vocab_source="MESH" vocab_term="Smoking"/>
+	</Diseases>
+	<Attributions>
+		<Header title="Principal Investigator">
+			<AttName>Jorgen Vestbo, Professor</AttName>
+			<Institution>the University of Manchester, Manchester, UK</Institution>
+		</Header>
+		<Header title="Principal Investigator">
+			<AttName>Edwin K. Silverman, MD, PhD</AttName>
+			<Institution>Brigham and Women &#39;s Hospital, Boston, MA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>Y. Ivanov</AttName>
+			<Institution>Pleven, Bulgaria</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>K. Kostov</AttName>
+			<Institution>Sofia, Bulgaria</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>J. Bourbeau</AttName>
+			<Institution>Montreal, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>M. Fitzgerald</AttName>
+			<Institution>Vancouver, BC, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>P. Hernandez</AttName>
+			<Institution>Halifax, NS, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>K. Killian</AttName>
+			<Institution>Hamilton, ON, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>R. Levy</AttName>
+			<Institution>Vancouver, BC, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>F. Maltais</AttName>
+			<Institution>Montreal, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>D. O&#39;Donnell</AttName>
+			<Institution>Kingston, ON, Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>J. Krepelka</AttName>
+			<Institution>Prague, Czech Republic</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>J. Vestbo</AttName>
+			<Institution>Hvidovre, Denmark</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>E. Wouters</AttName>
+			<Institution>Horn-Maastricht, The Netherlands</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>D. Quinn</AttName>
+			<Institution>Wellington, New Zealand</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>P. Bakke</AttName>
+			<Institution>Bergen, Norway</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>M. Kosnik</AttName>
+			<Institution>Golnik, Slovenia</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>A. Agusti</AttName>
+			<Institution>Spain</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>J. Sauleda</AttName>
+			<Institution>Spain</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>P. de Mallorca</AttName>
+			<Institution>Spain</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>Y. Feschenko</AttName>
+			<Institution>Kiev, Ukraine</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>V. Gavrisyuk</AttName>
+			<Institution>Kiev, Ukraine</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>L. Yashina</AttName>
+			<Institution>Kiev, Ukraine</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>N. Monogarova</AttName>
+			<Institution>Donetsk, Ukraine</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>P. Calverley</AttName>
+			<Institution>Liverpool, United Kingdom</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>D. Lomas</AttName>
+			<Institution>Cambridge, United Kingdom</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>W. MacNee</AttName>
+			<Institution>Edinburgh, United Kingdom</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>D. Singh</AttName>
+			<Institution>Manchester, United Kingdom</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>J. Wedzicha</AttName>
+			<Institution>London, United Kingdom</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>A. Anzueto</AttName>
+			<Institution>San Antonio, TX, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>S. Braman</AttName>
+			<Institution>Providence, RI, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>R. Casaburi</AttName>
+			<Institution>Torrance, CA, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>B. Celli</AttName>
+			<Institution>Boston, MA, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>G. Giessel</AttName>
+			<Institution>Richmond, VA, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>M. Gotfried</AttName>
+			<Institution>Phoenix, AZ, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>G. Greenwald</AttName>
+			<Institution>Rancho Mirage, CA, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>N. Hanania</AttName>
+			<Institution>Houston, TX, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>D. Mahler</AttName>
+			<Institution>Lebanon, NH, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>B. Make</AttName>
+			<Institution>Denver, CO, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>S. Rennard</AttName>
+			<Institution>Omaha, NE, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>C. Rochester</AttName>
+			<Institution>New Haven, CT, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>P. Scanlon</AttName>
+			<Institution>Rochester, MN, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>D. Schuller</AttName>
+			<Institution>Omaha, NE, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>F. Sciurba</AttName>
+			<Institution>Pittsburgh, PA, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>A. Sharafkhaneh</AttName>
+			<Institution>Houston, TX, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>T. Siler</AttName>
+			<Institution>St. Charles, MO, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>E. Silverman</AttName>
+			<Institution>Boston, MA, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>A. Wanner</AttName>
+			<Institution>Miami, FL, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>R. Wise</AttName>
+			<Institution>Baltimore, MD, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Investigators">
+			<AttName>R. ZuWallack</AttName>
+			<Institution>Hartford, CT, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>H. Coxson</AttName>
+			<Institution>Canada</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>C. Crim</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>L. Edwards</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>D. Lomas</AttName>
+			<Institution>UK</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>W. MacNee</AttName>
+			<Institution>UK</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>E. Silverman</AttName>
+			<Institution>USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>R. Tal-Singer</AttName>
+			<Institution>Co-chair, GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>J. Vestbo</AttName>
+			<Institution>Co-chair, Denmark</Institution>
+		</Header>
+		<Header title="ECLIPSE Steering Committee">
+			<AttName>J. Yates</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>A. Agusti</AttName>
+			<Institution>Spain</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>P. Calverley</AttName>
+			<Institution>UK</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>B. Celli</AttName>
+			<Institution>USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>C. Crim</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>B. Miller</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>W. MacNee</AttName>
+			<Institution>Chair, UK</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>S. Rennard</AttName>
+			<Institution>USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>R. Tal-Singer</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>E. Wouters</AttName>
+			<Institution>The Netherlands</Institution>
+		</Header>
+		<Header title="ECLIPSE Scientific Committee">
+			<AttName>J. Yates</AttName>
+			<Institution>GSK, USA</Institution>
+		</Header>
+	</Attributions>
+	<DisplayPublicSummary>yes</DisplayPublicSummary>
+	<StudyURLs>
+		<Url name="ECLIPSE" url="http://www.eclipse-copd.com/"/>
+	</StudyURLs>
+	<StudyHistory><![CDATA[
+<p>The ECLIPSE study protocol was finalized in 2005, and subject recruitment began in 2006.</p>]]>
+	</StudyHistory>
+	<ConsentGroups>
+		<ConsentGroup groupNum="1" shortName="DS-COPD-RD" longName="Disease-Specific (Chronic Obstructive Pulmonary Disease, RD)"/>
+	</ConsentGroups>
+</Configuration>
+
+  <AuthorizedAccess>
+    <DacInfo ssDacId="0">
+      <DacName>NHLBI</DacName>
+      <DacFullName>National Heart, Lung, and Blood Institute DAC</DacFullName>
+      <DacEmail>0</DacEmail>
+      <DacPhone></DacPhone>
+      <DacUrl></DacUrl>
+    </DacInfo>
+    <Policy Policy_ID="phs001252.v1.p1_policy" ref_ssDacId="0">
+      <DisplayResearchStatement>yes</DisplayResearchStatement>
+      <DisplayPublicSummary>yes</DisplayPublicSummary>
+      <EmbargoLength>0</EmbargoLength>
+      <YearsUntilRenewal>1</YearsUntilRenewal>
+      <WeeksCancelRequest>8</WeeksCancelRequest>
+      <PdfSupplementReqired>no</PdfSupplementReqired>
+      <AcknowledgementText>
+        <para>
+          http://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=DUC&amp;view_pdf&amp;stacc=phs001252.v1.p1
+        </para>
+      </AcknowledgementText>
+      <DocumentSet>
+        <DataUseCertificate Label="Data Use Certificate" FilePath="https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=DUC&amp;view_pdf&amp;stacc=phs001252.v1.p1" FileName=""/>
+      </DocumentSet>
+    </Policy>
+    <ConsentGroups>
+      <ParticipantSet groupNum-REF="1">
+        <ConsentName>Disease-Specific (Chronic Obstructive Pulmonary Disease, RD)</ConsentName>
+        <ConsentAbbrev>DS-COPD-RD</ConsentAbbrev>
+        <UseLimitation>Use of the data must be related to Chronic Obstructive Pulmonary Disease and related disorders.</UseLimitation>
+        <IrbRequired>No</IrbRequired>
+      </ParticipantSet>
+    </ConsentGroups>
+  </AuthorizedAccess>
+
+
+</Study>
+
+</Studies>
+
+</GaPExchange>
diff --git a/tests/integration/data/phs001252.v1.p1/phs001252.v1.pht006366.v1.ECLIPSE_Subject.data_dict.xml b/tests/integration/data/phs001252.v1.p1/phs001252.v1.pht006366.v1.ECLIPSE_Subject.data_dict.xml
new file mode 100644
index 00000000..c9c083e9
--- /dev/null
+++ b/tests/integration/data/phs001252.v1.p1/phs001252.v1.pht006366.v1.ECLIPSE_Subject.data_dict.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="./datadict_v2.xsl"?><data_table id="pht006366.v1" study_id="phs001252.v1" participant_set="1" date_created="Tue May 16 13:19:18 2017"><description/><variable id="phv00293196.v1"><name>SUBJECT_ID</name><description>Subject ID</description><type>string</type></variable><variable id="phv00293197.v1"><name>CONSENT</name><description>Consent group as determined by DAC</description><type>encoded value</type><value code="1">Disease-Specific (Chronic Obstructive Pulmonary Disease, RD) (DS-COPD-RD)</value></variable></data_table>
diff --git a/tests/integration/test_loaders.py b/tests/integration/test_loaders.py
index 8e67609b..799fd9dc 100644
--- a/tests/integration/test_loaders.py
+++ b/tests/integration/test_loaders.py
@@ -18,7 +18,7 @@ def test_filesystem_loader():
         filepath=TEST_DATA_DIR,
     )
     files = list(targets)
-    assert len(files) == 12
+    assert len(files) == 15
 
     with pytest.raises(ValueError):
         targets = load_from_filesystem(
diff --git a/tests/integration/test_parsers.py b/tests/integration/test_parsers.py
index a22d00c7..fa5ea2de 100644
--- a/tests/integration/test_parsers.py
+++ b/tests/integration/test_parsers.py
@@ -1,6 +1,7 @@
 from dug.core.parsers import DbGaPParser, NIDAParser, TOPMedTagParser, SciCrunchParser, AnvilDbGaPParser,\
     CRDCDbGaPParser, KFDRCDbGaPParser, SPRINTParser, BACPACParser
 from tests.integration.conftest import TEST_DATA_DIR
+from pathlib import Path
 
 def test_dbgap_parse_study_name_from_filename():
     parser = DbGaPParser()
@@ -21,6 +22,12 @@ def test_nida_parse_study_name_from_filename():
     studyname = parser.parse_study_name_from_filename(filename)
     assert studyname == "NIDA-CSP1019"
 
+def test_dbgap_parse_study_name_from_gap_exchange_file():
+    """The study name is read from the GapExchange file that sits beside the data dictionary."""
+    data_dict = Path(TEST_DATA_DIR / "phs001252.v1.p1" / "phs001252.v1.pht006366.v1.ECLIPSE_Subject.data_dict.xml")
+    expected = "Evaluation of COPD Longitudinally to Identify Predictive Surrogate Endpoints (ECLIPSE)"
+    assert DbGaPParser().parse_study_name_from_gap_exchange_file(data_dict) == expected
+
 def test_dbgap_parser():
     parser = DbGaPParser()
     parse_file = str(TEST_DATA_DIR / "phs000166.v2.pht000700.v1.CAMP_CData.data_dict_2009_09_03.xml")

From 217d266726277fd7738c68dda745e7f217722ebc Mon Sep 17 00:00:00 2001
From: Hoid <tylerlcheek@gmail.com>
Date: Mon, 7 Aug 2023 12:00:38 -0400
Subject: [PATCH 5/6] fix: Skip GapExchange files in __call__()

---
 src/dug/core/parsers/dbgap_parser.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/dug/core/parsers/dbgap_parser.py b/src/dug/core/parsers/dbgap_parser.py
index baa37c45..46254d77 100644
--- a/src/dug/core/parsers/dbgap_parser.py
+++ b/src/dug/core/parsers/dbgap_parser.py
@@ -43,8 +43,13 @@ def _get_element_type(self):
 
     def __call__(self, input_file: InputFile) -> List[Indexable]:
         logger.debug(input_file)
+        if "GapExchange" in str(input_file).split("/")[-1]:
+            msg = f"Skipping parsing for GapExchange file: {input_file}!"
+            logger.info(msg)
+            return []
         tree = ET.parse(input_file, ET.XMLParser(encoding='iso-8859-5'))
         root = tree.getroot()
+        print(root.attrib)
         study_id = root.attrib['study_id']
         participant_set = root.get('participant_set','0')
 

From 58548cc2d82654b5c42a2dac2d8f6c69f40ce287 Mon Sep 17 00:00:00 2001
From: Hoid <tylerlcheek@gmail.com>
Date: Mon, 7 Aug 2023 12:02:34 -0400
Subject: [PATCH 6/6] fix: Remove print statement

---
 src/dug/core/parsers/dbgap_parser.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dug/core/parsers/dbgap_parser.py b/src/dug/core/parsers/dbgap_parser.py
index 46254d77..2926b1f1 100644
--- a/src/dug/core/parsers/dbgap_parser.py
+++ b/src/dug/core/parsers/dbgap_parser.py
@@ -49,7 +49,6 @@ def __call__(self, input_file: InputFile) -> List[Indexable]:
             return []
         tree = ET.parse(input_file, ET.XMLParser(encoding='iso-8859-5'))
         root = tree.getroot()
-        print(root.attrib)
         study_id = root.attrib['study_id']
         participant_set = root.get('participant_set','0')