GeoDaCenter · mradamcox · Aug 16, 2023 · Aug 16, 2023 · Aug 16, 2023 · Aug 16, 2023
diff --git a/Work on County Level Data/Note_new.txt b/Work on County Level Data/Note_new.txt
diff --git a/Work on County Level Data/Original Data Dictionary for County Level Data.xlsx b/Work on County Level Data/Original Data Dictionary for County Level Data.xlsx
diff --git a/Work on County Level Data/original_county_leve_data.csv b/Work on County Level Data/original_county_leve_data.csv
diff --git a/data_final/county_consolidation/.~lock.CountyLevelDataDictionary.xlsx# b/data_final/county_consolidation/.~lock.CountyLevelDataDictionary.xlsx#
@@ -0,0 +1 @@
+,adam,legion-gis,16.08.2023 15:38,file:///home/adam/.config/libreoffice/4;
diff --git a/Work on County Level Data/Test_table_T&C.csv → data_final/county_consolidation/C_Latest.csv b/Work on County Level Data/Test_table_T&C.csv → data_final/county_consolidation/C_Latest.csv
diff --git a/data_final/county_consolidation/CountyLevelDataDictionary.xlsx b/data_final/county_consolidation/CountyLevelDataDictionary.xlsx
diff --git a/...ty Level Data/Tract&County_Merged code.py → ...al/county_consolidation/GetMergedTable.py b/...ty Level Data/Tract&County_Merged code.py → ...al/county_consolidation/GetMergedTable.py
@@ -1,18 +1,18 @@
 import pandas as pd
 import re 
 
-#if there is invalid character, the code can be used to find the position
-# file_path = "path to csv file"
-# position = xxx (error information)
+#the commented-out code below can be used to check which character is invalid
+# file_path = "file path"
+# position = error
 
 # with open(file_path, 'rb') as file:
 #     file_contents = file.read()
 #     character = file_contents[position]
 
 # print("Character at position", position, ":", character)
 
-tract_file = pd.read_csv("tarct level file path")
-county_file = pd.read_csv("county level file path")
+tract_file = pd.read_csv("tarct file path")
+county_file = pd.read_csv("county file path")
 #the type of GEOID for both county data and tract data is numpy.int64
 
 #create set to store variables respectively
@@ -21,6 +21,7 @@
 
 #get variables in tract data which county data doesn't have
 dif_tract = tract_cols - county_cols
+
 #create GEOID_C variable for tract data to help match data with county data
 COUNTYFP = []
 for num in tract_file["GEOID"]:
@@ -46,24 +47,33 @@
 
 #Remove the columns which need to be calculated again or already existed
 columns_drop = []
+columns_recall = []
 for item in merged_table.columns:
-    if (("Wk" in item) or 
-    ("Bk" in item) or 
-    ("CntDr" in item) or 
-    ("MinDis" in item)):
-        columns_drop.append(item)
-
-final_table_no_access = merged_table.drop(columns_drop, axis = 1)
+    if (("TmWk" in item) or
+        ("TmBk" in item) or 
+        ("MinDis" in item)):
+            columns_drop.append(item)
+for item in merged_table.columns:
+    if(("CntDr" in item) or
+       ("CntWk" in item) or
+       ("CntBk") in item):
+            columns_recall.append(item)
+columns_drop.append("TRACTCE")
+final_table_no_access = merged_table.drop(columns_drop + columns_recall, axis = 1)
 
 #create some new variables
-for i, j in zip(final_table_no_access["TotSp"], final_table_no_access["TotPop"]):
-    if i != 0:
-        final_table_no_access["SpPerCap"] = j/i
-for i, j in zip(final_table_no_access["TotPcp"], final_table_no_access["TotPop"]):
-    if i != 0:
-        final_table_no_access["PcpPerCap"] = j/i
+SpPerCap = []
+PcpPerCap = []
+ChildrenP = []
+for item in final_table_no_access["TotSp"]:
+        SpPerCap.append(item/100000)
+final_table_no_access["SpPerCap"] = SpPerCap
+for item in final_table_no_access["TotPcp"]:
+        PcpPerCap.append(item/100000)
+final_table_no_access["PcpPerCap"] = PcpPerCap
 for i, j in zip(final_table_no_access["AgeOv18"], final_table_no_access["TotPop"]):
-    final_table_no_access["ChildrenP"] = j - i
+    ChildrenP.append(j-i)
+final_table_no_access["ChildrenP"] = ChildrenP
 
 #create and calculate the percentage of tracts with various providers in 30 biking/walking/driving distance
 prd_table = pd.DataFrame(columns=["GEOID", "CNT_T"])
@@ -76,27 +86,22 @@
 
 #calculate the number of tracts with a non-zero value for "Walking"/"Biking"/"Driving" etc
 #create table storing the count of tracts with various access variables
-access_P_table = pd.DataFrame(columns = columns_drop, index=final_table_no_access["GEOID"])
+access_P_table = pd.DataFrame(columns = columns_recall, index=final_table_no_access["GEOID"])
 for num in final_table_no_access["GEOID"]:
     sub_table = tract_file[tract_file["GEOID_C"] == num]
-    for var in columns_drop:
+    for var in columns_recall:
         rsl = (sub_table[var] != 0).sum()
         access_P_table[var][num] = rsl
-
 #Get the county level data with count of tracts
 final_table_with_cntT = final_table_no_access.merge(prd_table, left_on="GEOID", right_on="GEOID", how = "outer")
 
 #merge the non-zero record counts
-final_table_with_access  = final_table_with_cntT.merge(access_P_table, left_on="GEOID", right_index=True, how = "outer" )
+final_table_with_access  = final_table_with_cntT.merge(access_P_table, left_on = "GEOID", right_index = True, how = "outer" )
 
 #calculate percentage and create new variables
-for item in columns_drop:
+for item in columns_recall:
     final_table_with_access[item+"P"] = final_table_with_access[item]/final_table_with_access["CNT_T"]
 
 #drop original access variables
-final_table = final_table_with_access.drop[columns_drop]
-
-
-final_table.to_csv("the path to store created new file", index = False)
-
-
+final_table = final_table_with_access.drop(columns_recall, axis = 1)
+final_table.to_csv("position to store the file/C_Latest.csv", index = False)
diff --git a/... County Level Data/GetVeteran_CountyLevel → ...nty_consolidation/GetVeteranCountyLevel.R b/... County Level Data/GetVeteran_CountyLevel → ...nty_consolidation/GetVeteranCountyLevel.R
@@ -33,3 +33,9 @@ Vet_C <- Vet_C %>%
 
 #save dataset
 write.csv(Vet_C, "the path you want to save the table")
+
+
+
+
+
+
diff --git a/data_final/county_consolidation/Note.txt b/data_final/county_consolidation/Note.txt
@@ -0,0 +1,35 @@
+1. The table key is GEOID for county-level data and GEOID_C for tract-level data; each is formed by combining "STATEFP" (2 digits) + "COUNTYFP" (3 digits).
+
+2. "GetVeteranCountyLevel" is an R script that reads the local veteran file, calculates the correct GEOID (table key) and performs the related calculations; it has been uploaded to GitHub.
+
+3. "GetMergedTable.py" produces the county-level table by merging in the tract-level data and calculating the percentage for some of the access variables.
+
+4. Several access variables were removed, including: 'NalTmBk', 'SutMinDis', 'OtpMinDis', 'MoudMinDis', 'MetTmWk', 'RxMinDis', 'HospMinDis', 'NalMinDis', 'MetTmBk', 'BupMinDis', 'FqhcMinDis', 
+'MhMinDis', 'MetMinDis', 'BupTmBk', 'NalTmWk', 'BupTmWk', 'TRACTCE', 'OtpCntDr', 'HospCntDr', 'MetCntBk60', 'BupCntDr30', 'MetCntBk30', 'MetCntWk60', 'NalCntBk30', 'BupCntWk60', 
+'MetCntDr30', 'BupCntBk60', 'NalCntWk30', 'BupCntWk30', 'BupCntBk30', 'FqhcCntDr', 'NalCntDr30', 'RxCntDr', 'MetCntWk30', 'MhCntDr', 'NalCntWk60', 'NalCntBk60', 'SutCntDr'
+
+However, new access variables were created, including 'MetCntWk30P', 'BupCntWk30P', 'OtpCntDrP', 'NalCntBk30P', 'RxCntDrP', 'MhCntDrP', 'BupCntWk60P', 'NalCntWk60P', 'NalCntWk30P', 'BupCntBk30P', 
+'MetCntBk60P', 'HospCntDrP', 'SutCntDrP', 'BupCntDr30P', 'FqhcCntDrP', 'BupCntBk60P', 'NalCntBk60P', 'MetCntWk60P', 'NalCntDr30P', 'MetCntDr30P', 'MetCntBk30P' (see point 6 for more details)
+
+5."C_Latest.csv" is the result of "GetMergedTable.py"
+
+6. Some other new variables are below: 
+	SpPerCap = TotSp/100,000
+  	PcpPerCap = TotPcp/100,000
+  	ChildrenP = TotPop - AgeOv18
+	CNT_T: the total number of tracts in each county
+all "percentage" variables for access variables = (the count of tracts in the county whose value is not 0) / (the total count of tracts in that county)
+
+7. "OriginalCountyLevelData.csv" is the merged table that includes only the county-level data from the metadata, without the data derived from the tract level
+
+8. What is changed in "GetMergedTable.py":
+    a. "TRACTCE" was removed
+    b. "SpPerCap" and "PcpPerCap" were recalculated ("PerCap" means per 100,000 persons, according to the tract level data)
+    c. the name for final merged table was changed to "C_Latest"
+    d. percentage variables for "TmWk", "TmBk", and "MinDis" were removed
+    e. the dictionary named "CountyLevelDataDictionary" was updated to include all variables in the merged table
+
+Tips: MedInc data has been merged into OriginalCountyLevelData.csv
+      If the source is empty, the variable was calculated from tract level data
+      The "note" variable is marked in red as a reminder
+      "Totpop10", etc. weren't removed; this can be checked with Dr. Kolak
diff --git a/data_final/county_consolidation/OriginalCountyLevelData.csv b/data_final/county_consolidation/OriginalCountyLevelData.csv
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		,adam,legion-gis,16.08.2023 15:38,file:///home/adam/.config/libreoffice/4;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,3 +33,9 @@ Vet_C <- Vet_C %>%

		#save dataset
		write.csv(Vet_C, "the path you want to save the table")