forked from emchristensen/weather_analysis
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclean_NOAA_data.r
52 lines (42 loc) · 2.01 KB
/
clean_NOAA_data.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# This script contains two functions for cleaning up downloaded NOAA weather station
# data (Portal4sw and SanSimon)
find_missing_dates <- function(weathframe) {
  # Find gaps in a daily ppt/temp record.
  #
  # Args:
  #   weathframe: data frame with columns Year, Month and Day, one row per
  #     observed day. Rows may be in any order and may contain duplicates.
  #
  # Returns:
  #   A data frame with columns Year, Month, Day (integer) and Precipitation,
  #   TempAirMax, TempAirMin (all NA), holding one row for every calendar day
  #   between the earliest and latest observed date that does not appear in
  #   weathframe. Has zero rows when there are no gaps or no usable dates.

  # An explicit format turns unparseable dates into NA instead of an error
  date_info <- with(weathframe, paste(Year, Month, Day, sep = '-'))
  dates <- as.Date(date_info, format = '%Y-%m-%d')
  dates <- dates[!is.na(dates)]

  if (length(dates) == 0) {
    # Nothing to span: seq() would fail on a zero-length endpoint
    missingdates <- as.Date(character(0))
  } else {
    # Use the true earliest/latest date rather than the first/last row, so
    # the result does not depend on the input being sorted
    datesthereshouldbe <- seq(min(dates), max(dates), by = 'day')
    missingdates <- datesthereshouldbe[!(datesthereshouldbe %in% dates)]
  }

  # Create frame of NAs for missing dates
  n_missing <- length(missingdates)
  data.frame(
    Year = as.integer(format(missingdates, '%Y')),
    Month = as.integer(format(missingdates, '%m')),
    Day = as.integer(format(missingdates, '%d')),
    Precipitation = rep(NA, n_missing),
    TempAirMax = rep(NA, n_missing),
    TempAirMin = rep(NA, n_missing)
  )
}
clean_NOAA_data <- function(filename) {
  # Read a raw NOAA daily-summary csv (as downloaded from
  # http://www.ncdc.noaa.gov/cdo-web/) and return a tidy daily data frame.
  #
  # Args:
  #   filename: path to a csv file with DATE (yyyymmdd), PRCP, TMAX and TMIN
  #     columns.
  #
  # Returns:
  #   A data frame with columns Year, Month, Day, Precipitation (mm),
  #   TempAirMax and TempAirMin (degrees C), sorted by date, with an all-NA
  #   row inserted for every day that find_missing_dates() reports as absent.

  # Raw values are stored in tenths of a unit; anything below `lowest_valid`
  # after rescaling is a missing-value sentinel (originally -9999)
  rescale_tenths <- function(values, lowest_valid) {
    scaled <- values / 10
    scaled[scaled < lowest_valid] <- NA
    scaled
  }

  noaa <- read.csv(filename)
  stamps <- strptime(noaa$DATE, '%Y%m%d')

  rawframe <- data.frame(
    Year = as.integer(format(stamps, '%Y')),
    Month = as.integer(format(stamps, '%m')),
    Day = as.integer(format(stamps, '%d')),
    Precipitation = rescale_tenths(noaa$PRCP, 0),   # tenths of mm -> mm
    TempAirMax = rescale_tenths(noaa$TMAX, -900),   # tenths of deg C -> deg C
    TempAirMin = rescale_tenths(noaa$TMIN, -900)    # tenths of deg C -> deg C
  )

  # The record should be daily: append NA rows for absent days, then put
  # everything back into chronological order
  gaps <- find_missing_dates(rawframe)
  combined <- rbind(rawframe, gaps)
  chronological <- order(combined$Year, combined$Month, combined$Day)
  combined[chronological, ]
}