-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDLV_dailydata_v54.m
219 lines (180 loc) · 9.9 KB
/
DLV_dailydata_v54.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
function OUT = DLV_dailydata_v54_HN_BCS(cd,FN_HA,FN_HALI,FN_HADD,cd_H,temp_dir)
% This function produces the 'daily data' from the delaval backups
% >>> software version v5.4
%
%
% INPUTS: cd current directory: folder of txt DATA files
% cd_H current directory headers: forlder of txt HEADER files
% FN_HA Filename of the HistoryAnimal table
% FN_HALI Filename of the HistoryAnimalLactationInfo table
% FN_HADD Filename of the HistoryAnimalDailyData table
%
% OUTPUT OUT Merged and preprocessed table containing daily data
%
% STEP 0: Add headers to tables
% STEP 1: Load tables in matlab format
% STEP 2: Select columns we want to keep in each table & rename
% STEP 3: Merge tables into data table
% STEP 4: Check and correct for errors
%
%
%% STEP 0: combine header and results files
% History Animal
ha_H = readtable([cd_H FN_HA '_headers.txt'],'ReadVariableNames',0); % read variable names
ha_H = ha_H{:,:}'; % convert to cell array and transpose
writecell(ha_H,[temp_dir 'FN_HA.txt'],'Delimiter',';'); % write headernames to file
system(['copy "' temp_dir 'FN_HA.txt"+' '"' cd FN_HA '.txt" "' temp_dir 'FN_HA.txt"']); % combine files using system cmd
fid = fopen([temp_dir 'FN_HA.txt'],'r'); f=fread(fid,'*char')'; fclose(fid); % open en read combined data
f=f(1:length(f)-1); % remove last BOM character
fid = fopen([temp_dir 'FN_HA.txt'],'w');fwrite(fid,f); fclose(fid); % rewrite and close
% History Animal Lactation Info
hali_H = readtable([cd_H FN_HALI '_headers.txt'],'ReadVariableNames',0); % read variable names
hali_H = hali_H{:,:}'; % convert to cell array and transpose
writecell(hali_H,[temp_dir 'FN_HALI.txt'],'Delimiter',';'); % write headernames to file
system(['copy "' temp_dir 'FN_HALI.txt"+' '"' cd FN_HALI '.txt" "' temp_dir 'FN_HALI.txt"']); % combine files using system cmd
fid = fopen([temp_dir 'FN_HALI.txt'],'r'); f=fread(fid,'*char')'; fclose(fid); % open en read combined data
f=f(1:length(f)-1); % remove last BOM character
fid = fopen([temp_dir 'FN_HALI.txt'],'w');fwrite(fid,f); fclose(fid); % rewrite and close
% History Animal Daily Data
hadd_H = readtable([cd_H FN_HADD '_headers.txt'],'ReadVariableNames',0); % read variable names
hadd_H = hadd_H{:,:}'; % convert to cell array and transpose
writecell(hadd_H,[temp_dir 'FN_HADD.txt'],'Delimiter',';'); % write headernames to file
system(['copy "' temp_dir 'FN_HADD.txt"+' '"' cd FN_HADD '.txt" "' temp_dir 'FN_HADD.txt"']); % combine files using system cmd
fid = fopen([temp_dir 'FN_HADD.txt'],'r'); f=fread(fid,'*char')'; fclose(fid); % open en read combined data
f=f(1:length(f)-1); % remove last BOM character
fid = fopen([temp_dir 'FN_HADD.txt'],'w');fwrite(fid,f); fclose(fid); % rewrite and close
clear ha_H hali_H hadd_H ans
% redefine files
FN_HA = 'FN_HA'; % History Animal
FN_HALI = 'FN_HALI'; % History Animal Lactation Info
FN_HADD = 'FN_HADD'; % History Animal Daily Data
cd = temp_dir; % new current directory
%% STEP 1 - load tables in matlab
% Combine filenames in one variable (redundant step)
ext = {'.txt'}; % all possible file extensions
FNS = {FN_HA,FN_HALI,FN_HADD}; % filenames
for i = 1:length(FNS) % length
FN{i} = [cd FNS{i} ext{1}]; % all three
end
% clear variables
clear i FN_HA FN_HALI FN_HADD FNS cd
% HISTORY ANIMAL
opts = detectImportOptions(FN{1},'Delimiter',';'); % detect import options
opts.SelectedVariableNames = {'OID','ReferenceId','Number','OffRegNumber','Name','BirthDate'};% selected variable names
opts = setvartype(opts,{'OID','ReferenceId','Number'},'double'); % set var type to double
opts = setvartype(opts,{'BirthDate'},'datetime'); % set var type to datetime
a = readtable(FN{1},opts); % read table
% HISTORY ANIMAL LACTATION INFO
opts = detectImportOptions(FN{2},'Delimiter',';'); % detect import options
opts.SelectedVariableNames = {'OID','Animal','LactationNumber','StartDate'};% selected variable names
opts = setvartype(opts,{'OID','Animal','LactationNumber'},'double');% set var type to double
opts = setvartype(opts,{'StartDate'},'datetime');
b = readtable(FN{2},opts); % read table
% HISTORY ANIMAL DAILY DATA
opts = detectImportOptions(FN{3},'Delimiter',';'); % detect import options
opts.SelectedVariableNames = {'OID','BasicAnimal','DayDate','Animal','DIM','LactationNumber','DailyYield','Last7DayAvg','MilkingDurationInSec','Milkings','Kickoffs','Incompletes'};% selected variable names
opts = setvartype(opts,{'OID','BasicAnimal','DIM','Animal','LactationNumber','DailyYield','Last7DayAvg','MilkingDurationInSec','Milkings','Kickoffs','Incompletes'},'double');% set var type to double
opts = setvartype(opts,{'DayDate'},'datetime'); % set var type to datetime
c = readtable(FN{3},opts); % read table
% clear variable names
clear FN j ans DT1 DT2 exttype opts
%% STEP 2 : select columns we want to keep in each table
col_HA = {'OID','ReferenceId','Number','OffRegNumber','Name','BirthDate'};
col_HALI = {'OID','Animal','LactationNumber','StartDate'};
col_HADD = {'OID','BasicAnimal','DayDate','Animal','DIM','LactationNumber','DailyYield','Last7DayAvg','MilkingDurationInSec','Milkings','Kickoffs','Incompletes'};
idx_HA = zeros(1,length(col_HA)); % to fill in - column indices
idx_HALI = zeros(1,length(col_HALI)); % to fill in - column indices
idx_HADD = zeros(1,length(col_HADD)); % to fill in - column indices
for i = 1:length(col_HA)
idx_HA(i) = find(contains(a.Properties.VariableNames,col_HA{i})==1,1);
end
for i = 1:length(col_HALI)
idx_HALI(i) = find(contains(b.Properties.VariableNames,col_HALI{i})==1,1);
end
for i = 1:length(col_HADD)
idx_HADD(i) = find(contains(c.Properties.VariableNames,col_HADD{i})==1,1);
end
clear col_HA col_HALI col_HADD i
% select columns - for d all columns are kept
a = a(:,idx_HA); % select columns to keep
b = b(:,idx_HALI); % select columns to keep
c = c(:,idx_HADD); % select columns to keep
% rename columns for merging - there are in order of col_XXX
a.Properties.VariableNames = {'BA','RefID','Number','OfficialRegNo','Name','BDate'}; %HA
b.Properties.VariableNames = {'OID','BA','Lac','Calving'}; % HALI
c.Properties.VariableNames = {'OID2','BA','Date','RefID','DIM','Lac','TDMY','A7DY','Dur','Milkings','Kickoffs','Incompletes'}; % HADD
clear idx_HALI idx_HA idx_HADD exttype Number
%% STEP 3: Correct Lactation numbers if not possible (similar to LELY)
% we notice that in some cases the laction number is increased while in the
% data it seems that no new lactation is started.
% In AnimalLactationSummary, these records are
% associated with no calving date, and can be detected and corrected for as
% such.
b = sortrows(b,[2 3]); % sort per BA and Lac
b = innerjoin(b,a(:,1:2));
idx = find(isnat(b.Calving) == 1 & b.Lac > 0); % find all cases for which this happens
if isempty(idx) == 0
cows = b.RefID(idx); % select BA identity of these cows
for i = 1:length(cows)
sub = sortrows(c(c.RefID == cows(i),:),2); % select all time data of this cow
if idx(1) > 1
ind = find(datenum(sub.Date) > datenum(b.Calving(idx(i)-1))+100 & datenum(sub.Date) < datenum(b.Calving(idx(i)+1))-150 & sub.DIM < 10,1,'first');
else
ind = find(datenum(sub.Date) >= datenum(b.Calving(idx(i))) & datenum(sub.Date) < datenum(b.Calving(idx(i)+1))-150 & sub.DIM < 10,1,'first');
end
if isempty(ind)==1
if idx(1) > 1
ind = find(datenum(sub.Date) > datenum(b.Calving(idx(i)-1))+100 & sub.DIM < 10,1,'first'); % if it is the last lactation
else
ind = find(datenum(sub.Date) >= datenum(b.Calving(idx(i))) & sub.DIM < 10,1,'first'); % if it is the last lactation
end
end
if isempty(ind) == 0
DIM = sub.DIM(ind); % find DIM of this calving
b.Calving(idx(i)) = sub.Date(ind)-DIM; % correct calving date
b.IsCorrected(idx(i),1) = 1; % add tracer that this is corrected
end
end
else
b.IsCorrected(:,1) = 0;
end
b = b(isnat(b.Calving) == 0 & b.Lac ~= 0,:); % find all cases for which this happens
clear idx ind i cows sub sub2 DIM test
%% STEP 3: Merge tables to one
c(c.Lac == 0 & isnan(c.TDMY),:) = []; % delete all Lac = 0 and no milk yield registered
c(c.TDMY == 0 & c.A7DY == 0 & c.Dur == 0 & c.Milkings == 0,:) = []; % delete (MR?) empty data
c.Lac(c.Lac == 0) = 1; % correct lactation number when milk yield is registered
c(isnan(c.TDMY),:) = []; % delete registrations withuot data
c(isnan(c.A7DY) & isnan(c.Milkings) & isnan(c.Kickoffs) & isnan(c.Incompletes),:) = [];
test_ref = innerjoin(a(:,1:2),c(:,[2 4]),'Keys',{'BA','RefID'});
if length(test_ref.RefID)< length(a.RefID)
a.RefID = [];
OUT = sortrows(innerjoin(a,c,'Keys',{'BA'}),{'BA','Date'}); % join HADD and HALI
else
OUT = sortrows(innerjoin(a,c,'Keys',{'BA','RefID'}),{'BA','Date'}); % join HADD and HALI
end
clear test_ref
OUT = CorLacN_DLV(OUT,b(:,[2 3 4]));
% sort rows
OUT = sortrows(OUT,{'BA','Date'});
%% STEP 4: select variables and reorder
% Select the cols needed
col_OUT = {'OfficialRegNo','BA','Number','RefID','Name','BDate','Calving','Lac','Date','DIM','TDMY','A7DY','Dur','Milkings','Kickoffs','Incompletes'};
% prepare indices
idx_OUT = zeros(1,length(col_OUT)); % to fill in - column indices
% find indices
for i = 1:length(col_OUT)
idx_OUT(i) = find(contains(OUT.Properties.VariableNames,col_OUT{i})==1,1,'Last');
end
% Change order of columns
OUT = OUT(:,idx_OUT);
%% STEP 5: construct summary table
% number of unique animals
% number of unique lactations
% startdate
% % % % enddate
% % % SUM = array2table([0 0], 'VariableNames',{'NUniAn','NUniLac'});
% % % SUM.NUniAn(1,1) = length(unique(OUT.BA));
% % % SUM.NUniLac(1,1) = length(unique(OUT{:,[2 8]},'rows'));
% % % SUM.Start(1,1) = min(OUT.Date);
% % % SUM.End(1,1) = max(OUT.Date);