1 |
gforget |
1.2 |
function []=rads_noice_mad(choiceData,l0,L0,doTesting,doNoice); |
2 |
|
|
%object : outlier (& ice if doNoice) data flagging for altimetry |
3 |
|
|
%inputs : choiceData is 'topexfile','ersfile' or 'gfofile' |
4 |
|
|
% l0 is the box longitude (l0+-10 will be treated) |
5 |
|
|
% L0 is the box latitude (L0+-10 will be treated) |
6 |
|
|
% doTesting (0 by default) is a testing option |
7 |
|
|
% doNoice (0 by default) activate the flagging |
8 |
|
|
% of all ice covered points (based on nsidc) |
9 |
|
|
% |
10 |
gforget |
1.1 |
|
11 |
|
|
if isempty(who('doTesting')); doTesting=0; end; |
12 |
gforget |
1.2 |
if isempty(who('doNoice')); doNoice=0; end; |
13 |
gforget |
1.1 |
|
14 |
|
|
%directories |
15 |
gforget |
1.3 |
dirData='inputfiles/'; |
16 |
gforget |
1.2 |
topexfile = 'tj_daily_ssh_v4_r1'; |
17 |
|
|
ersfile = 'en_daily_ssh_v4_r1'; |
18 |
|
|
gfofile = 'g1_daily_ssh_v4_r1'; |
19 |
gforget |
1.3 |
dirIce='inputfiles/'; |
20 |
gforget |
1.1 |
fileIce='nsidc79_daily'; |
21 |
gforget |
1.2 |
dirOutput='./'; |
22 |
gforget |
1.1 |
|
23 |
|
|
%testing switches |
24 |
|
|
if doTesting; l0=-170; L0=-80; choiceData='gfofile'; end; |
25 |
|
|
|
26 |
|
|
%select region |
27 |
|
|
gcmfaces_global; |
28 |
|
|
XC=convert2gcmfaces(mygrid.XC.*mygrid.mskC(:,:,1)); XC=XC(:); |
29 |
|
|
YC=convert2gcmfaces(mygrid.YC.*mygrid.mskC(:,:,1)); YC=YC(:); |
30 |
|
|
ii=find(XC>=l0-10&XC<=l0+10&YC>=L0-10&YC<=L0+10); |
31 |
|
|
ni=length(ii); |
32 |
gforget |
1.2 |
nt=7671;%corresponds to 1992-2012 |
33 |
gforget |
1.1 |
|
34 |
|
|
%load data |
35 |
|
|
eval(['fileData=' choiceData ';']); |
36 |
|
|
|
37 |
|
|
allData=zeros(ni,nt); |
38 |
gforget |
1.2 |
allIce=zeros(ni,nt); |
39 |
gforget |
1.1 |
ndata=0; |
40 |
gforget |
1.2 |
for yy=1992:2012; |
41 |
gforget |
1.1 |
fprintf(['reading ' fileData '_' num2str(yy) '\n']); |
42 |
|
|
tmp1=dir([dirData fileData '_' num2str(yy)]); tmp0=tmp1.bytes/90/1170/4; |
43 |
|
|
tmp1=read2memory([dirData fileData '_' num2str(yy)],[90*1170 tmp0]); |
44 |
|
|
allData(:,ndata+[1:tmp0])=tmp1(ii,:); |
45 |
gforget |
1.2 |
if doNoice; |
46 |
|
|
tmp1=read2memory([dirIce fileIce '_' num2str(yy)],[90*1170 tmp0]); |
47 |
|
|
allIce(:,ndata+[1:tmp0])=tmp1(ii,:); |
48 |
|
|
end; |
49 |
gforget |
1.1 |
ndata=ndata+tmp0; |
50 |
|
|
end; |
51 |
|
|
allData(allData<-999)=NaN; allData=allData/100; |
52 |
|
|
allIce(allIce<-999)=NaN; allIce=allIce; |
53 |
|
|
if ndata~=nt; error('nt was mispecified'); end; |
54 |
|
|
|
55 |
|
|
%if doTesting; eval(['save ' dirOutput 'rads_noice_mad.demo.mat allData allIce ndata;']); end; |
56 |
|
|
%if doTesting; eval(['load ' dirOutput 'rads_noice_mad.demo.mat allData allIce ndata;']); end; |
57 |
|
|
|
58 |
|
|
%only keep ice free data points |
59 |
|
|
noiceData=allData; noiceData(allIce>0)=NaN; |
60 |
|
|
iceRejectPerc=100*(1-nansum(~isnan(noiceData(:)))/nansum(~isnan(allData(:)))); |
61 |
|
|
|
62 |
|
|
%the outliers filtering |
63 |
|
|
medianData=nanmedian(noiceData,2)*ones(1,nt);%center of distribution |
64 |
|
|
madData=1.4826*mad(noiceData,1,2);%width of the distribution |
65 |
|
|
cutoff=4*madData*ones(1,nt);%a factor 4 corresponds to a typical cost of 16 |
66 |
|
|
goodData=noiceData; goodData(abs(noiceData-medianData)>cutoff)=NaN; |
67 |
|
|
madRejectPerc=100*(1-nansum(~isnan(goodData(:)))/nansum(~isnan(noiceData(:)))); |
68 |
|
|
|
69 |
|
|
%output result |
70 |
|
|
[iceRejectPerc madRejectPerc] |
71 |
|
|
%note : original testing was done with gfofile, l0=-170; L0=-80; |
72 |
|
|
% including ice points madRejectPerc is ~ 3.9% |
73 |
|
|
% excluding ice points madRejectPerc drops to 0.33% |
74 |
|
|
|
75 |
|
|
if ~doTesting; |
76 |
|
|
eval(['save ' dirOutput fileData 'l0is' num2str(l0) 'L0is' num2str(L0) ... |
77 |
gforget |
1.2 |
' ii goodData madData medianData iceRejectPerc madRejectPerc choiceData l0 L0 doNoice']); |
78 |
gforget |
1.1 |
end; |
79 |
|
|
|
80 |
|
|
%------- testing case -----% |
81 |
|
|
|
82 |
|
|
if doTesting; |
83 |
|
|
|
84 |
gforget |
1.2 |
%note : original testing was done with gfofile, l0=-170; L0=-80; |
85 |
|
|
% the last point (see next line) happened to work as an example |
86 |
gforget |
1.1 |
jj=find(~isnan(madData)); |
87 |
|
|
|
88 |
|
|
%do example of mad detection of ice covered points as outliers at one grid point |
89 |
|
|
kk=jj(end); xc_kk=XC(ii(kk)); yc_kk=YC(ii(kk)); |
90 |
|
|
all_kk=allData(kk,:); |
91 |
|
|
median_kk=nanmedian(all_kk);%center of distribution |
92 |
|
|
mad_kk=1.4826*mad(all_kk,1,2);%width of distribution |
93 |
|
|
cutoff_kk=4*mad_kk*ones(1,nt);%a factor 4 corresponds to a typical cost of 16 |
94 |
|
|
good_kk=all_kk; good_kk(abs(good_kk-median_kk)>cutoff_kk)=NaN; |
95 |
|
|
reject_kk=100*(1-nansum(~isnan(good_kk))/nansum(~isnan(all_kk))); |
96 |
|
|
|
97 |
|
|
%compute the various statistics |
98 |
|
|
msk=1+0*allIce; msk(isnan(allData))=NaN; |
99 |
|
|
%in the testing case we dont filter out the ice points : msk(allIce>0)=NaN; |
100 |
|
|
stdOut=nanstd(msk.*allData,0,2);%sample standard deviation |
101 |
|
|
iqrOut=0.7413*iqr(msk.*allData,2);%intequartile range estimate of std |
102 |
|
|
madOut=1.4826*mad(msk.*allData,1,2);%median absolute difference estimate of std |
103 |
|
|
jj=find(~isnan(stdOut)); |
104 |
|
|
|
105 |
|
|
%plot region |
106 |
|
|
if L0>60; myproj=2; elseif L0<-60; myproj=3; else; myproj=1; end; |
107 |
|
|
figureL; m_map_gcmfaces(mygrid.Depth,myproj,{'myCaxis',[0 6e3]}); |
108 |
|
|
colormap('gray'); colorbar off; |
109 |
|
|
m_map_gcmfaces({'plot',XC(ii),YC(ii),'m.'},myproj,{'doHold',1}); |
110 |
|
|
m_map_gcmfaces({'plot',xc_kk,yc_kk,'k.','MarkerSize',24},myproj,{'doHold',1}); |
111 |
|
|
|
112 |
|
|
saveas(gcf,[dirOutput 'outliers_0'],'fig'); |
113 |
|
|
eval(['print -djpeg90 ' dirOutput 'outliers_0.jpg']); |
114 |
|
|
eval(['print -depsc ' dirOutput 'outliers_0.eps']); |
115 |
|
|
|
116 |
|
|
%plot sensitivity of std estimates to outliers |
117 |
|
|
figureL; set(gca,'FontSize',16); |
118 |
|
|
plot(stdOut(jj),'.-'); hold on; plot(madOut(jj),'r.-'); plot(iqrOut(jj),'g.-'); |
119 |
|
|
legend('SSTD','MAD*1.4826','IQR*0.7413'); |
120 |
|
|
xlabel('grid point index'); ylabel('estimated standard deviation'); |
121 |
|
|
|
122 |
|
|
saveas(gcf,[dirOutput 'outliers_1'],'fig'); |
123 |
|
|
eval(['print -djpeg90 ' dirOutput 'outliers_1.jpg']); |
124 |
|
|
eval(['print -depsc ' dirOutput 'outliers_1.eps']); |
125 |
|
|
|
126 |
|
|
%plot distributions for the overall region |
127 |
|
|
figureL; |
128 |
|
|
subplot(3,1,1); set(gca,'FontSize',16); |
129 |
|
|
%all data points |
130 |
|
|
tmp1=allData(jj,:); hist(tmp1(:),[-1.5:0.02:1.5]); |
131 |
|
|
aa=axis; aa(1:2)=[-1 1]; axis(aa); title('all data points'); |
132 |
|
|
%and the corresponding normal distribution |
133 |
|
|
[xx,yy]=normal_distribution([-1.5:0.02:1.5],tmp1(:)); |
134 |
|
|
hold on; plot(xx,yy,'m','LineWidth',0.5); |
135 |
|
|
% |
136 |
|
|
subplot(3,1,2); set(gca,'FontSize',16); |
137 |
|
|
tmp1=noiceData(jj,:); hist(tmp1(:),[-1.5:0.02:1.5]); |
138 |
|
|
aa=axis; aa(1:2)=[-1 1]; axis(aa); title('ice free data points'); |
139 |
|
|
[xx,yy]=normal_distribution([-1.5:0.02:1.5],tmp1(:)); |
140 |
|
|
hold on; plot(xx,yy,'m','LineWidth',0.5); |
141 |
|
|
subplot(3,1,3); set(gca,'FontSize',16); |
142 |
|
|
tmp1=allData(jj,:); tmp1(~isnan(noiceData(jj,:)))=NaN; hist(tmp1(:),[-1.5:0.02:1.5]); |
143 |
|
|
aa=axis; aa(1:2)=[-1 1]; axis(aa); title('icy data points'); |
144 |
|
|
[xx,yy]=normal_distribution([-1.5:0.02:1.5],tmp1(:)); |
145 |
|
|
hold on; plot(xx,yy,'m','LineWidth',0.5); |
146 |
|
|
|
147 |
|
|
saveas(gcf,[dirOutput 'outliers_2'],'fig'); |
148 |
|
|
eval(['print -djpeg90 ' dirOutput 'outliers_2.jpg']); |
149 |
|
|
eval(['print -depsc ' dirOutput 'outliers_2.eps']); |
150 |
|
|
|
151 |
|
|
figureL; |
152 |
|
|
subplot(2,1,1); set(gca,'FontSize',16); |
153 |
|
|
hist(all_kk,[-1.5:0.02:1.5]); |
154 |
|
|
aa=axis; aa(1:2)=[-1 1]; axis(aa); title('before MAD detection'); |
155 |
|
|
%and the corresponding normal distribution |
156 |
|
|
[xx,yy]=normal_distribution([-1.5:0.02:1.5],all_kk(:)); |
157 |
|
|
hold on; plot(xx,yy,'m','LineWidth',2); |
158 |
|
|
subplot(2,1,2); set(gca,'FontSize',16); |
159 |
|
|
hist(good_kk,[-1.5:0.02:1.5]); |
160 |
|
|
aa=axis; aa(1:2)=[-1 1]; axis(aa); title('after MAD detection'); |
161 |
|
|
%and the corresponding normal distribution |
162 |
|
|
[xx,yy]=normal_distribution([-1.5:0.02:1.5],all_kk(:)); |
163 |
|
|
hold on; plot(xx,yy,'m','LineWidth',2); |
164 |
|
|
|
165 |
|
|
saveas(gcf,[dirOutput 'outliers_3'],'fig'); |
166 |
|
|
eval(['print -djpeg90 ' dirOutput 'outliers_3.jpg']); |
167 |
|
|
eval(['print -depsc ' dirOutput 'outliers_3.eps']); |
168 |
|
|
|
169 |
|
|
end; |
170 |
|
|
|
171 |
|
|
function [xx,yy]=normal_distribution(xx,data); |
172 |
|
|
|
173 |
|
|
dx=diff(xx); |
174 |
|
|
dx=median(dx); |
175 |
|
|
%dx=unique(dx); if length(dx)>1; error('plot_normal_distribution expect regular spacing\n'); end; |
176 |
|
|
|
177 |
|
|
mm=nanmedian(data); |
178 |
|
|
ss=1.4826*mad(data,1); |
179 |
|
|
nn=sum(~isnan(data)); |
180 |
|
|
%for testing : mm=0.2; ss=0.1; nn=100; |
181 |
|
|
|
182 |
|
|
xx=xx+mm;%reset xx to center of distribution |
183 |
|
|
yy=nn*dx/ss/sqrt(2*pi)*exp(-0.5*xx.*xx/ss/ss);%compute distribution |
184 |
|
|
%for testing : sum(yy)/nn |
185 |
|
|
|