/[MITgcm]/MITgcm_contrib/gael/matlab_class/ecco_v4/rads_noice_mad.m
ViewVC logotype

Annotation of /MITgcm_contrib/gael/matlab_class/ecco_v4/rads_noice_mad.m

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (hide annotations) (download)
Tue Nov 13 15:00:06 2012 UTC (12 years, 8 months ago) by gforget
Branch: MAIN
- RADS pre-processing : remove ice "contaminated" data and treat outliers in general.

1 gforget 1.1 function []=rads_noice_mad(choiceData,l0,L0,doTesting);
2    
3     if isempty(who('doTesting')); doTesting=0; end;
4    
5     %directories
6     dirData='/net/nares/raid11/ecco-shared/ecco-version-4/input/input_rads/';
7     topexfile = 'RADS_TJ_v4';
8     ersfile = 'RADS_ERS_ENV_v4';
9     gfofile = 'RADS_GFO_v4';
10     dirIce='/net/nares/raid11/ecco-shared/ecco-version-4/input/input_nsidc_all/';
11     fileIce='nsidc79_daily';
12    
13     dirOutput='ecco_it0003/sea_level_analyses/results_outliers/';
14     %dirOutput='ecco_it0003/sea_level_analyses/tmp/';
15    
16     %testing switches
17     if doTesting; l0=-170; L0=-80; choiceData='gfofile'; end;
18    
19     %select region
20     gcmfaces_global;
21     XC=convert2gcmfaces(mygrid.XC.*mygrid.mskC(:,:,1)); XC=XC(:);
22     YC=convert2gcmfaces(mygrid.YC.*mygrid.mskC(:,:,1)); YC=YC(:);
23     ii=find(XC>=l0-10&XC<=l0+10&YC>=L0-10&YC<=L0+10);
24     ni=length(ii);
25     nt=6940;%corresponds to 1992-2010
26    
27     %load data
28     eval(['fileData=' choiceData ';']);
29    
30     allData=zeros(ni,nt);
31     ndata=0;
32     for yy=1992:2010;
33     fprintf(['reading ' fileData '_' num2str(yy) '\n']);
34     tmp1=dir([dirData fileData '_' num2str(yy)]); tmp0=tmp1.bytes/90/1170/4;
35     tmp1=read2memory([dirData fileData '_' num2str(yy)],[90*1170 tmp0]);
36     allData(:,ndata+[1:tmp0])=tmp1(ii,:);
37     tmp1=read2memory([dirIce fileIce '_' num2str(yy)],[90*1170 tmp0]);
38     allIce(:,ndata+[1:tmp0])=tmp1(ii,:);
39     ndata=ndata+tmp0;
40     end;
41     allData(allData<-999)=NaN; allData=allData/100;
42     allIce(allIce<-999)=NaN; allIce=allIce;
43     if ndata~=nt; error('nt was mispecified'); end;
44    
45     %if doTesting; eval(['save ' dirOutput 'rads_noice_mad.demo.mat allData allIce ndata;']); end;
46     %if doTesting; eval(['load ' dirOutput 'rads_noice_mad.demo.mat allData allIce ndata;']); end;
47    
48     %only keep ice free data points
49     noiceData=allData; noiceData(allIce>0)=NaN;
50     iceRejectPerc=100*(1-nansum(~isnan(noiceData(:)))/nansum(~isnan(allData(:))));
51    
52     %the outliers filtering
53     medianData=nanmedian(noiceData,2)*ones(1,nt);%center of distribution
54     madData=1.4826*mad(noiceData,1,2);%width of the distribution
55     cutoff=4*madData*ones(1,nt);%a factor 4 corresponds to a typical cost of 16
56     goodData=noiceData; goodData(abs(noiceData-medianData)>cutoff)=NaN;
57     madRejectPerc=100*(1-nansum(~isnan(goodData(:)))/nansum(~isnan(noiceData(:))));
58    
59     %output result
60     [iceRejectPerc madRejectPerc]
61     %note : original testing was done with gfofile, l0=-170; L0=-80;
62     % including ice points madRejectPerc is ~ 3.9%
63     % excluding ice points madRejectPerc drops to 0.33%
64    
65     if ~doTesting;
66     eval(['save ' dirOutput fileData 'l0is' num2str(l0) 'L0is' num2str(L0) ...
67     ' ii goodData madData medianData iceRejectPerc madRejectPerc ']);
68     end;
69    
70     %------- testing case -----%
71    
72     if doTesting;
73    
74     %note : original testing was done with ersfile, l0=-170; L0=-80;
75     % the last point (see next line) happened to be work as an example
76     jj=find(~isnan(madData));
77    
78     %do example of mad detection of ice covered points as outliers at one grid point
79     kk=jj(end); xc_kk=XC(ii(kk)); yc_kk=YC(ii(kk));
80     all_kk=allData(kk,:);
81     median_kk=nanmedian(all_kk);%center of distribution
82     mad_kk=1.4826*mad(all_kk,1,2);%width of distribution
83     cutoff_kk=4*mad_kk*ones(1,nt);%a factor 4 corresponds to a typical cost of 16
84     good_kk=all_kk; good_kk(abs(good_kk-median_kk)>cutoff_kk)=NaN;
85     reject_kk=100*(1-nansum(~isnan(good_kk))/nansum(~isnan(all_kk)));
86    
87     %compute the various statistics
88     msk=1+0*allIce; msk(isnan(allData))=NaN;
89     %in the testing case we dont filter out the ice points : msk(allIce>0)=NaN;
90     stdOut=nanstd(msk.*allData,0,2);%sample standard deviation
91     iqrOut=0.7413*iqr(msk.*allData,2);%intequartile range estimate of std
92     madOut=1.4826*mad(msk.*allData,1,2);%median absolute difference estimate of std
93     jj=find(~isnan(stdOut));
94    
95     %plot region
96     if L0>60; myproj=2; elseif L0<-60; myproj=3; else; myproj=1; end;
97     figureL; m_map_gcmfaces(mygrid.Depth,myproj,{'myCaxis',[0 6e3]});
98     colormap('gray'); colorbar off;
99     m_map_gcmfaces({'plot',XC(ii),YC(ii),'m.'},myproj,{'doHold',1});
100     m_map_gcmfaces({'plot',xc_kk,yc_kk,'k.','MarkerSize',24},myproj,{'doHold',1});
101    
102     saveas(gcf,[dirOutput 'outliers_0'],'fig');
103     eval(['print -djpeg90 ' dirOutput 'outliers_0.jpg']);
104     eval(['print -depsc ' dirOutput 'outliers_0.eps']);
105    
106     %plot sensitivity of std estimates to outliers
107     figureL; set(gca,'FontSize',16);
108     plot(stdOut(jj),'.-'); hold on; plot(madOut(jj),'r.-'); plot(iqrOut(jj),'g.-');
109     legend('SSTD','MAD*1.4826','IQR*0.7413');
110     xlabel('grid point index'); ylabel('estimated standard deviation');
111    
112     saveas(gcf,[dirOutput 'outliers_1'],'fig');
113     eval(['print -djpeg90 ' dirOutput 'outliers_1.jpg']);
114     eval(['print -depsc ' dirOutput 'outliers_1.eps']);
115    
116     %plot distributions for the overall region
117     figureL;
118     subplot(3,1,1); set(gca,'FontSize',16);
119     %all data points
120     tmp1=allData(jj,:); hist(tmp1(:),[-1.5:0.02:1.5]);
121     aa=axis; aa(1:2)=[-1 1]; axis(aa); title('all data points');
122     %and the corresponding normal distribution
123     [xx,yy]=normal_distribution([-1.5:0.02:1.5],tmp1(:));
124     hold on; plot(xx,yy,'m','LineWidth',0.5);
125     %
126     subplot(3,1,2); set(gca,'FontSize',16);
127     tmp1=noiceData(jj,:); hist(tmp1(:),[-1.5:0.02:1.5]);
128     aa=axis; aa(1:2)=[-1 1]; axis(aa); title('ice free data points');
129     [xx,yy]=normal_distribution([-1.5:0.02:1.5],tmp1(:));
130     hold on; plot(xx,yy,'m','LineWidth',0.5);
131     subplot(3,1,3); set(gca,'FontSize',16);
132     tmp1=allData(jj,:); tmp1(~isnan(noiceData(jj,:)))=NaN; hist(tmp1(:),[-1.5:0.02:1.5]);
133     aa=axis; aa(1:2)=[-1 1]; axis(aa); title('icy data points');
134     [xx,yy]=normal_distribution([-1.5:0.02:1.5],tmp1(:));
135     hold on; plot(xx,yy,'m','LineWidth',0.5);
136    
137     saveas(gcf,[dirOutput 'outliers_2'],'fig');
138     eval(['print -djpeg90 ' dirOutput 'outliers_2.jpg']);
139     eval(['print -depsc ' dirOutput 'outliers_2.eps']);
140    
141     figureL;
142     subplot(2,1,1); set(gca,'FontSize',16);
143     hist(all_kk,[-1.5:0.02:1.5]);
144     aa=axis; aa(1:2)=[-1 1]; axis(aa); title('before MAD detection');
145     %and the corresponding normal distribution
146     [xx,yy]=normal_distribution([-1.5:0.02:1.5],all_kk(:));
147     hold on; plot(xx,yy,'m','LineWidth',2);
148     subplot(2,1,2); set(gca,'FontSize',16);
149     hist(good_kk,[-1.5:0.02:1.5]);
150     aa=axis; aa(1:2)=[-1 1]; axis(aa); title('after MAD detection');
151     %and the corresponding normal distribution
152     [xx,yy]=normal_distribution([-1.5:0.02:1.5],all_kk(:));
153     hold on; plot(xx,yy,'m','LineWidth',2);
154    
155     saveas(gcf,[dirOutput 'outliers_3'],'fig');
156     eval(['print -djpeg90 ' dirOutput 'outliers_3.jpg']);
157     eval(['print -depsc ' dirOutput 'outliers_3.eps']);
158    
159     end;
160    
161     function [xx,yy]=normal_distribution(xx,data);
162    
163     dx=diff(xx);
164     dx=median(dx);
165     %dx=unique(dx); if length(dx)>1; error('plot_normal_distribution expect regular spacing\n'); end;
166    
167     mm=nanmedian(data);
168     ss=1.4826*mad(data,1);
169     nn=sum(~isnan(data));
170     %for testing : mm=0.2; ss=0.1; nn=100;
171    
172     xx=xx+mm;%reset xx to center of distribution
173     yy=nn*dx/ss/sqrt(2*pi)*exp(-0.5*xx.*xx/ss/ss);%compute distribution
174     %for testing : sum(yy)/nn
175    

  ViewVC Help
Powered by ViewVC 1.1.22