% PS02_4.m is the m-file answer for Problem set 2 question 4 % Created by T. Kenna 980926 1332 close all life=15; % Figure life clc ans1=['The first catch is that time is in milliseconds not seconds. After']; ans2=['loading the data you must divide the time variable by 1000 otherwise']; ans3=['some of your answers will be incorrect. The general model equation']; ans35=['for the problem is:']; ans4=['y =a(1)+a(2)*t+a(3)*sin(2*pi*f*t)']; ans5=['Because the terms are all linear (none appear inside the sine']; ans6=['function), this becomes a problem in general linear regression.']; ans7=['Let''s start by scaling the time variable and plotting the data.']; ans8=['']; linespace=[' ']; disp(ans1),disp(ans2),disp(ans3),disp(ans35),disp(linespace),disp(ans4) disp(linespace),disp(ans5),disp(ans6),disp(ans7),disp(ans8) pause load photo.dat t=photo(:,1)/1000; y=photo(:,2); figure(1);clf plot(t,y) title('PS02-4 Concentration vs Time') xlabel('Time (s)') ylabel('Concentration (arbitrary units)') pause(life) delete(figure(1)) clc A=[ones(size(t)) t sin(2*pi*60*t)] % The design matrix ans1=['Above are the last few lines of the design matrix A which consists']; ans2=['of a column of ones, a column of t, and a column of 2*pi*f*t.']; ans3=['']; disp(ans1),disp(ans2),disp(ans3) pause clc ans1=['Next, we will do an SVD, and solve for our coefficients (the a''s).']; ans2=['Because we have 501 equations and only 3 unknowns, we have an']; ans3=['overdetermined system. As such, we can''t solve this problem as a']; ans4=['set of simultaneous equations. If the data were perfect, we might be']; ans5=['able to solve for the true coefficient values, but try as we might,']; ans6=['random error always creeps in. In addition, the homework problem also']; ans61=['includes systematic error (re. the 60 Hz noise). While good']; ans62=['experimentalists might be able to control systematic error, random']; ans63=['errors are always present. Because of this combination, the data']; ans7=['points will never agree on the true coefficient values and the']; ans8=['equations will always be inconsistent to some extent. Any regression']; ans9=['curve computed in a situation like this will not plot through all of']; ans10=['the data points. We can obtain a best fit through the data by']; ans11=['minimizing the square of the distance between the regression']; ans12=['function and the observations. An SVD accomplishes this by']; ans13=['minimizing the function (A*a-y)^2. ']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6),disp(ans61) disp(ans62),disp(ans63),disp(ans7),disp(ans8),disp(ans9),disp(ans10) disp(ans11),disp(ans12),disp(ans13) pause clc ans1=['The next set of commands come right out of you notes (section 3.3.3)']; ans2=['so I won''t spend too much time here (I''ll just show them to you).']; ans3=['After doing the calculations, and solving for the coefficients and']; ans4=['uncertainties, I displayed them as matrix. ']; disp(ans1),disp(ans2),disp(ans3),disp(ans4) pause clc ans5=['[U S V]=svd(A,0);']; ans6=['W=diag(1./diag(S));']; ans7=['a=V*W*U''*y'];; ans8=['Covmat=V*W.^2*V'';']; ans9=['[N,M]=size(A);']; ans10=['reduced_chi_square=sum((A*a-y).^2)/(N-M);']; ans11=['sa=sqrt(reduced_chi_square*diag(Covmat));']; ans12=['Coef_and_uncertainties=[a sa]']; disp(ans5),disp(linespace),disp(ans6),disp(linespace),disp(ans7) disp(linespace),disp(ans8),disp(linespace),disp(ans9),disp(linespace), disp(ans10),disp(linespace),disp(ans11) [U S V]=svd(A,0); % The SVD W=diag(1./diag(S)); % The inversion of S a=V*W*U'*y; % Solve for the coefficients Covmat=V*W.^2*V'; % Compute the covariance matrix [N,M]=size(A); % The size of the design matrix reduced_chi_square=sum((A*a-y).^2)/(N-M); % Compute reduced chi squared sa=sqrt(reduced_chi_square*diag(Covmat)); % Uncertainties in coefficients Coef_and_uncertainties=[a sa] ans1=['']; disp(ans1) pause clc ans1=['Next, we need to do a linear fit using the command:']; ans2=['[b sb cov r]=linfit(t,y,1);']; ans3=['The coefficients and uncertainties are displayed below. ']; disp(ans1),disp(linespace),disp(ans2),disp(linespace),disp(ans3) pause [b sb cov r]=linfit(t,y,1); Straight_line_fit=[b' sb'] ans1=['']; disp(ans1) pause clc ans1=['A very wise thing to do when comparing your fit to the data is to']; ans2=['look closely at the residuals (that is the difference between the']; ans3=['original data and the model fit). The reason this is important is']; ans4=['that it lets you see if there is any large scale or systematic trend']; ans5=['to the residuals which your chosen model did not catch. Any trend']; ans6=['observed in this type of plot would be a good reason to re-think the']; ans7=['model you are using. A case in point is to compare residual plots for']; ans8=['the general least squares fit and the straight line fit. Since we']; ans9=['know there is a 60 Hz contamination which the straight line fit does']; ans10=['not account for, it should jump right out at us. Let''s use']; ans11=['MATLAB''s subplot feature to first look at the residuals from the']; ans12=['fit that takes into account the contamination and compare them to']; ans13=['the residuals from the straight line fit. ']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6),disp(ans7) disp(ans8),disp(ans9),disp(ans10),disp(ans11),disp(ans12),disp(ans13) pause subplot(2,1,1) plot(t,y- A*a,'r') title('Residual Comparison for PS02-4') ylabel('General LSQ') subplot(2,1,2) plot(t,y-A(:,1:2)*b') axis([0 1 -20 20]) ylabel('Straight Line') xlabel('Time') pause(life) delete(figure(1)) clc ans1=['Can you see how if you had initially done a straight line fit and']; ans2=['looked at the residuals it might have led you to include a sinusoid']; ans3=['term in your model? ']; disp(ans1),disp(ans2),disp(ans3) pause clc ans1=['Finally, lets compare the resulting coefficients from the to fits. I']; ans2=['will display the coefficients and their repspective uncertainties in']; ans3=['a single matrix with the command:']; ans4=['[a(1) sa(1) a(2) sa(2) a(3) sa(3);b(1) sb(1) b(2) sb(2) 0 0]']; ans5=['Comparing the intercept and slope from both fits shows that the']; ans6=['straight line fit didn''t actually do too bad. The reason why it']; ans7=['works so well is that you go through many cycles of 60hz noise,']; ans8=['which tends to average it out. The slight differences are probably']; ans9=['caused by the fact that the record may not have complete cycles, so']; ans10=['the final average is sligtly skewed. The larger uncertainties in the']; ans11=['straight line fit are related to the fact that you are treating the']; ans12=['60Hz signal as random noise, which you know it isn''t. ']; disp(ans1),disp(ans2),disp(ans3),disp(linespace),disp(ans4),disp(linespace) Coef_and_Uncertainy_Comparison=[a(1) sa(1) a(2) sa(2) a(3) sa(3);b(1) sb(1) b(2) sb(2) 0 0] disp(ans5),disp(ans6),disp(ans7),disp(ans8),disp(ans9),disp(ans10) disp(ans11),disp(ans12) pause clc ans1=['End of problem 4.']; disp(ans1)