% PS02_5.m is the m-file answer for problem set 2 question 5. % Made by T. Kenna 980929 1307 % Modif'd 2000:09:22 D. Glover % Modif'd 2002:09:26 D. Glover close all life=2; % the figure life % clc ans1=['After loading in chromo.dat and separating it into two variables t']; ans2=['and y, we must plot it to get an idea of just what we are looking at.']; ans3=['In addition, we need to look at the data in order to give nlleasqr.m,']; ans4=['the non linear least squares regression m-file, a first guess at what']; ans5=['the coefficients should be. Let''s take a look at the plot. ']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5) pause load chromo.dat t=chromo(:,1); y=chromo(:,2); plot(t,y) title('PS02-5 Non-Linear Least Squares Regression') xlabel('Time') ylabel('Signal') pause(life) delete(figure(1)) % clc ans1=['The first trick is to get the correct model. A careful reading of']; ans2=['the problem says that you need to separate two overlapping peaks on']; ans3=['a sloping background. If we start with the equation of a line and add']; ans4=['to it two identical equations for gaussian curves (given in the']; ans5=['notes), we should have it. The equation we end up with in our']; ans6=['function file (I called mine PS02_5func.m) should look something like']; ans7=['this: ']; linespace=[' ']; ans8=['function out=PS02_5func(t,a)']; ans9=['out=a(1)+a(2)*t+a(3)*exp(-((t-a(4))/a(5)).^2)+a(6)*exp(-((t-a(7))/a(8)).^2);']; ans10=['Note the "dot" before the "caret" (exponetiation) which makes it an']; ans11=['array rather than matrix squaring. Don''t forget the semicolon at']; ans12=['the end of your function unless you enjoy seeing screens and screens']; ans13=['of numbers. The coefficients are defined as follows: ']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6),disp(ans7) pause % clc disp(linespace),disp(ans8),disp(ans9),disp(linespace),disp(ans10),disp(ans11) disp(ans12),disp(ans13) pause ans1=['a(1) = baseline']; ans2=['a(2) = baseline slope']; ans3=['a(3) = height peak 1']; ans4=['a(4) = peak center 1 (aka elution time 1)']; ans5=['a(5) = width of peak 1']; ans6=['a(6) = height peak 2']; ans7=['a(7) = peak center 2 (aka elution time 2)']; ans8=['a(8) = width of peak 2 (1)']; ans9=['']; disp(linespace),disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5) disp(ans6),disp(ans7),disp(ans8),disp(linespace),disp(ans9) pause ans1=['After you have created your function file, (as an m-file), and also']; ans2=['down-loaded both nlleasqr.m as well as dfdp.m you are almost ready to']; ans3=['roll. The final thing you need to do is to supply nlleasqr.m with an']; ans4=['initial guess as to what each coefficient is in the form of a']; ans5=['variable called pin; it is important that these be as accurate as you']; ans6=['can make them. Now let''s look at the data with the coefficients and']; ans7=['approximate values drawn in. ']; disp(linespace),disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5) disp(ans6),disp(ans7) pause plot(t,y) X=0:1:20; Y=5:(4.8-5)/(size(X,2)-1):4.8; h=line(X,Y); set(h,'color','Red') set(h,'linewidth',2) X=ones(5,1); X=ones(1,5)*8; Y=5:(16-5)/(size(X,2)-1):16; h=line(X,Y); set(h,'color','Red') set(h,'linewidth',2) X=ones(1,5)*12; Y=5:(21-5)/(size(X,2)-1):21; h=line(X,Y); set(h,'color','Red') set(h,'linewidth',2) Y=ones(1,5)*14.5; X=7.5:(8.5-7.5)/(size(X,2)-1):8.5; h=line(X,Y); set(h,'color','Red') set(h,'linewidth',2) Y=ones(1,5)*17; X=11:(13-11)/(size(X,2)-1):13; h=line(X,Y); set(h,'color','Red') set(h,'linewidth',2) X=0:.2:2; Y=5:(10-5)/(size(X,2)-1):10; h=line(X,Y); set(h,'color','Black') text(2.1,10,'a(1)=baseline') text(16,10,'a(2) = slope') text(7.5,17.5,'a(3)') text(11.5,23,'a(6)') text(6,14.5,'a(5)') text(11.5,4,'a(7)') text(13.5,17,'a(8)') text(7.5,4,'a(4)') X=10:.2:15.8; Y=5:(10-5)/(size(X,2)-1):10; h=line(X,Y); set(h,'color','Black') title('PS02-5 Non-Linear LSR with Coefficients') xlabel('Time') ylabel('Signal') text(1,24,'A discussion of this figure appears') text(1,23,'in the MATLAB window. Arrange your') text(1,22,'windows so you can see both.') % clc ans1=['A look at the plot reaveals that the average baseline is about 5 so']; ans2=['that is a good guess for a(1). The sloping background is hardly']; ans3=['discernable so let''s guess about .1 for a(2). The heights of the']; ans4=['two gaussians are about 15 and 20 if we subtract the background we']; ans5=['get 10 and 15 as our guesses for a(3) and a(6). The positions of the']; ans6=['two gaussians are at about 8 and 12 respectively so that''s what we']; ans7=['will guess for a(4) and a(7). The width of a gaussian is a bit']; ans8=['tricky; it is not the width at the base, but the width which includes']; ans9=['approximately 67% of the area beneath the curve. We will guess about']; ans10=['1 for a(5) and 2 for a(8). We need to put all of our guesses in']; ans11=['a variable pin as follows: ']; ans12=['pin=[5 .1 10 8 1 15 12 2];']; ans13=['']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6),disp(ans7) disp(ans8),disp(ans9),disp(ans10),disp(ans11),disp(linespace),disp(ans12) disp(linespace),disp(ans13) pause pin=[5 .1 10 8 1 15 12 2]; % clc delete(figure(1)) ans1=['Now we are ready to do the regression using the following command:']; ans2=['']; disp(ans1),disp(ans2) pause ans3=['[f,p,kvg,iter,corp,covp,covr,stdresid,Z,r2]=nlleasqr(t,y,pin,''PS02_5func'');']; disp(linespace),disp(ans3),disp(linespace),disp(ans2),disp(linespace) pause [f,p,kvg,iter,corp,covp,covr,stdresid,Z,r2]=nlleasqr(t,y,pin,'PS02_5func'); ans1=['Typing help nlleasqr will help you define what each of the above']; ans2=['variables are; we are really concerned with" p", the fitted']; ans3=['coefficients, and "covp", the covariance matrix of p. Remembering']; ans4=['that the diagonal of the covariance matrix is the variance of each']; ans5=['coefficient, we obtain the uncertainties (standard deviation) by']; ans6=['taking the square root of the diaganol using the command: ']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6) pause ans7=['sp=sqrt(diag(covp));']; ans8=['We can now put our p''s and their respective uncertainties into a']; ans9=['three-column matrix for pure viewing pleasure. Note the first column']; ans10=['is coefficient number. ']; disp(linespace),disp(ans7),disp(linespace),disp(ans8),disp(ans9),disp(ans10) pause sp=sqrt(diag(covp)); co=1:8; format short g Coefficients_and_Uncertainties=[co' p sp] ans10=['']; disp(ans10) pause ans1=['But wait a minute! The explicit Gaussian function is slightly']; ans2=['different than the one we gave you in the notes. Both will give you']; ans3=['a curve that has that wonderful Gaussian bell shape, but it is the']; ans4=['interpretation of the parameters that is important here. To get the']; ans5=['true peak width you need to know the standard deviation (s) of the']; ans6=['Gaussian. Explicitly those terms representing the Gaussian peaks look']; ans7=['like a*exp(-(1/2)*((t-b)/s)^2) and in the notes we shorthanded the 2s^2']; ans8=['as simply a(4)^2. If we do the substitutions (remembering that s is only']; ans9=['one half the the peak width), we find that the real peak width and']; ans10=['(following proper propagation of variance) its uncertainty are given']; ans11=['by sqrt(2)*a(4). So the above matrix should read: ']; disp(linespace),disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6) disp(ans7),disp(ans8),disp(ans9),disp(ans10),disp(ans11) pause p2=p; sp2=sp; p2(5)=sqrt(2)*p(5); sp2(5)=sqrt(2)*sp(5); p2(8)=sqrt(2)*p(8); sp2(8)=sqrt(2)*sp(8); Coefficients_and_Uncertainties=[co' p2 sp2] pause % clc ans1=['The next problem is calculating the areas under each curve. The']; ans2=['formula given has a few pitfalls. First, the formula is for the']; ans3=['definite integral from zero to positive infinity. If you use it as it']; ans4=['is, you will only get one half of the area. In order to compute the']; ans5=['entire area you need to integrate from positive infinity to negative']; ans6=['infinity. The effect on the formula is to drop the 2 from the']; ans7=['denominator on the RHS of the equation. Second, you need to figure']; ans8=['out what a and b are on the RHS of the equation. If you examine the']; ans9=['portion of your function equation that includes a(3), a(4), and a(5),']; ans10=['you will see that it bears some similarity to the LHS of the']; ans11=['equation: ']; ans12=['a*exp-(b^2*x^2) = a(3)*exp(-((t-a(4))/a(5)).^2']; ans13=['We can see that a(3) and corresponds to a in the LHS of']; ans14=['the equation so that leaves us with the exponential term of:']; ans15=['']; ans16=['-(b^2*x^2) = -((t-a(4))/a(5)).^2']; ans17=['We must now figure out b. The key here is to realize that the']; ans18=['peak center, a(4), doesn''t affect the area under the curve. We can']; ans19=['make the substitution of:']; ans20=['t-a(4)^2 = x^2']; ans21=['This simplifies the equation to:']; ans22=['-(b^2*x^2) = -(x^2/a(5)^2)']; ans23=['With b = 1/a(5), the calculation of the area becomes:']; ans25=['a(3)*a(5)*sqrt(pi)']; disp(ans1),disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6),disp(ans7) disp(ans8),disp(ans9),disp(ans10),disp(ans11) pause % clc disp(linespace),disp(ans12),disp(linespace),disp(ans15) pause disp(linespace),disp(ans13),disp(ans14),disp(ans15),disp(linespace) pause disp(ans16),disp(linespace),disp(ans17),disp(ans18),disp(ans19),disp(linespace) pause disp(ans20),disp(linespace),disp(ans21),disp(linespace),disp(ans22) pause % clc disp(linespace),disp(ans23),disp(linespace),disp(ans25) pause ans1=['Doing this calculation for both peaks results in: ']; disp(linespace),disp(ans1) Area_under_the_peaks=[p(3)*p(5)*sqrt(pi); p(6)*p(8)*sqrt(pi)] pause % clc ans2=['The calculation of the uncertainty in the areas under the curve uses']; ans3=['the error propagation formula in your notes (section 2.3.2). We get']; ans4=['the variance of a(3) from the diagonal of the covp matrix. The']; ans5=['derivative of our area formula with respect to a(3) = a(5)*sqrt(pi),']; ans6=['with respect to a(5) = a(3)*sqrt(pi). The covariance of a(3) and a(5)']; ans7=['is obtained from the off diagonal element covp(3,5) or covp(5,3);']; ans8=['they are both the same since covp is symmetric. Note that the']; ans9=['covariance is negative because as the regression increases the']; ans10=['height, the width tends to contract so that the fit can fall through']; ans11=['the points on the side better. Another way to put it is that a(3) and']; ans12=['a(5) are anticorrelated. You might wonder how covariance which is a']; ans13=['squared term can be negative. The answer is that the covariance']; ans14=['matrix is populated by the squares of the covariances and you use']; ans15=['them only in formulae where the squares are used. ']; ans16=['Now that we have identified the major players in the formula we']; ans17=['can compute the uncertainties in the areas using the following']; ans18=['equations: ']; ans19=['sigpeak1=sqrt(covp(3,3)*p(5)^2*pi+covp(5,5)*p(3)^2*pi+2*covp(3,5)*p(3)*p(5)*pi)']; ans20=['sigpeak2=sqrt(covp(6,6)*p(8)^2*pi+covp(8,8)*p(6)^2*pi+2*covp(6,8)*p(6)*p(8)*pi)']; ans21=['']; sigpeak1=sqrt(covp(3,3)*p(5)^2*pi+covp(5,5)*p(3)^2*pi+2*covp(3,5)*p(3)*p(5)*pi); sigpeak2=sqrt(covp(6,6)*p(8)^2*pi+covp(8,8)*p(6)^2*pi+2*covp(6,8)*p(6)*p(8)*pi); disp(ans2),disp(ans3),disp(ans4),disp(ans5),disp(ans6),disp(ans7) disp(ans8),disp(ans9),disp(ans10),disp(ans11),disp(ans12),disp(ans13),disp(ans14),disp(ans15) pause % clc disp(ans16),disp(ans17),disp(ans18),disp(linespace),disp(ans19),disp(linespace),disp(ans20),disp(linespace),disp(ans21) pause % clc ans1=['Now, I will display the areas and uncertainties in one matrix']; ans2=['']; disp(linespace),disp(ans1) Areas_and_Uncertainties=[p(3)*p(5)*sqrt(pi) sigpeak1; p(6)*p(8)*sqrt(pi) sigpeak2] disp(ans2) pause % clc disp(' '); disp('Finally, the last part of the question is to test whether or not'); disp('the background is really changing with time. You were asked'); disp('to devise a statistical test of the null hypothesis H0: a(2)=0; in'); disp('other words the background is not changing. Or, in other words,'); disp('the background slope fit is not statistically different than zero.'); disp('The most direct way would be to test a(2) using the value obtained '); disp('from the Levenberg-Marquardt non-linear fit, the error estimate'); disp('obtained the same way, and your knowledge of statistical'); disp('testing. In class we went over two tests that might be'); disp('applicable: student''s t-test and normal distribution Z test.'); disp('Student''s t-test is really designed for tests of groups of'); disp('number where n is low, at large values of n student''s t-test'); disp('becomes Gaussian. Therefore we will use the normal Z statistic'); disp('test. A priori we will test this at a significance level of 0.05'); disp('(95% confidence). Using MATLAB''s NORMINV function we can obtain'); disp('the critical Z value for this test (the critical value is the'); disp('test statistic value that must be exceeded to reject the null'); disp('hypothesis). But note, alpha is 0.05 and this is a TWO-tailed'); disp('test because we just want to know if the slope is different than'); disp('zero; we didn''t ask if it was greater than or less than. So the'); disp('probability given to NORMINV is 0.025 (2.5% in each tail). You'); disp('get the critical Z value from:'); disp(' '); disp('Zcrit=norminv(0.025)'); Zcrit=norminv(0.025) disp(' '); disp('That is to say, plus or minus 1.96 (~2) standard deviations. So'); disp('now we must calculate our Z-statistic, which is given by:'); disp('(mu-x)/se. Where mu is the mean of the parent population (in our'); disp('case zero), x is the value we are testing (the slope of the'); disp('background (a(2)), and se is the standard error, given by:'); disp('s*sqrt(1/n). Here s is the standard deviation of x (we can use'); disp('the error estimate of a(2) given by nnleasqr.m) and n is the'); disp('number of data points. Ahhh, but what IS n? We only have one'); disp('instantiation of a(2), but it was derived from 401 data points!'); disp('This is important, if you were to use n=1 you would make the'); disp('wrong decision about the null hypothesis. So what does this look'); disp('like?'); disp(' '); disp('Z=(0-p(2))/(sp(2)*sqrt(1/401))'); Z=(0-p(2))/(sp(2)*sqrt(1/401)) disp('Zwrong=(0-p(2))/(sp(2)*sqrt(1/1))'); Zwrong=(0-p(2))/(sp(2)*sqrt(1/1)) disp('I think you can see the choice of n made a big difference. In'); disp('this case, even though the slope of the background was pretty'); disp('darn small, it is still significant at the 95% confidence'); disp('level.'); disp(' '); disp('Scroll back to "Finally, the last part of..."'); disp('Press any key to continue');pause % clc ans1=['End of problem 5 - whew!']; disp(ans1)