/*
   Polynomial regression of employee salary on college GPA.

   Steps:
     1. Read the salary / cgpa data and derive squared and centered terms.
     2. Fit salary = cgpa and inspect jackknife residuals.
     3. Fit salary = cgpa cgpa2 (quadratic) and inspect VIFs and residuals.
     4. Refit using centered predictors to reduce the VIFs.

   Each PROC REG step writes predicted values, jackknife residuals
   (RSTUDENT), leverage (H) and Cook's D (COOKD) to an output data set.
*/
options ls=80 ps=55 ;

/* Data on employee salaries and their college gpas */
data money ;
  input salary cgpa ;
  cgpa2 = cgpa**2 ;
  /* We will use this centered version of cgpa and cgpa**2 below.
     2.838 is approximately the sample mean of cgpa for the 30
     observations read here (sum 85.15 / 30 = 2.8383). */
  ccgpa = (cgpa - 2.838) ;
  ccgpa2 = ccgpa**2 ;
  /* observation number, kept so individual cases can be identified */
  id = _n_ ;
  /* In-stream data: one observation (salary cgpa) per line, because the
     INPUT statement above has no trailing @@ line-hold specifier. */
  datalines ;
10455 2.58
9680 2.31
7300 2.47
9388 2.52
12496 3.22
11812 3.37
9224 2.43
11725 3.08
11320 2.78
12000 2.98
12500 3.55
13310 3.64
12105 3.72
6200 2.24
11522 2.7
8000 2.3
12548 2.83
7700 2.37
10028 2.52
13176 3.22
13255 3.55
13004 3.55
8000 2.47
8224 2.47
10750 2.78
11669 2.78
12322 2.98
11002 2.58
10666 2.58
10839 2.58
;

/* The Rstudent variable on the Output statement is what
   the text describes as a jackknife residual */
proc reg data = money ;
  model salary = cgpa ;
  output out = new p = pred rstudent = jkres h = hat cookd = cook ;
run ;

/* Residuals against fitted values and against cgpa, then the fitted
   line ('p') overlaid on the observed salaries ('a') */
proc plot data = new vpercent=75 ;
  plot jkres*pred jkres*cgpa ;
  plot pred*cgpa='p' salary*cgpa='a' / overlay ;
run ;

/* The proc capability procedure below can be used to create a
   normal plot of residuals */
proc capability data = new noprint lineprinter ;
  var jkres ;
  qqplot jkres / normal(mu = est sigma = est symbol='.') square ;
run ;

/* The residual plots from the simple linear regression indicate that
   a higher order term is needed for cgpa */
proc reg data = money ;
  model salary = cgpa cgpa2 / vif ;
  output out = new2 p = pred2 rstudent = jkres2 h = hat2 cookd = cook2 ;
run ;

proc plot data = new2 vpercent=75 ;
  plot jkres2*pred2 jkres2*cgpa ;
  plot pred2*cgpa='p' salary*cgpa='a' / overlay ;
run ;

proc capability data = new2 noprint lineprinter ;
  var jkres2 ;
  qqplot jkres2 / normal(mu = est sigma = est symbol='.') square ;
run ;

/* Correlation between the raw linear and quadratic terms */
proc corr data = money ;
  var cgpa cgpa2 ;
run ;

/* The polynomial model above has high VIF values, which is also seen
   in the correlation between cgpa and cgpa**2.
   The VIF can be improved by using centered values instead, as shown
   below */
proc corr data = money ;
  var ccgpa ccgpa2 ;
run ;

proc reg data = money ;
  model salary = ccgpa ccgpa2 / vif ;
  output out = new3 p = pred3 rstudent = jkres3 h = hat3 cookd = cook3 ;
run ;

/* Residuals and fitted values are still plotted against the original
   (uncentered) cgpa, which is carried into new3 from the input data */
proc plot data = new3 vpercent=75 ;
  plot jkres3*pred3 jkres3*cgpa ;
  plot pred3*cgpa='p' salary*cgpa='a' / overlay ;
run ;

proc capability data = new3 noprint lineprinter ;
  var jkres3 ;
  qqplot jkres3 / normal(mu = est sigma = est symbol='.') square ;
run ;