numpy - Show confidence limits and prediction limits in scatter plot -
i have 2 arrays of data hight , weight:
import numpy np, matplotlib.pyplot plt heights = np.array([50,52,53,54,58,60,62,64,66,67, 68,70,72,74,76,55,50,45,65]) weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45]) plt.plot(heights,weights,'bo') plt.show()
i want produce plot similiar this:
http://www.sas.com/en_us/software/analytics/stat.html#m=screenshot6
any ideas appreciated.
here's put together. tried emulate screenshot closely.
#------------------------------------------------------------------------------ import matplotlib.pyplot plt import scipy.stats stats import numpy np # data heights = np.array([50,52,53,54,58,60,62,64,66,67,68,70,72,74,76,55,50,45,65]) weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45]) x = heights y = weights # modeling numpy p, cov = np.polyfit(x,y,1,cov=true) # parameters , covariance of fit y_model = np.polyval(p, x) # model using fit parameters; note: parameters here coefficients # statistics n = weights.size # number of observations m = p.size # number of parameters df = n - m # degrees of freedom t = stats.t.ppf(0.95, n - m) # used ci , pi bands # estimates of error in data/model resid = y - y_model chi2 = np.sum((resid/y_model)**2) # chi-squared; estimates error in data chi2_red = chi2/(df) # reduced chi-squared; measures goodness of fit s_err = np.sqrt(np.sum(resid**2)/(df)) # standard deviation of error # plotting -------------------------------------------------------------------- fig, ax = plt.subplots(figsize=(8,6)) # data ax.plot(x,y,'o', color='#b9cfe7', markersize=8, markeredgewidth=1,markeredgecolor='b',markerfacecolor='none') # fit ax.plot(x,y_model,'-', color='0.1', linewidth='2', alpha=0.5, label='fit') x2 = np.linspace(np.min(x), np.max(x), 100) y2 = np.linspace(np.min(y_model), np.max(y_model), 100) # confidence interval ci = t*s_err*np.sqrt(1/n +(x2-np.mean(x))**2/np.sum((x-np.mean(x))**2)) ax.fill_between(x2, y2+ci, y2-ci, color='#b9cfe7', edgecolor='') '''minor hack labeling ci fill_between()''' ax.plot(x2, y2+ci, '-', color='#b9cfe7', label='95% confidence limits') # prediction interval pi = t*s_err*np.sqrt(1+1/n+(x2-np.mean(x))**2/np.sum((x-np.mean(x))**2)) ax.fill_between(x2, y2+pi, y2-pi, color='none', linestyle='--') ax.plot(x2, y2-pi, '--', color='0.5', label='95% prediction limits') ax.plot(x2, y2+pi, '--', color='0.5') # figure modifications -------------------------------------------------------- # borders ax.spines['top'].set_color('0.5') ax.spines['bottom'].set_color('0.5') ax.spines['left'].set_color('0.5') ax.spines['right'].set_color('0.5') ax.get_xaxis().set_tick_params(direction='out') ax.get_yaxis().set_tick_params(direction='out') ax.xaxis.tick_bottom() ax.yaxis.tick_left() # labels plt.title('fit plot weight', fontsize='14', fontweight='bold') plt.xlabel('height') plt.ylabel('weight') legend = plt.legend(loc=9, bbox_to_anchor=(0, -0.21, 1., .102), ncol=3, mode='expand') frame = legend.get_frame().set_edgecolor('0.5') plt.xlim(np.min(x)-1,np.max(x)+1) # save figure plt.tight_layout() plt.savefig('filename.png', bbox_extra_artists=(legend,), bbox_inches='tight') plt.show()
note:
i believe since legend outside figure, not show in matplotblib's popup window. works fine in ipython using
%maplotlib inline
.for brevity, code confidence interval kept simple. of interval code adapted source. can replace section more advanced method if choose, i.e. residual bootstrapping.
hope helps. cheers.
Comments
Post a Comment