numpy - Show confidence limits and prediction limits in scatter plot -


i have 2 arrays of data hight , weight:

import numpy np, matplotlib.pyplot plt  heights = np.array([50,52,53,54,58,60,62,64,66,67, 68,70,72,74,76,55,50,45,65]) weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])  plt.plot(heights,weights,'bo') plt.show() 

i want produce plot similiar this:

http://www.sas.com/en_us/software/analytics/stat.html#m=screenshot6

any ideas appreciated.

here's put together. tried emulate screenshot closely.

#------------------------------------------------------------------------------ import matplotlib.pyplot plt import scipy.stats stats import numpy np  # data heights = np.array([50,52,53,54,58,60,62,64,66,67,68,70,72,74,76,55,50,45,65]) weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])  x = heights y = weights  # modeling numpy p, cov = np.polyfit(x,y,1,cov=true)           # parameters , covariance of fit y_model = np.polyval(p, x)                    # model using fit parameters; note: parameters here coefficients  # statistics n = weights.size                              # number of observations m = p.size                                    # number of parameters df = n - m                                    # degrees of freedom t = stats.t.ppf(0.95, n - m)                  # used ci , pi bands  # estimates of error in data/model resid = y - y_model                            chi2 = np.sum((resid/y_model)**2)             # chi-squared; estimates error in data chi2_red = chi2/(df)                          # reduced chi-squared; measures goodness of fit s_err = np.sqrt(np.sum(resid**2)/(df))        # standard deviation of error   # plotting -------------------------------------------------------------------- fig, ax = plt.subplots(figsize=(8,6))  # data ax.plot(x,y,'o', color='#b9cfe7', markersize=8,          markeredgewidth=1,markeredgecolor='b',markerfacecolor='none')  # fit ax.plot(x,y_model,'-', color='0.1', linewidth='2', alpha=0.5, label='fit')    x2 = np.linspace(np.min(x), np.max(x), 100) y2 = np.linspace(np.min(y_model), np.max(y_model), 100)  # confidence interval ci = t*s_err*np.sqrt(1/n +(x2-np.mean(x))**2/np.sum((x-np.mean(x))**2)) ax.fill_between(x2, y2+ci, y2-ci, color='#b9cfe7', edgecolor='')  '''minor hack labeling ci fill_between()''' ax.plot(x2, y2+ci, '-', color='#b9cfe7', label='95% confidence limits')  # prediction interval pi = t*s_err*np.sqrt(1+1/n+(x2-np.mean(x))**2/np.sum((x-np.mean(x))**2))    ax.fill_between(x2, y2+pi, y2-pi, color='none', linestyle='--') ax.plot(x2, y2-pi, '--', color='0.5', label='95% prediction limits') ax.plot(x2, y2+pi, '--', color='0.5')   # figure modifications -------------------------------------------------------- # borders ax.spines['top'].set_color('0.5') ax.spines['bottom'].set_color('0.5') ax.spines['left'].set_color('0.5') ax.spines['right'].set_color('0.5') ax.get_xaxis().set_tick_params(direction='out') ax.get_yaxis().set_tick_params(direction='out') ax.xaxis.tick_bottom() ax.yaxis.tick_left()   # labels plt.title('fit plot weight', fontsize='14', fontweight='bold') plt.xlabel('height') plt.ylabel('weight') legend = plt.legend(loc=9, bbox_to_anchor=(0, -0.21, 1., .102), ncol=3, mode='expand') frame = legend.get_frame().set_edgecolor('0.5') plt.xlim(np.min(x)-1,np.max(x)+1)  # save figure plt.tight_layout() plt.savefig('filename.png', bbox_extra_artists=(legend,), bbox_inches='tight')  plt.show() 

enter image description here

note:

  1. i believe since legend outside figure, not show in matplotblib's popup window. works fine in ipython using %maplotlib inline.

  2. for brevity, code confidence interval kept simple. of interval code adapted source. can replace section more advanced method if choose, i.e. residual bootstrapping.

hope helps. cheers.


Comments