मैं scipy Cookbook पर डाउनलोड ols.py
कोड का उपयोग कर रहा हूं (डाउनलोड बोल्ड ओएलएस के साथ पहले अनुच्छेद में है) लेकिन मुझे एकाधिक रैखिक प्रतिगमन करने के लिए ओएलएस फ़ंक्शन के लिए यादृच्छिक डेटा का उपयोग करने के बजाय समझने की आवश्यकता है।विशिष्ट डेटा के साथ ओएलएस कोड का उपयोग कर पायथन एकाधिक रैखिक रिग्रेशन?
मेरे पास एक विशिष्ट आश्रित चर y
है, और तीन स्पष्टीकरण चर हैं।
TypeError: this constructor takes no arguments.
किसी को भी मदद कर सकते हैं: हर बार जब मैं यादृच्छिक परिवर्तनीय के स्थान पर मेरे चर में डाल करने की कोशिश, यह मेरे त्रुटि देता है? क्या ऐसा करना संभव है?
यहाँ OLS कोड मैं चर है कि मैं
from __future__ import division
from scipy import c_, ones, dot, stats, diff
from scipy.linalg import inv, solve, det
from numpy import log, pi, sqrt, square, diagonal
from numpy.random import randn, seed
import time
class ols:
"""
Author: Vincent Nijs (+ ?)
Email: v-nijs at kellogg.northwestern.edu
Last Modified: Mon Jan 15 17:56:17 CST 2007
Dependencies: See import statement at the top of this file
Doc: Class for multi-variate regression using OLS
Input:
dependent variable
y_varnm = string with the variable label for y
x = independent variables, note that a constant is added by default
x_varnm = string or list of variable labels for the independent variables
Output:
There are no values returned by the class. Summary provides printed output.
All other measures can be accessed as follows:
Step 1: Create an OLS instance by passing data to the class
m = ols(y,x,y_varnm = 'y',x_varnm = ['x1','x2','x3','x4'])
Step 2: Get specific metrics
To print the coefficients:
>>> print m.b
To print the coefficients p-values:
>>> print m.p
"""
y = [29.4, 29.9, 31.4, 32.8, 33.6, 34.6, 35.5, 36.3, 37.2, 37.8, 38.5, 38.8,
38.6, 38.8, 39, 39.7, 40.6, 41.3, 42.5, 43.9, 44.9, 45.3, 45.8, 46.5,
77.1, 48.2, 48.8, 50.5, 51, 51.3, 50.7, 50.7, 50.6, 50.7, 50.6, 50.7]
#tuition
x1 = [376, 407, 438, 432, 433, 479, 512, 543, 583, 635, 714, 798, 891,
971, 1045, 1106, 1218, 1285, 1356, 1454, 1624, 1782, 1942, 2057, 2179,
2271, 2360, 2506, 2562, 2700, 2903, 3319, 3629, 3874, 4102, 4291]
#research and development
x2 = [28740.00, 30952.00, 33359.00, 35671.00, 39435.00, 43338.00, 48719.00, 55379.00, 63224.00,
72292.00, 80748.00, 89950.00, 102244.00, 114671.00, 120249.00, 126360.00, 133881.00, 141891.00,
151993.00, 160876.00, 165350.00, 165730.00, 169207.00, 183625.00, 197346.00, 212152.00, 226402.00,
267298.00, 277366.00, 276022.00, 288324.00, 299201.00, 322104.00, 347048.00, 372535.00,
397629.00]
#one/none parents
x3 = [11610, 12143, 12486, 13015, 13028, 13327, 14074, 14094, 14458, 14878, 15610, 15649,
15584, 16326, 16379, 16923, 17237, 17088, 17634, 18435, 19327, 19712, 21424, 21978,
22684, 22597, 22735, 22217, 22214, 22655, 23098, 23602, 24013, 24003, 21593, 22319]
def __init__(self,y,x1,y_varnm = 'y',x_varnm = ''):
"""
Initializing the ols class.
"""
self.y = y
#self.x1 = c_[ones(x1.shape[0]),x1]
self.y_varnm = y_varnm
if not isinstance(x_varnm,list):
self.x_varnm = ['const'] + list(x_varnm)
else:
self.x_varnm = ['const'] + x_varnm
# Estimate model using OLS
self.estimate()
def estimate(self):
# estimating coefficients, and basic stats
self.inv_xx = inv(dot(self.x.T,self.x))
xy = dot(self.x.T,self.y)
self.b = dot(self.inv_xx,xy) # estimate coefficients
self.nobs = self.y.shape[0] # number of observations
self.ncoef = self.x.shape[1] # number of coef.
self.df_e = self.nobs - self.ncoef # degrees of freedom, error
self.df_r = self.ncoef - 1 # degrees of freedom, regression
self.e = self.y - dot(self.x,self.b) # residuals
self.sse = dot(self.e,self.e)/self.df_e # SSE
self.se = sqrt(diagonal(self.sse*self.inv_xx)) # coef. standard errors
self.t = self.b/self.se # coef. t-statistics
self.p = (1-stats.t.cdf(abs(self.t), self.df_e)) * 2 # coef. p-values
self.R2 = 1 - self.e.var()/self.y.var() # model R-squared
self.R2adj = 1-(1-self.R2)*((self.nobs-1)/(self.nobs-self.ncoef)) # adjusted R-square
self.F = (self.R2/self.df_r)/((1-self.R2)/self.df_e) # model F-statistic
self.Fpv = 1-stats.f.cdf(self.F, self.df_r, self.df_e) # F-statistic p-value
def dw(self):
"""
Calculates the Durbin-Waston statistic
"""
de = diff(self.e,1)
dw = dot(de,de)/dot(self.e,self.e);
return dw
def omni(self):
"""
Omnibus test for normality
"""
return stats.normaltest(self.e)
def JB(self):
"""
Calculate residual skewness, kurtosis, and do the JB test for normality
"""
# Calculate residual skewness and kurtosis
skew = stats.skew(self.e)
kurtosis = 3 + stats.kurtosis(self.e)
# Calculate the Jarque-Bera test for normality
JB = (self.nobs/6) * (square(skew) + (1/4)*square(kurtosis-3))
JBpv = 1-stats.chi2.cdf(JB,2);
return JB, JBpv, skew, kurtosis
def ll(self):
"""
Calculate model log-likelihood and two information criteria
"""
# Model log-likelihood, AIC, and BIC criterion values
ll = -(self.nobs*1/2)*(1+log(2*pi)) - (self.nobs/2)*log(dot(self.e,self.e)/self.nobs)
aic = -2*ll/self.nobs + (2*self.ncoef/self.nobs)
bic = -2*ll/self.nobs + (self.ncoef*log(self.nobs))/self.nobs
return ll, aic, bic
def summary(self):
"""
Printing model output to screen
"""
# local time & date
t = time.localtime()
# extra stats
ll, aic, bic = self.ll()
JB, JBpv, skew, kurtosis = self.JB()
omni, omnipv = self.omni()
# printing output to screen
print '\n=============================================================================='
print "Dependent Variable: " + self.y_varnm
print "Method: Least Squares"
print "Date: ", time.strftime("%a, %d %b %Y",t)
print "Time: ", time.strftime("%H:%M:%S",t)
print '# obs: %5.0f' % self.nobs
print '# variables: %5.0f' % self.ncoef
print '=============================================================================='
print 'variable coefficient std. Error t-statistic prob.'
print '=============================================================================='
for i in range(len(self.x_varnm)):
print '''% -5s % -5.6f % -5.6f % -5.6f % -5.6f''' % tuple([self.x_varnm[i],self.b[i],self.se[i],self.t[i],self.p[i]])
print '=============================================================================='
print 'Models stats Residual stats'
print '=============================================================================='
print 'R-squared % -5.6f Durbin-Watson stat % -5.6f' % tuple([self.R2, self.dw()])
print 'Adjusted R-squared % -5.6f Omnibus stat % -5.6f' % tuple([self.R2adj, omni])
print 'F-statistic % -5.6f Prob(Omnibus stat) % -5.6f' % tuple([self.F, omnipv])
print 'Prob (F-statistic) % -5.6f JB stat % -5.6f' % tuple([self.Fpv, JB])
print 'Log likelihood % -5.6f Prob(JB) % -5.6f' % tuple([ll, JBpv])
print 'AIC criterion % -5.6f Skew % -5.6f' % tuple([aic, skew])
print 'BIC criterion % -5.6f Kurtosis % -5.6f' % tuple([bic, kurtosis])
print '=============================================================================='
if __name__ == '__main__':
##########################
### testing the ols class
##########################
# intercept is added, by default
m = ols(y,x1,y_varnm = 'y',x_varnm = ['x1','x2','x3'])
m.summary()
दिखाने के कुछ कोड ... –
इसके अलावा, अवधि का उपयोग करता है। –
बहुत अच्छी पहली पोस्ट। मुझे लगता है कि आपने ओएलएस के साथ बहुत अधिक कोड पोस्ट किया है (शायद आपके कोड की आवश्यकता है)। इसके अलावा, @ दाट चु विराम चिह्न के बारे में सही है। आपका प्रश्न पढ़ना मुश्किल है। –