मेरे पास एक ही प्रश्न था। नीचे मैंने एक सर्वेक्षण ऑब्जेक्ट के रूप में व्यवस्थित डेटा का उपयोग करके मामूली प्रभाव की गणना करने के लिए एमएफएक्स पैकेज से एक फ़ंक्शन को संशोधित किया। मैंने अधिक नहीं किया, अधिकांशतः 'माध्य()' और इसी तरह के आदेशों को प्रतिस्थापित किया गया है जो सर्वेक्षण-पैकेज समकक्षों के साथ गैर-सर्वेक्षण डेटा चलाने के लिए डिज़ाइन किए गए हैं। संशोधित एमएफएक्स कोड के बाद, एक कोड है जो एक उदाहरण चलाता है। GitHub पर MFX पैकेज के लिए https://cran.r-project.org/web/packages/mfx/mfx.pdf
संहिता (फ़ाइलों है कि मैं संशोधित probitmfxest.r और probitmfx.r हैं)::
पृष्ठभूमि
एलन Fernihough द्वारा MFX पैकेज पर विवरण https://github.com/cran/mfx/tree/master/R
एमएफएक्स कैलक्यूलेटर में, मैंने क्लस्टर और मजबूत एसईएस के बारे में विभिन्न मान्यताओं को संभालने वाले मूल कार्यों में निर्मित लचीलापन पर टिप्पणी की। मैं गलत हो सकता था लेकिन मुझे लगता है कि उन मुद्दों को सर्वेक्षण पैकेज, svyglm() से रिग्रेशन अनुमान कमांड का उपयोग करके पहले से ही जिम्मेदार ठहराया गया है।
सीमांत प्रभाव कैलकुलेटर
library(survey)
probitMfxEstSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in the original mfx package
){
if(is.null(formula)){
stop("model formula is missing")
}
for(i in 1:length(class(design))){
if(!((class(design)[i] %in% "survey.design2") | (class(design)[i] %in% "survey.design"))){
stop("design arguement must contain survey object")
}
}
# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# # cluster sort part
# if(is.null(clustervar1) & !is.null(clustervar2)){
# stop("use clustervar1 arguement before clustervar2 arguement")
# }
# if(!is.null(clustervar1)){
# if(is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),data[,clustervar1])
# names(data)[dim(data)[2]] = clustervar1
# data=na.omit(data)
# }
# if(!is.null(clustervar2)){
# if(!(clustervar1 %in% names(data))){
# stop("clustervar1 not in data.frame object")
# }
# if(!(clustervar2 %in% names(data))){
# stop("clustervar2 not in data.frame object")
# }
# data = data.frame(model.frame(formula, data, na.action=NULL),
# data[,c(clustervar1,clustervar2)])
# names(data)[c(dim(data)[2]-1):dim(data)[2]] = c(clustervar1,clustervar2)
# data=na.omit(data)
# }
# }
# fit the probit regression
fit = svyglm(formula,
design=design,
family = quasibinomial(link = "probit"),
x=T
)
# TS: summary(fit)
# terms needed
x1 = model.matrix(fit)
if (any(alias <- is.na(coef(fit)))) { # this conditional removes any vars with a NA coefficient
x1 <- x1[, !alias, drop = FALSE]
}
xm = as.matrix(svymean(x1,design)) # calculate means of x variables
be = as.matrix(na.omit(coef(fit))) # collect coefficients: be as in beta
k1 = length(na.omit(coef(fit))) # collect number of coefficients or x variables
xb = t(xm) %*% be # get the matrix product of xMean and beta, which is the model prediction at the mean
fxb = ifelse(atmean==TRUE, dnorm(xb), mean(dnorm(x1 %*% be))) # collect either the overall predicted mean, or the average of every observation's predictions
# get variances
vcv = vcov(fit)
# from Fernihough's original mfx function
# I dont think this is needed because the
# regression computed by the survey package should
# take care of stratification and robust SEs
# from the survey info
#
# if(robust){
# if(is.null(clustervar1)){
# # white correction
# vcv = vcovHC(fit, type = "HC0")
# } else {
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# }
#
# if(robust==FALSE & is.null(clustervar1)==FALSE){
# if(is.null(clustervar2)){
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL)
# } else {
# vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2)
# }
# }
# set mfx equal to predicted mean (or other value) multiplied by beta
mfx = data.frame(mfx=fxb*be, se=NA)
# get standard errors
if(atmean){# fxb * id matrix - avg model prediction * (beta X xmean)
gr = as.numeric(fxb)*(diag(k1) - as.numeric(xb) *(be %*% t(xm)))
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
} else {
gr = apply(x1, 1, function(x){
as.numeric(as.numeric(dnorm(x %*% be))*(diag(k1) - as.numeric(x %*% be)*(be %*% t(x))))
})
gr = matrix(apply(gr,1,mean),nrow=k1)
mfx$se = sqrt(diag(gr %*% vcv %*% t(gr)))
}
# pick out constant and remove from mfx table
temp1 = apply(x1,2,function(x)length(table(x))==1)
const = names(temp1[temp1==TRUE])
mfx = mfx[row.names(mfx)!=const,]
# pick out discrete change variables
temp1 = apply(x1,2,function(x)length(table(x))==2)
disch = names(temp1[temp1==TRUE])
# calculate the disctrete change marginal effects and standard errors
if(length(disch)!=0){
for(i in 1:length(disch)){
if(atmean){
disx0 = disx1 = xm
disx1[disch[i],] = max(x1[,disch[i]])
disx0[disch[i],] = min(x1[,disch[i]])
# mfx equal to prediction @ x=1 minus prediction @ x=0
mfx[disch[i],1] = pnorm(t(be) %*% disx1) - pnorm(t(be) %*% disx0)
# standard errors
gr = dnorm(t(be) %*% disx1) %*% t(disx1) - dnorm(t(be) %*% disx0) %*% t(disx0)
mfx[disch[i],2] = sqrt(gr %*% vcv %*% t(gr))
} else {
disx0 = disx1 = x1
disx1[,disch[i]] = max(x1[,disch[i]])
disx0[,disch[i]] = min(x1[,disch[i]])
mfx[disch[i],1] = mean(pnorm(disx1 %*% be) - pnorm(disx0 %*% be))
# standard errors
gr = as.numeric(dnorm(disx1 %*% be)) * disx1 - as.numeric(dnorm(disx0 %*% be)) * disx0
avegr = as.matrix(colMeans(gr))
mfx[disch[i],2] = sqrt(t(avegr) %*% vcv %*% avegr)
}
}
}
mfx$discretechgvar = ifelse(rownames(mfx) %in% disch, 1, 0)
output = list(fit=fit, mfx=mfx)
return(output)
}
probitMfxSurv <-
function(formula,
design,
atmean = TRUE,
robust = FALSE,
clustervar1 = NULL,
clustervar2 = NULL,
start = NULL
# control = list() # this option is found in original mfx package
)
{
# res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start, control)
res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start)
est = NULL
est$mfxest = cbind(dFdx = res$mfx$mfx,
StdErr = res$mfx$se,
z.value = res$mfx$mfx/res$mfx$se,
p.value = 2*pt(-abs(res$mfx$mfx/res$mfx$se), df = Inf))
colnames(est$mfxest) = c("dF/dx","Std. Err.","z","P>|z|")
rownames(est$mfxest) = rownames(res$mfx)
est$fit = res$fit
est$dcvar = rownames(res$mfx[res$mfx$discretechgvar==1,])
est$call = match.call()
class(est) = "probitmfx"
est
}
उदाहरण
# initialize sample data
nObs = 100
x1 = rbinom(nObs,1,.5)
x2 = rbinom(nObs,1,.3)
#x3 = rbinom(100,1,.9)
x3 = runif(nObs,0,.9)
id = 1:nObs
w1 = sample(c(10,50,100),nObs,replace=TRUE)
# dependnt variables
ystar = x1 + x2 - x3 + rnorm(nObs)
y = ifelse(ystar>0,1,0)
# set up data frame
data = data.frame(id, w1, x1, x2, x3, ystar, y)
# initialize survey
survey.design <- svydesign(ids = data$id,
weights = data$w1,
data = data)
mean(data$x2)
sd(data$x2)/(length(data$x2))^0.5
svymean(x=x2,design=survey.design)
probit = svyglm(y~x1 + x2 + x3, design=survey.design, family=quasibinomial(link='probit'))
summary(probit)
probitMfxSurv(formula = y~x1 + x2 + x3, design = survey.design)
http://r-survey.r-forge.r-project.org/survey/html/marginpred.html –