import pandas as pd
from numpy import log # log instead of np.log
from statsmodels.api import OLS
Regko = pd.read_csv('csv/loedata/Regko.csv')
# Log-log regression of the divorce rate on regional characteristics,
# restricted to the type == 2 subsample.
fm = 'log(divorce)~log(regpop)+log(drink)+log(hdrink) + log(smoke)+log(grdp/regpop) + log(aged)'
type2 = Regko[Regko['type'] == 2]
res = OLS.from_formula(fm, data=type2).fit()
print(res.summary())
OLS Regression Results ============================================================================== Dep. Variable: log(divorce) R-squared: 0.485 Model: OLS Adj. R-squared: 0.444 Method: Least Squares F-statistic: 11.76 Date: Fri, 16 Dec 2022 Prob (F-statistic): 3.00e-09 Time: 10:50:59 Log-Likelihood: 75.101 No. Observations: 82 AIC: -136.2 Df Residuals: 75 BIC: -119.4 Df Model: 6 Covariance Type: nonrobust ====================================================================================== coef std err t P>|t| [0.025 0.975] -------------------------------------------------------------------------------------- Intercept 0.1727 0.990 0.175 0.862 -1.799 2.144 log(regpop) -0.0126 0.026 -0.487 0.628 -0.064 0.039 log(drink) -0.0731 0.195 -0.374 0.709 -0.463 0.316 log(hdrink) 0.2494 0.075 3.321 0.001 0.100 0.399 log(smoke) 0.1834 0.160 1.148 0.255 -0.135 0.502 log(grdp / regpop) 0.0953 0.040 2.404 0.019 0.016 0.174 log(aged) -0.1969 0.062 -3.150 0.002 -0.321 -0.072 ============================================================================== Omnibus: 0.191 Durbin-Watson: 1.805 Prob(Omnibus): 0.909 Jarque-Bera (JB): 0.335 Skew: -0.097 Prob(JB): 0.846 Kurtosis: 2.754 Cond. No. 1.18e+03 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 1.18e+03. This might indicate that there are strong multicollinearity or other numerical problems.
주석 [2]는 조건수(condition number)가 1.18e+03으로 커서 강한 다중공선성이나 기타 수치적 문제가 있을 수 있다는 경고이다.
import pandas as pd
import numpy as np
from statsmodels.api import OLS
# Load the twoyear data and take a first look.
twoyear = pd.read_csv('csv/wooldridge/twoyear.csv')
n_obs = len(twoyear)
print(n_obs)
twoyear.head()
6763
female | phsrank | BA | AA | black | hispanic | id | exper | jc | univ | ... | medcity | submed | lgcity | sublg | vlgcity | subvlg | ne | nc | south | totcoll | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 65 | 0 | 0 | 0 | 0 | 19 | 161 | 0.000000 | 0.000000 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0.000000 |
1 | 1 | 97 | 0 | 0 | 0 | 0 | 93 | 119 | 0.000000 | 7.033333 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 7.033333 |
2 | 1 | 44 | 0 | 0 | 0 | 0 | 96 | 81 | 0.000000 | 0.000000 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0.000000 |
3 | 1 | 34 | 0 | 0 | 0 | 1 | 119 | 39 | 0.266667 | 0.000000 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.266667 |
4 | 1 | 80 | 0 | 0 | 0 | 0 | 132 | 141 | 0.000000 | 0.000000 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.000000 |
5 rows × 23 columns
# Baseline wage regression with junior-college and university years
# entered as separate regressors.
model0 = OLS.from_formula('lwage~jc+univ+exper', data=twoyear)
reg0 = model0.fit()
print(reg0.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.222 Model: OLS Adj. R-squared: 0.222 Method: Least Squares F-statistic: 644.5 Date: Fri, 16 Dec 2022 Prob (F-statistic): 0.00 Time: 10:50:59 Log-Likelihood: -3888.7 No. Observations: 6763 AIC: 7785. Df Residuals: 6759 BIC: 7813. Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 1.4723 0.021 69.910 0.000 1.431 1.514 jc 0.0667 0.007 9.767 0.000 0.053 0.080 univ 0.0769 0.002 33.298 0.000 0.072 0.081 exper 0.0049 0.000 31.397 0.000 0.005 0.005 ============================================================================== Omnibus: 81.514 Durbin-Watson: 1.968 Prob(Omnibus): 0.000 Jarque-Bera (JB): 142.465 Skew: -0.036 Prob(JB): 1.16e-31 Kurtosis: 3.707 Cond. No. 512. ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Reparameterize with totcoll = jc + univ; the coefficient on jc then
# equals b_jc - b_univ, so its t statistic tests H0: b_jc = b_univ.
twoyear['totcoll'] = twoyear['jc'] + twoyear['univ']
fit_tot = OLS.from_formula('lwage~jc+totcoll+exper', data=twoyear).fit()
print(fit_tot.summary())
OLS Regression Results ============================================================================== Dep. Variable: lwage R-squared: 0.222 Model: OLS Adj. R-squared: 0.222 Method: Least Squares F-statistic: 644.5 Date: Fri, 16 Dec 2022 Prob (F-statistic): 0.00 Time: 10:50:59 Log-Likelihood: -3888.7 No. Observations: 6763 AIC: 7785. Df Residuals: 6759 BIC: 7813. Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 1.4723 0.021 69.910 0.000 1.431 1.514 jc -0.0102 0.007 -1.468 0.142 -0.024 0.003 totcoll 0.0769 0.002 33.298 0.000 0.072 0.081 exper 0.0049 0.000 31.397 0.000 0.005 0.005 ============================================================================== Omnibus: 81.514 Durbin-Watson: 1.968 Prob(Omnibus): 0.000 Jarque-Bera (JB): 142.465 Skew: -0.036 Prob(JB): 1.16e-31 Kurtosis: 3.707 Cond. No. 511. ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
다음은 오차분산 추정치($s^2$) 부분을 제외한 분산 및 공분산 행렬이다.
reg0.normalized_cov_params
Intercept | jc | univ | exper | |
---|---|---|---|---|
Intercept | 0.002397 | -9.412177e-05 | -8.504379e-05 | -1.678074e-05 |
jc | -0.000094 | 2.520413e-04 | 1.042017e-05 | -9.287133e-08 |
univ | -0.000085 | 1.042017e-05 | 2.880909e-05 | 2.125993e-07 |
exper | -0.000017 | -9.287133e-08 | 2.125993e-07 | 1.340290e-07 |
위에 $s^2$을 곱하면 분산 및 공분산들을 얻는다. $s^2$은 다음과 같다.
s2 = reg0.scale # bad naming; this is s^2, not s.
print(s2)
0.18501901424040346
ixx = reg0.normalized_cov_params  # inv(X'X)
# Var(b_jc - b_univ) = s^2 * (a_jc,jc + a_univ,univ - 2*a_jc,univ), a = inv(X'X)
quad = ixx.loc['jc', 'jc'] + ixx.loc['univ', 'univ'] - 2*ixx.loc['jc', 'univ']
se = np.sqrt(s2*quad)
print(se)
0.006935906572923082
bhat = reg0.params  # estimated coefficients
# t statistic for H0: b_jc = b_univ
tstat = (bhat['jc'] - bhat['univ'])/se
print(tstat)
-1.4676566667595632
from scipy.stats import t
# Two-sided p-value. Use the survival function t.sf(x, df) = 1 - cdf(x, df):
# it is computed directly in the upper tail, avoiding the cancellation error
# of the 2*(1 - t.cdf(...)) form (which underflows to 0 for large statistics).
pval = 2*t.sf(abs(tstat), reg0.df_resid)
print(pval)
0.14224403710494182
import pandas as pd
import numpy as np
from statsmodels.api import OLS
twoyear = pd.read_csv('csv/wooldridge/twoyear.csv')
reg1 = OLS.from_formula('lwage~jc+univ+exper', data=twoyear).fit()     # unrestricted
reg0 = OLS.from_formula('lwage~I(jc+univ)+exper', data=twoyear).fit()  # restricted (jc=univ imposed)
# F statistic: (SSR_restricted - SSR_unrestricted)/q over s^2 = SSR_u/df_u, with q = 1
ssr_drop = reg0.ssr - reg1.ssr
Fstat = (ssr_drop/1)/reg1.scale
print(Fstat)
2.1540160914845914
from scipy.stats import f
# Upper-tail p-value of the F statistic. f.sf is 1 - cdf evaluated directly
# in the tail, which is numerically safer than the 1 - f.cdf(...) subtraction.
pval = f.sf(Fstat, 1, reg1.df_resid)
print(pval)
0.14224403710559586
# The same test, done more simply with statsmodels' built-in f_test
result = reg1.f_test('jc=univ')
print(result)
<F test: F=2.154016091483791, p=0.14224403710566683, df_denom=6.76e+03, df_num=1>
# The same test yet another way: reparameterize with totcoll = jc + univ,
# then test the jc coefficient (= b_jc - b_univ) against zero.
twoyear['totcoll'] = twoyear['jc'] + twoyear['univ']
ols1 = OLS.from_formula('lwage~jc+totcoll+exper', data=twoyear).fit()
print(ols1.f_test('jc=0'))
<F test: F=2.154016091483635, p=0.14224403710566683, df_denom=6.76e+03, df_num=1>
import pandas as pd
import numpy as np
from statsmodels.api import OLS
Doctor = pd.read_csv('csv/Ecdat/Doctor.csv')
# Regress doctor visits on number of children, access, and health status.
model = OLS.from_formula('doctor~children+access+health', data=Doctor)
ols1 = model.fit()
print(ols1.summary())
OLS Regression Results ============================================================================== Dep. Variable: doctor R-squared: 0.092 Model: OLS Adj. R-squared: 0.086 Method: Least Squares F-statistic: 16.15 Date: Fri, 16 Dec 2022 Prob (F-statistic): 5.12e-10 Time: 10:51:00 Log-Likelihood: -1250.3 No. Observations: 485 AIC: 2509. Df Residuals: 481 BIC: 2525. Df Model: 3 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 1.6075 0.418 3.843 0.000 0.786 2.429 children -0.2512 0.110 -2.278 0.023 -0.468 -0.035 access 1.4995 0.783 1.915 0.056 -0.039 3.039 health 0.6506 0.102 6.399 0.000 0.451 0.850 ============================================================================== Omnibus: 660.517 Durbin-Watson: 1.879 Prob(Omnibus): 0.000 Jarque-Bera (JB): 131192.869 Skew: 6.773 Prob(JB): 0.00 Kurtosis: 82.426 Cond. No. 16.2 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(ols1.f_test('access=0'))
<F test: F=3.665355066562526, p=0.05614775007156983, df_denom=481, df_num=1>
# Continue with the 'twoyear' data
# Joint F test of the two restrictions jc = univ and univ = exper
unres = OLS.from_formula('lwage~jc+univ+exper', data=twoyear)
ols1 = unres.fit()  # unrestricted regression
print(ols1.f_test('jc=univ, univ=exper'))
<F test: F=503.97206186906055, p=9.710503129040587e-205, df_denom=6.76e+03, df_num=2>
# Continue with the 'twoyear' data
# LM test of the two restrictions jc = univ = exper
ols0 = OLS.from_formula('lwage~I(jc+univ+exper)', data=twoyear).fit()  # restricted regression
twoyear['resid'] = ols0.resid
# Auxiliary step: regress the restricted residuals on the full regressor set
aux = OLS.from_formula('resid~jc+univ+exper', data=twoyear).fit()  # unrestricted regression
lmstat = aux.rsquared*ols0.nobs  # LM = n * R^2 of the auxiliary regression
print(lmstat)
877.6587033769049
from scipy.stats import chi2
# 5% critical value of the chi-squared distribution with 2 degrees of
# freedom (2 = the number of restrictions being tested).
print(chi2.ppf(.95,2))
5.991464547107979
print(1-chi2.cdf(lmstat,2))
0.0