## https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html#scipy.stats.t
from scipy.stats import t

# Two-sided 5% critical value of Student's t with 23 degrees of freedom.
t.ppf(0.975, df=23)
2.0686576104190406
t.ppf(.95, 23)
1.7138715277470473
import numpy as np
import pandas as pd
from statsmodels.api import OLS

# Log-log regression of house price on lot size (Ecdat Housing data);
# the slope is the price elasticity with respect to lot size.
Housing = pd.read_csv('csv/Ecdat/Housing.csv')
model = OLS.from_formula('np.log(price)~np.log(lotsize)', data=Housing)
ols = model.fit()
print(ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: np.log(price) R-squared: 0.336 Model: OLS Adj. R-squared: 0.335 Method: Least Squares F-statistic: 275.8 Date: Thu, 15 Dec 2022 Prob (F-statistic): 2.14e-50 Time: 01:40:51 Log-Likelihood: -122.36 No. Observations: 546 AIC: 248.7 Df Residuals: 544 BIC: 257.3 Df Model: 1 Covariance Type: nonrobust =================================================================================== coef std err t P>|t| [0.025 0.975] ----------------------------------------------------------------------------------- Intercept 6.4685 0.277 23.374 0.000 5.925 7.012 np.log(lotsize) 0.5422 0.033 16.606 0.000 0.478 0.606 ============================================================================== Omnibus: 0.255 Durbin-Watson: 1.086 Prob(Omnibus): 0.880 Jarque-Bera (JB): 0.333 Skew: -0.045 Prob(JB): 0.847 Kurtosis: 2.920 Cond. No. 183. ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
from scipy.stats import t

# Two-sided 1% critical value, 544 degrees of freedom.
t.ppf(0.995, df=544)
2.5848970040670145
import pandas as pd
from statsmodels.api import OLS

# Load the consumption data and show summary statistics.
Hcons = pd.read_csv('csv/loedata/Hcons.csv')
print(Hcons.describe())
age comm rec count 6723.000000 6723.000000 6723.000000 mean 45.860033 6.841078 5.162530 std 8.237180 3.925046 4.836962 min 30.000000 0.000000 0.000000 25% 39.000000 4.261053 2.253281 50% 46.000000 6.031846 3.856771 75% 53.000000 8.440650 6.514518 max 60.000000 37.129649 72.807483
# Regress communication time on age.
model = OLS.from_formula('comm~age', data=Hcons)
ols = model.fit()
print(ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: comm R-squared: 0.001 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 4.522 Date: Thu, 15 Dec 2022 Prob (F-statistic): 0.0335 Time: 01:40:51 Log-Likelihood: -18730. No. Observations: 6723 AIC: 3.746e+04 Df Residuals: 6721 BIC: 3.748e+04 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 6.2744 0.271 23.176 0.000 5.744 6.805 age 0.0124 0.006 2.127 0.033 0.001 0.024 ============================================================================== Omnibus: 2757.039 Durbin-Watson: 1.844 Prob(Omnibus): 0.000 Jarque-Bera (JB): 15710.710 Skew: 1.887 Prob(JB): 0.00 Kurtosis: 9.468 Cond. No. 264. ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
from scipy.stats import t

# Two-sided p-value for t = -1.54 with 48 df, computed from the lower tail.
2 * t.cdf(-1.54, 48)
0.13012747345659167
2*(1-t.cdf(1.54,48))
0.1301274734565918
t.ppf(.975, 544)
1.9643343306673329
import pandas as pd
from statsmodels.api import OLS

Klosa = pd.read_csv('csv/loedata/Klosa.csv')
# Keep non-working respondents aged 65 or older.
Klosa1 = Klosa.loc[(Klosa['working'] == 0) & (Klosa['age'] >= 65)]
fm = 'satisfy5~married'
print(OLS.from_formula(fm, data=Klosa1).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: satisfy5 R-squared: 0.026 Model: OLS Adj. R-squared: 0.025 Method: Least Squares F-statistic: 28.49 Date: Thu, 15 Dec 2022 Prob (F-statistic): 1.15e-07 Time: 01:40:52 Log-Likelihood: -4641.8 No. Observations: 1060 AIC: 9288. Df Residuals: 1058 BIC: 9297. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 51.5534 0.851 60.562 0.000 49.883 53.224 married 6.3365 1.187 5.337 0.000 4.007 8.666 ============================================================================== Omnibus: 27.276 Durbin-Watson: 1.739 Prob(Omnibus): 0.000 Jarque-Bera (JB): 29.010 Skew: -0.402 Prob(JB): 5.02e-07 Kurtosis: 2.904 Cond. No. 2.65 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import numpy as np

# Standard deviation of the satisfaction score (np.std default: ddof=0).
np.std(Klosa['satisfy5'])
18.522274892616505
print(OLS.from_formula(fm, data=Klosa1[Klosa1.hlth3>=0]).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: satisfy5 R-squared: 0.002 Model: OLS Adj. R-squared: -0.001 Method: Least Squares F-statistic: 0.5670 Date: Thu, 15 Dec 2022 Prob (F-statistic): 0.452 Time: 01:40:52 Log-Likelihood: -1250.9 No. Observations: 300 AIC: 2506. Df Residuals: 298 BIC: 2513. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 62.1186 1.446 42.960 0.000 59.273 64.964 married 1.3978 1.856 0.753 0.452 -2.256 5.051 ============================================================================== Omnibus: 11.384 Durbin-Watson: 1.656 Prob(Omnibus): 0.003 Jarque-Bera (JB): 12.106 Skew: -0.484 Prob(JB): 0.00235 Kurtosis: 2.824 Cond. No. 2.95 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(OLS.from_formula(fm, data=Klosa1[Klosa1.hlth3<0]).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: satisfy5 R-squared: 0.028 Model: OLS Adj. R-squared: 0.026 Method: Least Squares F-statistic: 21.54 Date: Thu, 15 Dec 2022 Prob (F-statistic): 4.08e-06 Time: 01:40:52 Log-Likelihood: -3344.5 No. Observations: 760 AIC: 6693. Df Residuals: 758 BIC: 6702. Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 48.4131 0.991 48.849 0.000 46.468 50.359 married 6.6558 1.434 4.641 0.000 3.841 9.471 ============================================================================== Omnibus: 11.637 Durbin-Watson: 1.738 Prob(Omnibus): 0.003 Jarque-Bera (JB): 11.918 Skew: -0.294 Prob(JB): 0.00258 Kurtosis: 2.828 Cond. No. 2.57 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import numpy as np
import pandas as pd
from statsmodels.api import OLS

# Refit the log-log Housing regression and report 95% confidence intervals.
Housing = pd.read_csv('csv/Ecdat/Housing.csv')
model = OLS.from_formula('np.log(price)~np.log(lotsize)', data=Housing)
ols = model.fit()
## https://stackoverflow.com/questions/44302099/python-statsmodels-ols-confidence-interval
## https://www.statsmodels.org/dev/generated/statsmodels.regression.linear_model.OLSResults.conf_int.html
print(ols.conf_int(alpha=.05))
0 1 Intercept 5.924920 7.012143 np.log(lotsize) 0.478043 0.606315
import pandas as pd
import numpy as np
from statsmodels.api import OLS

Ksalary = pd.read_csv('csv/loedata/Ksalary.csv')
# KOSPI-listed firms in the 'ElecElectron' sector only.
mask = (Ksalary.kospi == 1) & (Ksalary.sector == 'ElecElectron')
Ksalary1 = Ksalary.loc[mask]
# Log average salary on log sales per employee.
ols = OLS.from_formula('np.log(avgsal)~np.log(sales/emp)', data=Ksalary1).fit()
print(ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: np.log(avgsal) R-squared: 0.101 Model: OLS Adj. R-squared: 0.084 Method: Least Squares F-statistic: 6.072 Date: Thu, 15 Dec 2022 Prob (F-statistic): 0.0169 Time: 01:40:52 Log-Likelihood: -0.95089 No. Observations: 56 AIC: 5.902 Df Residuals: 54 BIC: 9.952 Df Model: 1 Covariance Type: nonrobust ======================================================================================= coef std err t P>|t| [0.025 0.975] --------------------------------------------------------------------------------------- Intercept 3.8047 0.046 82.889 0.000 3.713 3.897 np.log(sales / emp) 0.1225 0.050 2.464 0.017 0.023 0.222 ============================================================================== Omnibus: 0.191 Durbin-Watson: 0.216 Prob(Omnibus): 0.909 Jarque-Bera (JB): 0.337 Skew: 0.119 Prob(JB): 0.845 Kurtosis: 2.704 Cond. No. 2.32 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(ols.conf_int(.01))
0 1 Intercept 3.682129 3.927238 np.log(sales / emp) -0.010231 0.255249
print(ols.conf_int(.05))
0 1 Intercept 3.712658 3.896709 np.log(sales / emp) 0.022836 0.222183
# Price per unit of lot size. Vectorized Series division replaces the
# element-wise list comprehension over zip(price, lotsize): identical
# float results, clearer, and computed in C rather than a Python loop.
Housing['unitprice'] = Housing.price / Housing.lotsize
# Regress log unit price on log lot size. Since log(unitprice) =
# log(price) - log(lotsize), the slope equals the earlier elasticity
# minus one (0.5422 - 1 = -0.4578, matching the printed output).
ols = OLS.from_formula('np.log(unitprice)~np.log(lotsize)', data=Housing).fit()
print(ols.summary())
OLS Regression Results ============================================================================== Dep. Variable: np.log(unitprice) R-squared: 0.265 Model: OLS Adj. R-squared: 0.264 Method: Least Squares F-statistic: 196.6 Date: Thu, 15 Dec 2022 Prob (F-statistic): 2.36e-38 Time: 01:40:52 Log-Likelihood: -122.36 No. Observations: 546 AIC: 248.7 Df Residuals: 544 BIC: 257.3 Df Model: 1 Covariance Type: nonrobust =================================================================================== coef std err t P>|t| [0.025 0.975] ----------------------------------------------------------------------------------- Intercept 6.4685 0.277 23.374 0.000 5.925 7.012 np.log(lotsize) -0.4578 0.033 -14.022 0.000 -0.522 -0.394 ============================================================================== Omnibus: 0.255 Durbin-Watson: 1.086 Prob(Omnibus): 0.880 Jarque-Bera (JB): 0.333 Skew: -0.045 Prob(JB): 0.847 Kurtosis: 2.920 Cond. No. 183. ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import pandas as pd
from statsmodels.api import OLS

# Regress packs on price (AER CigarettesB data).
data = pd.read_csv('csv/AER/CigarettesB.csv')
fit = OLS.from_formula('packs~price', data=data).fit()
print(fit.summary())
OLS Regression Results ============================================================================== Dep. Variable: packs R-squared: 0.291 Model: OLS Adj. R-squared: 0.275 Method: Least Squares F-statistic: 18.08 Date: Thu, 15 Dec 2022 Prob (F-statistic): 0.000108 Time: 01:40:52 Log-Likelihood: 19.195 No. Observations: 46 AIC: -34.39 Df Residuals: 44 BIC: -30.73 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 5.0941 0.063 81.247 0.000 4.968 5.220 price -1.1983 0.282 -4.253 0.000 -1.766 -0.630 ============================================================================== Omnibus: 1.860 Durbin-Watson: 2.307 Prob(Omnibus): 0.395 Jarque-Bera (JB): 1.209 Skew: -0.389 Prob(JB): 0.546 Kurtosis: 3.164 Cond. No. 12.2 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(OLS.from_formula('I(packs+price)~price', data=data).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: I(packs + price) R-squared: 0.011 Model: OLS Adj. R-squared: -0.011 Method: Least Squares F-statistic: 0.4953 Date: Thu, 15 Dec 2022 Prob (F-statistic): 0.485 Time: 01:40:52 Log-Likelihood: 19.195 No. Observations: 46 AIC: -34.39 Df Residuals: 44 BIC: -30.73 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 5.0941 0.063 81.247 0.000 4.968 5.220 price -0.1983 0.282 -0.704 0.485 -0.766 0.370 ============================================================================== Omnibus: 1.860 Durbin-Watson: 2.307 Prob(Omnibus): 0.395 Jarque-Bera (JB): 1.209 Skew: -0.389 Prob(JB): 0.546 Kurtosis: 3.164 Cond. No. 12.2 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.