import pandas as pd
from statsmodels.api import OLS
# Load the Wages data set from the local copy of the Ecdat CSV files.
Wages = pd.read_csv(filepath_or_buffer='csv/Ecdat/Wages.csv')
# Row count of the full data set (shown as the cell's value).
Wages.shape[0]
4165
# Keep every 7th row starting at position 1 (positions 1, 8, 15, ...).
# Positional slicing with .iloc replaces the hand-built boolean mask; positions
# are 0-based, so the start offset 1 picks the second row of each group of 7.
# NOTE(review): presumably each individual has 7 consecutive yearly rows and
# this selects the 1977 wave -- confirm against the data set's documentation.
Wages77 = Wages.iloc[1::7].copy()  # .copy() so later edits don't touch Wages
Wages77.head()
 | exp | wks | bluecol | ind | south | smsa | married | sex | union | ed | black | lwage |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 4 | 43 | no | 0 | yes | no | yes | male | no | 9 | no | 5.72031 |
8 | 31 | 27 | yes | 0 | no | no | yes | male | no | 11 | no | 6.21461 |
15 | 7 | 51 | yes | 1 | no | no | yes | male | yes | 12 | no | 6.43615 |
22 | 32 | 46 | yes | 0 | no | yes | no | female | no | 10 | yes | 6.23832 |
29 | 11 | 46 | yes | 0 | no | no | yes | male | yes | 16 | no | 6.62007 |
# Long regression: lwage on both ed and exp, using the Wages77 subsample.
long_model = OLS.from_formula(formula='lwage~ed+exp', data=Wages77)
ols = long_model.fit()
# Show only the estimated coefficients.
print(ols.params)
Intercept 5.476063 ed 0.062957 exp 0.010107 dtype: float64
# Short regression: lwage on ed alone (exp is left out of the model).
short_fit = OLS.from_formula(formula='lwage~ed', data=Wages77).fit()
print(short_fit.params)
Intercept 5.767925 ed 0.054283 dtype: float64
# Auxiliary regression: exp (the regressor dropped from the short model) on ed.
aux_fit = OLS.from_formula(formula='exp~ed', data=Wages77).fit()
print(aux_fit.params)
Intercept 28.878598 ed -0.858271 dtype: float64
import pandas as pd
import numpy as np
from statsmodels.api import OLS
# Load the Housing data set from the local copy of the Ecdat CSV files.
Housing = pd.read_csv(filepath_or_buffer='csv/Ecdat/Housing.csv')

# Regression A: log(price) on log(lotsize) and bedrooms.
formula_a = 'np.log(price)~np.log(lotsize)+bedrooms'
regA = OLS.from_formula(formula_a, data=Housing).fit()
print("Regression A:", regA.params, sep="\n")
Regression A: Intercept 6.380361 np.log(lotsize) 0.501505 bedrooms 0.145872 dtype: float64
# Regression B: log(price) on bedrooms only (log(lotsize) left out).
formula_b = 'np.log(price)~bedrooms'
regB = OLS.from_formula(formula_b, data=Housing).fit()
print("Regression B:", regB.params, sep="\n")
Regression B: Intercept 10.505674 bedrooms 0.186593 dtype: float64
# Regression C: log(lotsize) on bedrooms.
formula_c = 'np.log(lotsize)~bedrooms'
regC = OLS.from_formula(formula_c, data=Housing).fit()
print("Regression C:", regC.params, sep="\n")
Regression C: Intercept 8.225860 bedrooms 0.081199 dtype: float64
import pandas as pd
import numpy as np
from statsmodels.api import OLS
# Reload the Housing data so this section can run on its own.
Housing = pd.read_csv(filepath_or_buffer='csv/Ecdat/Housing.csv')

# Regressors are recentered: log(lotsize/5000) is 0 at lotsize == 5000 and
# (bedrooms - 3) is 0 at bedrooms == 3, so the intercept is the fitted
# log(price) at that reference point.
centered_formula = 'np.log(price)~np.log(lotsize/5000)+I(bedrooms-3)'
ols = OLS.from_formula(centered_formula, data=Housing).fit()

# Full regression report (coefficients, standard errors, fit statistics).
report = ols.summary()
print(report)
OLS Regression Results ============================================================================== Dep. Variable: np.log(price) R-squared: 0.418 Model: OLS Adj. R-squared: 0.416 Method: Least Squares F-statistic: 195.1 Date: Wed, 14 Dec 2022 Prob (F-statistic): 1.43e-64 Time: 15:03:12 Log-Likelihood: -86.480 No. Observations: 546 AIC: 179.0 Df Residuals: 543 BIC: 191.9 Df Model: 2 Covariance Type: nonrobust ========================================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------------------ Intercept 11.0894 0.012 903.860 0.000 11.065 11.113 np.log(lotsize / 5000) 0.5015 0.031 16.201 0.000 0.441 0.562 I(bedrooms - 3) 0.1459 0.017 8.733 0.000 0.113 0.179 ============================================================================== Omnibus: 2.883 Durbin-Watson: 1.209 Prob(Omnibus): 0.237 Jarque-Bera (JB): 2.794 Skew: -0.175 Prob(JB): 0.247 Kurtosis: 3.022 Cond. No. 2.57 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Confidence intervals for every coefficient at significance level 0.05.
intervals = ols.conf_int(alpha=.05)
print(intervals)
0 1 Intercept 11.065294 11.113495 np.log(lotsize / 5000) 0.440701 0.562310 I(bedrooms - 3) 0.113059 0.178684