Getting Started¶
Data load¶
[1]:
import pandas as pd

# Input files are resolved relative to the notebook's working directory.
X_PATH = 'simulation_data_x.csv'
Y_PATH = 'simulation_data_y.csv'

# X holds the predictors (columns V1..V100 in the preview below);
# y holds the response as a single-column frame.
X = pd.read_csv(X_PATH)
y = pd.read_csv(Y_PATH)
[2]:
# Preview the first rows of the predictor frame (the output shows 100 columns, V1..V100).
X.head()
[2]:
| | V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | ... | V91 | V92 | V93 | V94 | V95 | V96 | V97 | V98 | V99 | V100 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.101741 | 0.144909 | 0.235567 | 0.576186 | 0.299443 | 0.296395 | 0.902235 | 0.265811 | 0.420927 | 0.684045 | ... | -0.058677 | 0.497420 | -1.124986 | 0.338215 | -0.942943 | -1.257044 | -0.531471 | 1.236317 | 0.405682 | 0.387636 |
1 | -0.105054 | -0.110128 | -0.033311 | -0.042925 | -0.752605 | -0.794815 | -1.699739 | -1.891533 | -1.287547 | -1.154547 | ... | -1.614948 | -1.337878 | 0.795742 | 1.101117 | -0.920702 | -0.098002 | -0.269719 | 0.333092 | -0.500367 | 1.340876 |
2 | -0.478922 | -0.058475 | -0.620625 | 1.775435 | 0.935760 | 1.268173 | -2.652118 | -2.327299 | -2.913926 | -2.140256 | ... | 0.081620 | 0.999657 | -0.594758 | 0.057804 | -1.259650 | 0.321864 | 0.992930 | 0.552269 | 1.253700 | 0.974151 |
3 | -0.657057 | -0.508105 | -0.556453 | 0.057045 | -0.344814 | -0.557824 | -0.814844 | 0.016355 | -0.384234 | -0.022224 | ... | 0.077500 | -1.489750 | -0.151010 | 0.347814 | 2.281268 | -1.275026 | -0.141539 | -0.335557 | 0.196004 | -1.347782 |
4 | -0.939275 | -0.780565 | -0.495687 | -0.308105 | -0.604543 | -0.272711 | 0.080763 | 0.542582 | 0.580669 | 0.162638 | ... | 0.677662 | 0.188278 | 0.716616 | -1.290398 | -0.579556 | -0.692827 | -1.040820 | -0.674525 | 1.355343 | 1.754870 |
5 rows × 100 columns
[3]:
# Preview the first rows of the response frame (a single column, named V1 in the CSV).
y.head()
[3]:
| | V1 |
---|---|
0 | 2.811113 |
1 | 1.049249 |
2 | -4.496389 |
3 | -3.846408 |
4 | -2.805357 |
General Usage¶
[4]:
from Hi_LASSO_pyspark import HiLASSO_Spark
# NOTE(review): the saved output of this cell shows the class living in module
# `Hi_LASSO_spark`, not `Hi_LASSO_pyspark` -- confirm which module name the
# installed package actually exposes.

# Build the estimator on the simulated data loaded earlier in the notebook.
# The meaning of each option is not shown in this notebook; the notes below are
# assumptions to be confirmed against the package documentation.
model = HiLASSO_Spark(
    X,
    y,
    alpha=0.05,       # presumably the significance level for variable selection
    q1='auto',        # presumably a subsampling size for the first procedure; 'auto' defers to the library
    q2='auto',        # presumably the same for the second procedure
    L=30,             # presumably controls how many bootstrap rounds are run
    cv=5,             # presumably the number of cross-validation folds
    node='auto',      # presumably the Spark parallelism setting
    logistic=False,   # presumably False selects a linear (not logistic) model
)

# fit() prints "Procedure_1_fin." / "Procedure_2_fin." as it runs and returns the
# estimator itself, which is why its repr appears as this cell's output.
model.fit()
C:\Users\Seungha\anaconda3\lib\site-packages\sklearn\externals\joblib\__init__.py:15: FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.
warnings.warn(msg, category=FutureWarning)
C:\Users\Seungha\anaconda3\lib\site-packages\sklearn\externals\six.py:31: FutureWarning: The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).
"(https://pypi.org/project/six/).", FutureWarning)
Procedure_1_fin.
Procedure_2_fin.
[4]:
<Hi_LASSO_spark.HiLASSO_Spark at 0x280ac36e308>
[5]:
# Fitted coefficient estimates: the output holds 100 values, matching the 100
# columns of X (presumably in column order V1..V100 -- confirm).
model.coef_
[5]:
array([ 6.21282205e-01, 2.27730867e+00, -2.12828920e-01, 7.91275602e-01,
9.36945797e-02, -6.15373475e-02, 9.00107483e-01, 7.81416406e-01,
2.33909585e-01, -2.91365004e-03, -3.78273386e-01, 0.00000000e+00,
-2.41606493e-02, 0.00000000e+00, -6.58164921e-01, 1.54145412e-03,
-1.16992273e-02, 2.51639371e-02, 0.00000000e+00, 7.55799788e-03,
1.68828138e-01, -3.05823959e-02, 0.00000000e+00, 1.73225751e-03,
3.01344168e-01, 0.00000000e+00, 1.91514369e-02, 3.97503818e-02,
0.00000000e+00, 4.43804365e-02, 0.00000000e+00, 0.00000000e+00,
4.32576398e-02, 5.52723676e-01, 0.00000000e+00, 0.00000000e+00,
4.03342118e-04, -2.10036305e-03, 0.00000000e+00, 0.00000000e+00,
6.95194714e-02, 0.00000000e+00, 5.69989592e-01, 0.00000000e+00,
-6.26655745e-02, 2.29603115e-02, -1.44772894e-02, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, -3.37721796e-02,
-1.72170081e-01, 0.00000000e+00, 5.22259759e-03, 0.00000000e+00,
1.57643093e-02, 1.84508844e-02, -3.35463900e-02, 8.92966775e-02,
0.00000000e+00, 0.00000000e+00, 1.16838784e-03, 0.00000000e+00,
-5.10785040e-03, -8.20826615e-04, 1.37965408e-02, -7.84592615e-03,
4.05342662e-04, -8.13447460e-04, 1.62454783e-03, 2.11658823e-01,
1.37239634e-02, 0.00000000e+00, 1.96548381e-02, 3.97642011e-04,
3.98597583e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
-1.87735414e-01, 9.89520027e-02, 9.42610997e-02, 0.00000000e+00,
-4.74286582e-03, 3.51989385e-03, -1.06679404e+00, -1.44361223e-01,
0.00000000e+00, 0.00000000e+00, 1.13453582e-03, 0.00000000e+00,
0.00000000e+00, 9.41690572e-02, 0.00000000e+00, 8.94030396e-02,
0.00000000e+00, 0.00000000e+00, -5.98969457e-02, 1.38721768e-02])
[6]:
# P-values reported by the fitted model: 100 values, presumably aligned
# position-by-position with model.coef_ -- confirm.
model.p_values
[6]:
array([4.62571924e-063, 5.33340116e-216, 5.58692822e-002, 7.50873638e-129,
5.41813130e-001, 9.94065083e-001, 7.50873638e-129, 6.68678693e-094,
4.69898115e-011, 9.99999953e-001, 3.47337427e-096, 1.00000000e+000,
9.99559305e-001, 1.00000000e+000, 1.77097129e-119, 1.00000000e+000,
9.99999988e-001, 9.99985528e-001, 1.00000000e+000, 1.00000000e+000,
2.63765514e-007, 9.99910617e-001, 1.00000000e+000, 1.00000000e+000,
3.22315280e-033, 1.00000000e+000, 9.99998180e-001, 9.98226547e-001,
1.00000000e+000, 7.93160337e-001, 1.00000000e+000, 1.00000000e+000,
4.72204837e-001, 4.95865112e-102, 1.00000000e+000, 1.00000000e+000,
1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
7.69851716e-002, 1.00000000e+000, 4.14070187e-122, 1.00000000e+000,
9.94240213e-006, 9.99796163e-001, 9.99962949e-001, 1.00000000e+000,
1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 9.59297160e-001,
1.20662551e-010, 1.00000000e+000, 9.99999988e-001, 1.00000000e+000,
9.99985528e-001, 9.99910617e-001, 9.99559305e-001, 1.20674890e-003,
1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 4.77949646e-023,
9.99994693e-001, 1.00000000e+000, 9.99796163e-001, 1.00000000e+000,
4.72204837e-001, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
1.55932686e-032, 2.46381754e-006, 2.00854891e-003, 1.00000000e+000,
1.00000000e+000, 1.00000000e+000, 2.61910668e-202, 1.81491953e-022,
1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
1.00000000e+000, 1.28372860e-004, 1.00000000e+000, 4.40268352e-009,
1.00000000e+000, 1.00000000e+000, 8.81497258e-001, 9.99999953e-001])
[7]:
# Coefficients after selection: in the output, retained entries equal the
# matching model.coef_ values and every other entry is 0.
# NOTE(review): the cutoff is stricter than the raw alpha=0.05 -- e.g. the 60th
# and 83rd entries have p ~ 1.2e-3 and ~ 2.0e-3 yet are zeroed, while entries
# with p ~ 1.3e-4 are kept. Presumably a multiple-testing correction
# (consistent with alpha / 100) -- confirm against the library docs.
model.selected_var
[7]:
array([ 0.62128221, 2.27730867, 0. , 0.7912756 , 0. ,
0. , 0.90010748, 0.78141641, 0.23390958, 0. ,
-0.37827339, 0. , 0. , 0. , -0.65816492,
0. , 0. , 0. , 0. , 0. ,
0.16882814, 0. , 0. , 0. , 0.30134417,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0.55272368, 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0.56998959, 0. , -0.06266557,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , -0.17217008, 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0.21165882, 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
-0.18773541, 0.098952 , 0. , 0. , 0. ,
0. , -1.06679404, -0.14436122, 0. , 0. ,
0. , 0. , 0. , 0.09416906, 0. ,
0.08940304, 0. , 0. , 0. , 0. ])