Getting Started

Data Load

[1]:
import pandas as pd
# Read the simulated predictors (X) and response (y) from CSV files located in
# the current working directory; each call returns a pandas DataFrame.
X = pd.read_csv('simulation_data_x.csv')
y = pd.read_csv('simulation_data_y.csv')
[2]:
# Preview the first five rows of the predictor table (columns V1 ... V100).
X.head()
[2]:
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 ... V91 V92 V93 V94 V95 V96 V97 V98 V99 V100
0 0.101741 0.144909 0.235567 0.576186 0.299443 0.296395 0.902235 0.265811 0.420927 0.684045 ... -0.058677 0.497420 -1.124986 0.338215 -0.942943 -1.257044 -0.531471 1.236317 0.405682 0.387636
1 -0.105054 -0.110128 -0.033311 -0.042925 -0.752605 -0.794815 -1.699739 -1.891533 -1.287547 -1.154547 ... -1.614948 -1.337878 0.795742 1.101117 -0.920702 -0.098002 -0.269719 0.333092 -0.500367 1.340876
2 -0.478922 -0.058475 -0.620625 1.775435 0.935760 1.268173 -2.652118 -2.327299 -2.913926 -2.140256 ... 0.081620 0.999657 -0.594758 0.057804 -1.259650 0.321864 0.992930 0.552269 1.253700 0.974151
3 -0.657057 -0.508105 -0.556453 0.057045 -0.344814 -0.557824 -0.814844 0.016355 -0.384234 -0.022224 ... 0.077500 -1.489750 -0.151010 0.347814 2.281268 -1.275026 -0.141539 -0.335557 0.196004 -1.347782
4 -0.939275 -0.780565 -0.495687 -0.308105 -0.604543 -0.272711 0.080763 0.542582 0.580669 0.162638 ... 0.677662 0.188278 0.716616 -1.290398 -0.579556 -0.692827 -1.040820 -0.674525 1.355343 1.754870

5 rows × 100 columns

[3]:
# Preview the first five rows of the response; it is a one-column DataFrame
# whose column is also named V1.
y.head()
[3]:
V1
0 2.811113
1 1.049249
2 -4.496389
3 -3.846408
4 -2.805357

General Usage

[4]:
from Hi_LASSO_pyspark import HiLASSO_Spark

# Configure the estimator on the simulated data, one keyword per line so each
# setting is easy to tune. Parameter meanings are not documented in this
# notebook; the notes below are assumptions to confirm in the package docs.
# NOTE(review): the recorded output's repr names the module `Hi_LASSO_spark`
# while the import above uses `Hi_LASSO_pyspark` -- re-run to confirm the
# outputs match the installed package.
# NOTE(review): no random seed is set; if the procedure is randomized, the
# numbers shown below will not reproduce exactly -- confirm.
model = HiLASSO_Spark(
    X,
    y,
    alpha=0.05,       # presumably the significance level for selection -- confirm
    q1='auto',        # presumably a size setting for procedure 1 -- confirm
    q2='auto',        # presumably a size setting for procedure 2 -- confirm
    L=30,
    cv=5,             # presumably the number of cross-validation folds -- confirm
    node='auto',
    logistic=False,   # presumably toggles a logistic (classification) model -- confirm
)

# fit() prints a completion line for each of its two procedures and returns a
# HiLASSO_Spark object, which the notebook echoes as the cell result.
model.fit()
C:\Users\Seungha\anaconda3\lib\site-packages\sklearn\externals\joblib\__init__.py:15: FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.
  warnings.warn(msg, category=FutureWarning)
C:\Users\Seungha\anaconda3\lib\site-packages\sklearn\externals\six.py:31: FutureWarning: The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).
  "(https://pypi.org/project/six/).", FutureWarning)
Procedure_1_fin.
Procedure_2_fin.
[4]:
<Hi_LASSO_spark.HiLASSO_Spark at 0x280ac36e308>
[5]:
# Show the fitted coefficients: a NumPy array with one entry per column of X
# (100 here); some entries are exactly zero.
model.coef_
[5]:
array([ 6.21282205e-01,  2.27730867e+00, -2.12828920e-01,  7.91275602e-01,
        9.36945797e-02, -6.15373475e-02,  9.00107483e-01,  7.81416406e-01,
        2.33909585e-01, -2.91365004e-03, -3.78273386e-01,  0.00000000e+00,
       -2.41606493e-02,  0.00000000e+00, -6.58164921e-01,  1.54145412e-03,
       -1.16992273e-02,  2.51639371e-02,  0.00000000e+00,  7.55799788e-03,
        1.68828138e-01, -3.05823959e-02,  0.00000000e+00,  1.73225751e-03,
        3.01344168e-01,  0.00000000e+00,  1.91514369e-02,  3.97503818e-02,
        0.00000000e+00,  4.43804365e-02,  0.00000000e+00,  0.00000000e+00,
        4.32576398e-02,  5.52723676e-01,  0.00000000e+00,  0.00000000e+00,
        4.03342118e-04, -2.10036305e-03,  0.00000000e+00,  0.00000000e+00,
        6.95194714e-02,  0.00000000e+00,  5.69989592e-01,  0.00000000e+00,
       -6.26655745e-02,  2.29603115e-02, -1.44772894e-02,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -3.37721796e-02,
       -1.72170081e-01,  0.00000000e+00,  5.22259759e-03,  0.00000000e+00,
        1.57643093e-02,  1.84508844e-02, -3.35463900e-02,  8.92966775e-02,
        0.00000000e+00,  0.00000000e+00,  1.16838784e-03,  0.00000000e+00,
       -5.10785040e-03, -8.20826615e-04,  1.37965408e-02, -7.84592615e-03,
        4.05342662e-04, -8.13447460e-04,  1.62454783e-03,  2.11658823e-01,
        1.37239634e-02,  0.00000000e+00,  1.96548381e-02,  3.97642011e-04,
        3.98597583e-02,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -1.87735414e-01,  9.89520027e-02,  9.42610997e-02,  0.00000000e+00,
       -4.74286582e-03,  3.51989385e-03, -1.06679404e+00, -1.44361223e-01,
        0.00000000e+00,  0.00000000e+00,  1.13453582e-03,  0.00000000e+00,
        0.00000000e+00,  9.41690572e-02,  0.00000000e+00,  8.94030396e-02,
        0.00000000e+00,  0.00000000e+00, -5.98969457e-02,  1.38721768e-02])
[6]:
# Show the p-values array, which has the same length as coef_ (100 entries);
# presumably entry i is the significance of predictor i -- confirm.
model.p_values
[6]:
array([4.62571924e-063, 5.33340116e-216, 5.58692822e-002, 7.50873638e-129,
       5.41813130e-001, 9.94065083e-001, 7.50873638e-129, 6.68678693e-094,
       4.69898115e-011, 9.99999953e-001, 3.47337427e-096, 1.00000000e+000,
       9.99559305e-001, 1.00000000e+000, 1.77097129e-119, 1.00000000e+000,
       9.99999988e-001, 9.99985528e-001, 1.00000000e+000, 1.00000000e+000,
       2.63765514e-007, 9.99910617e-001, 1.00000000e+000, 1.00000000e+000,
       3.22315280e-033, 1.00000000e+000, 9.99998180e-001, 9.98226547e-001,
       1.00000000e+000, 7.93160337e-001, 1.00000000e+000, 1.00000000e+000,
       4.72204837e-001, 4.95865112e-102, 1.00000000e+000, 1.00000000e+000,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       7.69851716e-002, 1.00000000e+000, 4.14070187e-122, 1.00000000e+000,
       9.94240213e-006, 9.99796163e-001, 9.99962949e-001, 1.00000000e+000,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 9.59297160e-001,
       1.20662551e-010, 1.00000000e+000, 9.99999988e-001, 1.00000000e+000,
       9.99985528e-001, 9.99910617e-001, 9.99559305e-001, 1.20674890e-003,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 4.77949646e-023,
       9.99994693e-001, 1.00000000e+000, 9.99796163e-001, 1.00000000e+000,
       4.72204837e-001, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       1.55932686e-032, 2.46381754e-006, 2.00854891e-003, 1.00000000e+000,
       1.00000000e+000, 1.00000000e+000, 2.61910668e-202, 1.81491953e-022,
       1.00000000e+000, 1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
       1.00000000e+000, 1.28372860e-004, 1.00000000e+000, 4.40268352e-009,
       1.00000000e+000, 1.00000000e+000, 8.81497258e-001, 9.99999953e-001])
[7]:
# Show the coefficient vector restricted to the selected predictors: retained
# entries equal the corresponding coef_ values and all others are 0.
# NOTE(review): in the recorded output, predictors with p-values around 1.2e-3
# and 2.0e-3 are zeroed while one at about 1.3e-4 is kept, so the cutoff is
# stricter than alpha=0.05 -- consistent with a multiplicity-corrected
# threshold such as alpha / n_features; confirm against the package docs.
model.selected_var
[7]:
array([ 0.62128221,  2.27730867,  0.        ,  0.7912756 ,  0.        ,
        0.        ,  0.90010748,  0.78141641,  0.23390958,  0.        ,
       -0.37827339,  0.        ,  0.        ,  0.        , -0.65816492,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.16882814,  0.        ,  0.        ,  0.        ,  0.30134417,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.55272368,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.56998959,  0.        , -0.06266557,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.17217008,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.21165882,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.18773541,  0.098952  ,  0.        ,  0.        ,  0.        ,
        0.        , -1.06679404, -0.14436122,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.09416906,  0.        ,
        0.08940304,  0.        ,  0.        ,  0.        ,  0.        ])