在Python 3中使用Pipeline获取RFE的support_和ranking_属性 [英] Get support and ranking attributes for RFE using Pipeline in Python 3

查看:0
本文介绍了在Python3中使用管道获取对RFE的支持和排名属性的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

到目前为止,我的代码如下所示,它运行良好。但是,我想为每个被测试的特征打印以下RFE属性:“rfe.support_[i]”、“rfe.ranking_[i]”,以及所选特征的名称。其中“i”指的是索引:第一个属性返回True或False(表示该列是否被选中),第二个属性返回它们各自的排名。

换句话说,我想打印出每次RFE中所考虑的列,而不是让它们停留在抽象的层面。

# Explore the number of selected features for RFE
from numpy import mean
from numpy import std
from sklearn.model_selection import RepeatedKFold, cross_val_score, GridSearchCV
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Get the dataset
def get_dataset(df, target):
    """Split a dataframe into a feature frame and a flat target array.

    Args:
        df: Dataframe holding both the feature columns and the target column.
        target: Name of the target column.

    Returns:
        A ``(X, y)`` tuple: ``X`` is ``df`` without the target column and
        ``y`` is the target column's values flattened to one dimension.
    """
    features = df.drop(columns=target)
    # Selecting with a list yields a one-column frame; flatten makes it 1-D.
    labels = df[[target]].values.flatten()
    return features, labels

# Get a list of models to evaluate
def get_models(list_num_cols, list_cat_cols):
    """Build one preprocessing + RFE + regressor pipeline per feature count.

    Args:
        list_num_cols: Columns routed to the median imputer.
        list_cat_cols: Columns routed to the most-frequent imputer and the
            one-hot encoder.

    Returns:
        Dict mapping ``'DecisionTreeRegressor_<n>'`` to a pipeline whose RFE
        step selects ``n`` features, for ``n`` in 2 and 3.
    """
    numeric_steps = [('num_imputer', SimpleImputer(strategy='median'))]
    categorical_steps = [
        ('cat_imputer', SimpleImputer(strategy='most_frequent')),
        ('one-hot-encoder', OneHotEncoder()),
    ]
    # A single preprocessor instance is shared by every pipeline built below.
    preprocessor = ColumnTransformer(transformers=[
        ('num', Pipeline(steps=numeric_steps), list_num_cols),
        ('cat', Pipeline(steps=categorical_steps), list_cat_cols),
    ])

    models = {}
    for n_features in range(2, 4):
        selector = RFE(estimator=DecisionTreeRegressor(),
                       n_features_to_select=n_features)
        pipeline_steps = [
            ('preprocessor', preprocessor),
            ('s_dtr', selector),
            ('m_dtr', DecisionTreeRegressor()),
        ]
        models['DecisionTreeRegressor_' + str(n_features)] = Pipeline(steps=pipeline_steps)
    return models

# Evaluate a given model using cross-validation
def evaluate_model(model, X, y):
    """Score ``model`` with repeated 10-fold cross-validation.

    Args:
        model: Estimator to evaluate.
        X: Feature data passed through to ``cross_val_score``.
        y: Target values passed through to ``cross_val_score``.

    Returns:
        The scores returned by ``cross_val_score`` under the
        ``'neg_mean_absolute_error'`` scoring.
    """
    # 10 splits repeated 3 times, seeded so the splits are reproducible.
    splitter = RepeatedKFold(n_splits=10, n_repeats=3, random_state=7)
    return cross_val_score(
        model, X, y,
        scoring='neg_mean_absolute_error',
        cv=splitter,
        n_jobs=-1,
        error_score='raise',
    )


# Build the feature frame and target array (my_df is defined elsewhere)
X, y = get_dataset(my_df, 'my_target')   # Script entry point
# One pipeline per candidate feature count, built from the column dtypes
numeric_cols = X.select_dtypes(include='number').columns.tolist()
categorical_cols = X.select_dtypes(include='object').columns.tolist()
models = get_models(numeric_cols, categorical_cols)
# Cross-validate every pipeline, keeping scores and names side by side
results = []
names = []
for name, model in models.items():
    scores = evaluate_model(model, X, y)
    results.append(scores)
    names.append(name)
    # Report the mean and standard deviation of the scores
    print('%s %.3f (%.3f)' % (name, mean(scores), std(scores)))

以下返回错误:

# NOTE(review): the code above never calls .fit() on the pipelines stored in
# `models`; they are only handed to cross_val_score, which (per the
# scikit-learn docs) fits clones. The fitted attributes below are therefore
# presumably missing for that reason -- confirm against the docs.
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].support_[0] # Raises: AttributeError: 'RFE' object has no attribute 'support_'
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].ranking_[0] # Raises: AttributeError: 'RFE' object has no attribute 'ranking_'

推荐答案

我自己解决了这个问题,把答案贴出来,希望能帮到别人。做法是使用“cross_validate”而不是“cross_val_score”,并加上选项“return_estimator=True”,这样就能取回不同折(fold)和不同RFE对应的管道,并通过索引访问它们。然后您就可以使用“named_steps”了。

# Explore the number of selected features for RFE
from numpy import mean
from numpy import std
from sklearn.model_selection import RepeatedKFold, cross_val_score, GridSearchCV
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Get the dataset
def get_dataset(df, target):
    """Separate the target column from the remaining feature columns.

    Args:
        df: Dataframe that contains the features and the target.
        target: Name of the column to use as the target.

    Returns:
        ``(X, y)`` where ``X`` is the dataframe with the target column
        dropped and ``y`` is a one-dimensional array of the target values.
    """
    # df[[target]] is a one-column frame, so its values need flattening.
    y = df[[target]].values.flatten()
    X = df.drop(columns=target)
    return X, y

# Get a list of models to evaluate
def get_models(list_num_cols, list_cat_cols):
    """Create the candidate pipelines, one per number of features to select.

    Each pipeline chains a column preprocessor, an RFE selector wrapping a
    decision-tree regressor, and a final decision-tree regressor.

    Args:
        list_num_cols: Columns that get median imputation.
        list_cat_cols: Columns that get most-frequent imputation followed by
            one-hot encoding.

    Returns:
        Dict keyed ``'DecisionTreeRegressor_2'`` and
        ``'DecisionTreeRegressor_3'`` holding the corresponding pipelines.
    """
    num_transformer = Pipeline(steps=[
        ('num_imputer', SimpleImputer(strategy='median')),
    ])
    cat_transformer = Pipeline(steps=[
        ('cat_imputer', SimpleImputer(strategy='most_frequent')),
        ('one-hot-encoder', OneHotEncoder()),
    ])
    # The same preprocessor object is reused by every pipeline.
    preprocessor = ColumnTransformer(transformers=[
        ('num', num_transformer, list_num_cols),
        ('cat', cat_transformer, list_cat_cols),
    ])

    def _build_pipeline(n_selected):
        # Assemble preprocessing -> RFE(n_selected) -> regressor.
        return Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('s_dtr', RFE(estimator=DecisionTreeRegressor(),
                          n_features_to_select=n_selected)),
            ('m_dtr', DecisionTreeRegressor()),
        ])

    return {
        'DecisionTreeRegressor_' + str(n_selected): _build_pipeline(n_selected)
        for n_selected in range(2, 4)
    }

# Evaluate a given model using cross-validation
def evaluate_model(model, X, y):
    """Cross-validate ``model`` and keep the estimator fitted on each split.

    Args:
        model: Estimator to evaluate (here, a pipeline from ``get_models``).
        X: Feature data passed through to ``cross_validate``.
        y: Target values passed through to ``cross_validate``.

    Returns:
        The dict returned by ``cross_validate``. Because
        ``return_estimator=True`` is passed, the caller can read the fitted
        estimators from ``'estimator'`` alongside ``'test_score'``.

    Raises:
        Whatever the estimator raises while fitting or scoring, since
        ``error_score='raise'`` is passed.
    """
    # This listing's import block only brings in cross_val_score, so the
    # cross_validate call below raised NameError; import it here.
    from sklearn.model_selection import cross_validate

    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=7)
    output = cross_validate(
        model, X, y,
        scoring='neg_mean_absolute_error',
        cv=cv,
        n_jobs=-1,
        error_score='raise',
        return_estimator=True,
    )
    return output


# Build the feature frame and target array (my_df is defined elsewhere)
X, y = get_dataset(my_df, 'my_target')   # Script entry point
# One pipeline per candidate feature count, built from the column dtypes
numeric_cols = X.select_dtypes(include='number').columns.tolist()
categorical_cols = X.select_dtypes(include='object').columns.tolist()
models = get_models(numeric_cols, categorical_cols)
# Cross-validate every pipeline, keep its scores, and inspect its RFE step
results = []
names = []
for name, model in models.items():
    output = evaluate_model(model, X, y)
    fold_scores = output['test_score']
    results.append(fold_scores)
    names.append(name)
    print('%s %.3f (%.3f)' % (name, mean(fold_scores), std(fold_scores)))
    print(output)
    # RFE step of the first estimator returned by the cross-validation
    fitted_rfe = output['estimator'][0].named_steps['s_dtr']
    print(fitted_rfe.support_)
    print(fitted_rfe.ranking_)
    # Selection flag and rank of the feature at index 2
    print(fitted_rfe.support_[2])
    print(fitted_rfe.ranking_[2])

这篇关于在Python3中使用管道获取对RFE的支持和排名属性的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆