Selecting numerical and categorical variables
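The code: load the dataset, inspect each column's dtype, and split the column names into categorical and numerical groups.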
In [9]:
import pandas as pd
In [10]:
# Load the dataset into a DataFrame. The path is relative, so it is resolved
# against the kernel's current working directory.
df = pd.read_csv("sample_dataset.csv")
In [15]:
# Frequency of each distinct value in the 'area error' column.
# value_counts() skips missing values by default (dropna=True), so these
# counts need not add up to len(df).
df['area error'].value_counts()
Out[15]:
A    489
B      4
C      1
Name: area error, dtype: int64
In [16]:
# Show the dtype of every column to see which ones are stored as non-numeric.
df.dtypes
Out[16]:
mean radius                float64
mean texture               float64
mean perimeter             float64
mean area                  float64
mean smoothness            float64
mean compactness           float64
mean concavity             float64
mean concave points        float64
mean symmetry              float64
mean fractal dimension     float64
radius error               float64
texture error              float64
perimeter error            float64
area error                  object
smoothness error           float64
compactness error          float64
concavity error            float64
concave points error       float64
symmetry error             float64
fractal dimension error    float64
worst radius               float64
worst texture              float64
worst perimeter            float64
worst area                 float64
worst smoothness           float64
worst compactness          float64
worst concavity            float64
worst concave points       float64
worst symmetry             float64
worst fractal dimension    float64
target                       int64
dtype: object
In [19]:
# Split the column labels of `df` into two groups based on dtype.

# Categorical: columns stored as object (typically strings), pandas
# 'category', or boolean.
categorical_variables = df.select_dtypes(include=['object', 'category', 'bool']).columns

# Numerical: select numeric dtypes explicitly instead of taking "everything
# that is not categorical". With an exclude-list, any other non-numeric dtype
# (for example datetime64) would silently end up in the numerical group.
# Boolean columns are not matched by 'number', so the two groups stay disjoint.
# NOTE(review): an integer column such as 'target' is numeric by dtype and is
# therefore included here; if it is the label, drop it before using this list
# as a feature list.
numerical_variables = df.select_dtypes(include='number').columns
In [21]:
# Display the column labels that were classified as categorical.
categorical_variables
Out[21]:
Index(['area error'], dtype='object')
In [22]:
# Display the column labels that were classified as numerical.
numerical_variables
Out[22]:
Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness', 'mean compactness', 'mean concavity', 'mean concave points', 'mean symmetry', 'mean fractal dimension', 'radius error', 'texture error', 'perimeter error', 'smoothness error', 'compactness error', 'concavity error', 'concave points error', 'symmetry error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst perimeter', 'worst area', 'worst smoothness', 'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry', 'worst fractal dimension', 'target'], dtype='object')
0 comments