ValueError: Input contains NaN, infinity or a value too large for dtype('float64')
问题:刚开始学习 sklearn,运行下面的代码时报错:from sklearn.feature_extraction import DictVectorizer;from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer;from sklearn.preprocessing import MinMaxScaler…
·
问题
刚开始学习 sklearn ,运行下面的代码时报错,
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler,StandardScaler ,Normalizer
from sklearn.impute import SimpleImputer
import numpy as np
import jieba
def im():
    """Try to mean-impute a small data set; this version raises ValueError.

    The imputer is configured with the *string* ``'NaN'`` as its
    missing-value marker, while the data marks the gap with ``np.nan``.
    As the traceback shown right after this snippet records, the
    ``fit_transform`` call fails input validation with
    ``ValueError: Input contains NaN, infinity or a value too large``.
    The snippet is kept in its failing form on purpose.

    :return: None
    """
    samples = [[1, 2], [np.nan, 3], [7, 6]]
    # 'NaN' (str) is not np.nan, so the NaN in `samples` is rejected as
    # invalid input instead of being treated as the value to fill in.
    imputer = SimpleImputer(strategy='mean', missing_values='NaN')
    filled = imputer.fit_transform(samples)
    print(filled)
# Run the demo only when this file is executed directly as a script.
if __name__ == "__main__":
    im()
运行报错:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64')。具体如下:
Traceback (most recent call last):
File "E:/pycharm_workspace/matplotlibDemo/feature.py", line 104, in <module>
im()
File "E:/pycharm_workspace/matplotlibDemo/feature.py", line 95, in im
data = im.fit_transform([[1,2],[np.nan,3],[7,6]])
File "D:\skl3\lib\site-packages\sklearn\base.py", line 699, in fit_transform
return self.fit(X, **fit_params).transform(X)
File "D:\skl3\lib\site-packages\sklearn\impute\_base.py", line 288, in fit
X = self._validate_input(X, in_fit=True)
File "D:\skl3\lib\site-packages\sklearn\impute\_base.py", line 262, in _validate_input
raise ve
File "D:\skl3\lib\site-packages\sklearn\impute\_base.py", line 255, in _validate_input
copy=self.copy)
File "D:\skl3\lib\site-packages\sklearn\base.py", line 421, in _validate_data
X = check_array(X, **check_params)
File "D:\skl3\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "D:\skl3\lib\site-packages\sklearn\utils\validation.py", line 664, in check_array
allow_nan=force_all_finite == 'allow-nan')
File "D:\skl3\lib\site-packages\sklearn\utils\validation.py", line 106, in _assert_all_finite
msg_dtype if msg_dtype is not None else X.dtype)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
Input contains NaN, infinity or a value too large for dtype('float64')
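表示输入数据中包含 NaN、无穷大,或超出 float64 表示范围的值。这里的真正原因是:missing_values='NaN' 传入的是字符串,而数据中的缺失值是 np.nan,两者并不相同。SimpleImputer 因此不会把 np.nan 当作"待填补的缺失值",而是在输入校验(check_array)阶段把它当作非法值,直接抛出 ValueError。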
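解决方法
把 missing_values 的值由字符串 'NaN' 改为 np.nan 即可。下面的示例同时把 strategy 改成了 'most_frequent',这一改动与报错无关;继续使用 'mean' 也可以正常运行,只是填补的值不同。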
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler,StandardScaler ,Normalizer
from sklearn.impute import SimpleImputer
import numpy as np
import jieba
def im():
    """Fill in the missing value of a small data set and print the result.

    The imputer uses ``np.nan`` as its missing-value marker, which matches
    the marker used in the data, and fills gaps according to the
    ``'most_frequent'`` strategy.

    :return: None
    """
    samples = [[1, 2], [np.nan, 3], [7, 6]]
    # The marker must be np.nan itself (not the string 'NaN') so that the
    # NaN entry is recognised as the value to impute.
    imputer = SimpleImputer(strategy='most_frequent', missing_values=np.nan)
    filled = imputer.fit_transform(samples)
    print(filled)
# Run the demo only when this file is executed directly as a script.
if __name__ == "__main__":
    im()
运行结果(不再报错):
[[1. 2.]
[1. 3.]
[7. 6.]]
更多推荐
已为社区贡献2条内容
所有评论(0)