问题

刚开始学习 sklearn,运行下面的代码时报错:

from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from  sklearn.preprocessing import  MinMaxScaler,StandardScaler ,Normalizer
from sklearn.impute import SimpleImputer
import  numpy as np

import jieba

def im():
    """
    Missing-value handling.

    Builds a SimpleImputer with the 'mean' strategy, calls fit_transform on a
    small 3x2 sample that contains one np.nan entry, and prints the result.

    :return: None
    """
    # NOTE: missing_values is the *string* 'NaN' here, while the sample below
    # marks its gap with np.nan. This is the configuration that produces the
    # ValueError quoted in the traceback that follows this snippet.
    im  = SimpleImputer(missing_values='NaN',strategy='mean')
    data = im.fit_transform([[1,2],[np.nan,3],[7,6]])
    print(data)

# Script entry point: run the imputation demo when executed directly.
if __name__ == "__main__":   
    im()

运行报错,

ValueError: Input contains NaN, infinity or a value too large for dtype('float64'),具体如下:

Traceback (most recent call last):
  File "E:/pycharm_workspace/matplotlibDemo/feature.py", line 104, in <module>
    im()
  File "E:/pycharm_workspace/matplotlibDemo/feature.py", line 95, in im
    data = im.fit_transform([[1,2],[np.nan,3],[7,6]])
  File "D:\skl3\lib\site-packages\sklearn\base.py", line 699, in fit_transform
    return self.fit(X, **fit_params).transform(X)
  File "D:\skl3\lib\site-packages\sklearn\impute\_base.py", line 288, in fit
    X = self._validate_input(X, in_fit=True)
  File "D:\skl3\lib\site-packages\sklearn\impute\_base.py", line 262, in _validate_input
    raise ve
  File "D:\skl3\lib\site-packages\sklearn\impute\_base.py", line 255, in _validate_input
    copy=self.copy)
  File "D:\skl3\lib\site-packages\sklearn\base.py", line 421, in _validate_data
    X = check_array(X, **check_params)
  File "D:\skl3\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "D:\skl3\lib\site-packages\sklearn\utils\validation.py", line 664, in check_array
    allow_nan=force_all_finite == 'allow-nan')
  File "D:\skl3\lib\site-packages\sklearn\utils\validation.py", line 106, in _assert_all_finite
    msg_dtype if msg_dtype is not None else X.dtype)
    
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

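Input contains NaN, infinity or a value too large for dtype('float64') 表示输入数据中包含 NaN、无穷大,或者超出 float64 表示范围的值。这里的原因是:missing_values='NaN' 传入的是字符串 'NaN',而数据中的缺失值是 np.nan,两者并不相等,SimpleImputer 没有把 np.nan 当作缺失值标记,于是在输入校验阶段因为数据含有 NaN 而报错。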

解决方法

把 missing_values 由字符串 'NaN' 改为 np.nan 即可(下面的示例同时把 strategy 换成了 'most_frequent'):

from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from  sklearn.preprocessing import  MinMaxScaler,StandardScaler ,Normalizer
from sklearn.impute import SimpleImputer
import  numpy as np

import jieba

def im():
    """
    Fill in a missing value and print the imputed matrix.

    np.nan is used as the missing-value marker and the imputer is configured
    with the 'most_frequent' strategy.

    :return: None
    """
    samples = [[1, 2], [np.nan, 3], [7, 6]]
    imputer = SimpleImputer(strategy='most_frequent', missing_values=np.nan)
    print(imputer.fit_transform(samples))

# Run the imputation demo only when this file is executed as a script.
if __name__ == "__main__":
    im()

运行结果:

[[1. 2.]
 [1. 3.]
 [7. 6.]]
Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐