OneHotEncoder Convert to Json Format

🧃 scikit-learn의 OneHotEncoder 결과를 json 처리해서 보여주는 방법

>> import json
>> import numpy as np
>> from sklearn.preprocessing import OneHotEncoder

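>> transformer = OneHotEncoder(handle_unknown='ignore', sparse=False)
# NOTE: scikit-learn >= 1.2 renamed `sparse` to `sparse_output` (`sparse` was removed in 1.4),
#       so on recent versions use: OneHotEncoder(handle_unknown='ignore', sparse_output=False)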

# example of single feature
>> X = [
        "StreetParking:yt_lot_1",
        "StreetParking:yt_lot_2",
        "StreetParking:yt_lot_3",
        "StreetParking:yt_lot_1",
        "StreetParking:yt_lot_2"
        ]  
  
# np.array(X).reshape(-1,1)
# array([['StreetParking:yt_lot_1'],
#       ['StreetParking:yt_lot_2'],
#       ['StreetParking:yt_lot_3'],
#       ['StreetParking:yt_lot_1'],
#       ['StreetParking:yt_lot_2']])

>> changed_field = transformer.fit_transform(np.array(X).reshape(-1,1))
>> print(changed_field)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]]

위와 같이 fit_transform()의 결과는 np.ndarray 형태이다.

>> import decimal
>> def _float_to_str(float_type):
      converted_value = decimal.Context().create_decimal(repr(float_type))
      return format(converted_value, 'f')

# 리스트 내부 개별 요소의 타입 변환
>> changed_field = list(list(map(_float_to_str, i)) for i in changed_field)
# 원-핫 결과(0.0 / 1.0)에 대해서는 다음과 같은 결과.. >> list(list(map(str, i)) for i in changed_field)
>> print(changed_field)

[['1.0', '0.0', '0.0'], 
 ['0.0', '1.0', '0.0'], 
 ['0.0', '0.0', '1.0'],
 ['1.0', '0.0', '0.0'], 
 ['0.0', '1.0', '0.0']]

# 리스트 내 개별 인덱싱에 대한 str() 처리 
>> changed_field_2 = list(map(lambda x: str(x), changed_field))
>> print(changed_field_2)

["['1.0', '0.0', '0.0']", 
 "['0.0', '1.0', '0.0']", 
 "['0.0', '0.0', '1.0']", 
 "['1.0', '0.0', '0.0']",
 "['0.0', '1.0', '0.0']"] 

# 결과 JSON 변환
>> json_convert = dict(zip(range(len(changed_field_2)), changed_field_2))
>> print(json_convert)

{0: "['1.0', '0.0', '0.0']", 
 1: "['0.0', '1.0', '0.0']",
 2: "['0.0', '0.0', '1.0']", 
 3: "['1.0', '0.0', '0.0']", 
 4: "['0.0', '1.0', '0.0']"}

>> json.dumps(json_convert)

'{"0": "[\'1.0\', \'0.0\', \'0.0\']", 
  "1": "[\'0.0\', \'1.0\', \'0.0\']", 
  "2": "[\'0.0\', \'0.0\', \'1.0\']", 
  "3": "[\'1.0\', \'0.0\', \'0.0\']", 
  "4": "[\'0.0\', \'1.0\', \'0.0\']"}'

🧊 만약 transformer 결과가 np.array가 아닌 scipy.sparse.csr.csr_matrix 인 경우

>> transformer = OneHotEncoder(handle_unknown='ignore', sparse=True)
# NOTE: on scikit-learn >= 1.2 the parameter is `sparse_output=True` (this is also the default)
>> transformer.fit_transform(np.array(X).reshape(-1,1))

<5x3 sparse matrix of type '<class 'numpy.float64'>'
        with 5 stored elements in Compressed Sparse Row format>
        
>> transformer.fit_transform(np.array(X).reshape(-1,1)).toarray()