Converting OneHotEncoder Output to JSON Format
🧃 scikit-learn의 OneHotEncoder 결과를 JSON으로 변환해서 보여주는 방법
>> import json
>> import numpy as np
>> from sklearn.preprocessing import OneHotEncoder
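>> transformer = OneHotEncoder(handle_unknown='ignore', sparse=False)
# 참고: scikit-learn 1.2부터 `sparse` 인자는 `sparse_output`으로 이름이 바뀌었고 1.4에서 제거됨 → 최신 버전에서는 sparse_output=False 사용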
# example of single feature
>> X = [
"StreetParking:yt_lot_1",
"StreetParking:yt_lot_2",
"StreetParking:yt_lot_3",
"StreetParking:yt_lot_1",
"StreetParking:yt_lot_2"
]
# np.array(X).reshape(-1,1)
# array([['StreetParking:yt_lot_1'],
# ['StreetParking:yt_lot_2'],
# ['StreetParking:yt_lot_3'],
# ['StreetParking:yt_lot_1'],
# ['StreetParking:yt_lot_2']])
>> changed_field = transformer.fit_transform(np.array(X).reshape(-1,1))
>> changed_field
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.]])
위와 같이 fit_transform()의 결과는 np.ndarray 형태이다.
>> import decimal
def _float_to_str(float_type):
    """Return *float_type* as a plain (non-scientific) decimal string.

    The value is turned into text, parsed into a ``decimal.Decimal`` and
    rendered with the ``'f'`` format, so ``1e-07`` becomes ``'0.0000001'``
    and ``1.0`` stays ``'1.0'``.

    Args:
        float_type: A Python float or NumPy float scalar (more generally,
            anything whose ``str()`` is a valid decimal literal).

    Returns:
        str: Fixed-point text for the value.

    Raises:
        decimal.InvalidOperation: If the text form of the value is not a
            valid decimal literal (with the default context traps).
    """
    # str() rather than repr(): under NumPy >= 2, repr(np.float64(1.0)) is
    # 'np.float64(1.0)', which Decimal cannot parse. For plain Python floats
    # str() and repr() produce the same text.
    converted_value = decimal.Context().create_decimal(str(float_type))
    return format(converted_value, 'f')
# 리스트 내부 개별 요소의 타입 변환
>> changed_field = list(list(map(_float_to_str, i)) for i in changed_field)
# 이 예제의 값(0.0, 1.0)에서는 다음과 결과가 같음: list(list(map(str, i)) for i in changed_field)
>> print(changed_field)
[['1.0', '0.0', '0.0'],
['0.0', '1.0', '0.0'],
['0.0', '0.0', '1.0'],
['1.0', '0.0', '0.0'],
['0.0', '1.0', '0.0']]
# 리스트의 각 행(내부 리스트)을 통째로 str()로 변환
>> changed_field_2 = list(map(lambda x: str(x), changed_field))
>> changed_field_2
["['1.0', '0.0', '0.0']",
"['0.0', '1.0', '0.0']",
"['0.0', '0.0', '1.0']",
"['1.0', '0.0', '0.0']",
"['0.0', '1.0', '0.0']"]
# 결과 JSON 변환
>> json_convert = dict(enumerate(changed_field_2))
>> print(json_convert)
{0: "['1.0', '0.0', '0.0']",
1: "['0.0', '1.0', '0.0']",
2: "['0.0', '0.0', '1.0']",
3: "['1.0', '0.0', '0.0']",
4: "['0.0', '1.0', '0.0']"}
>> json.dumps(json_convert)
'{"0": "[\'1.0\', \'0.0\', \'0.0\']",
"1": "[\'0.0\', \'1.0\', \'0.0\']",
"2": "[\'0.0\', \'0.0\', \'1.0\']",
"3": "[\'1.0\', \'0.0\', \'0.0\']",
"4": "[\'0.0\', \'1.0\', \'0.0\']"}'
🧊 만약 transformer 결과가 np.ndarray가 아닌 scipy.sparse.csr_matrix인 경우, .toarray()로 np.ndarray로 변환한 뒤 위와 동일하게 처리한다.
>> transformer = OneHotEncoder(handle_unknown='ignore', sparse=True)
# 참고: scikit-learn 1.2 이상에서는 sparse=True 대신 sparse_output=True (기본값) 사용
>> transformer.fit_transform(np.array(X).reshape(-1,1))
<5x3 sparse matrix of type '<class 'numpy.float64'>'
with 5 stored elements in Compressed Sparse Row format>
>> transformer.fit_transform(np.array(X).reshape(-1,1)).toarray()