Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Encoding error in pygpc/io.py #7

Open
loganirado opened this issue Jan 9, 2025 · 1 comment
Open

Encoding error in pygpc/io.py #7

loganirado opened this issue Jan 9, 2025 · 1 comment

Comments

@loganirado
Copy link

Hi,

I'm trying to use pygpc to analize a custom differential equation model with two parameters and an output.

I'm running in the following error:
UnicodeEncodeError: 'ascii' codec can't encode character '\xe4' in position 71: ordinal not in range(128)

In the following script (error line is inclosed in **):

`def write_arr_to_hdf5(fn_hdf5, arr_name, data, overwrite_arr=True, verbose=False):
"""
Takes an array and adds it to an .hdf5 file

If data is list of dict, write_dict_to_hdf5() is called for each dict with adapted hdf5-folder name
Otherwise, data is casted to np.ndarray and dtype of unicode data casted to '|S'.

Parameters
----------
fn_hdf5 : str
    Filename of .hdf5 file
arr_name : str
    Complete path in .hdf5 file with array name
data : ndarray, list or dict
    Data to write
overwrite_arr : bool, optional, default: True
    Overwrite existing array
verbose : bool, optional, default: False
    Print information
"""
max_recursion_depth = 12

# dict or OrderedDict
if isinstance(data, dict) or isinstance(data, OrderedDict):
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array("None")
    else:
        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data,
                           folder=arr_name,
                           verbose=verbose)
        return

# list of dictionaries:
elif isinstance(data, list) and len(data) > 0 and (isinstance(data[0], dict) or is_instance(data[0])):
    t, dt = get_dtype(data)

    # do not save uuids in hdf5
    if dt == "uuid.UUID":
        return

    else:
        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        for idx, lst in enumerate(data):
            if len(arr_name.split("/")) >= max_recursion_depth:
                lst = np.array("None")

            write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                               data=lst,
                               folder=arr_name+"/"+str(idx),
                               verbose=verbose)
        return

# object
elif is_instance(data):
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array("None")
    else:
        t, dt = get_dtype(data)

        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data.__dict__,
                           folder=arr_name,
                           verbose=verbose)
        return

# list or tuple
elif type(data) is list or type(data) is tuple:
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array(["None"])

    t, dt = get_dtype(data)

    # do not save uuids in hdf5
    if dt == "uuid.UUID":
        return

    else:
        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        data_dict = dict()

        for idx, lst in enumerate(data):
            data_dict[idx] = lst

        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data_dict,
                           folder=arr_name,
                           verbose=verbose)

        return

elif not isinstance(data, np.ndarray):
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array("None")
    else:
        data = np.array(data)

# np.arrays of np.arrays
elif data.dtype == 'O' and len(data) > 1:
    if len(arr_name.split("/")) >= max_recursion_depth:
        return
    else:
        t, dt = get_dtype(data)

        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        data = data.tolist()
        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data,
                           folder=arr_name,
                           verbose=verbose)
        return

# do some type casting from numpy/pd -> h5py
# date column from experiment.csv is O
# plotsetting["view"] is O list of list of different length
# coil1 and coil2 columns names from experiment.csv is <U8
# coil_mean column name from experiment.csv is <U12
if data.dtype == 'O' or data.dtype.kind == 'U':
# if data.dtype == 'O':
    # data = np.array([s.encode('utf-8') for s in data])
    # pdb.set_trace()
    print('\n')
    print(data)
    print('\n')
    **data = (data.astype('|S')) # UnicodeEncodeError: 'ascii' codec can't encode character '\xe4' in position 71: ordinal not in range(128)**

    if verbose:
        print("Converting array " + arr_name + " to string")

t, dt = get_dtype(data)

with h5py.File(fn_hdf5, 'a') as f:
    # create data_set
    if overwrite_arr:
        try:
            del f[arr_name]
        except KeyError:
            pass

    f.create_dataset(arr_name, data=data)
    f[str(arr_name)].attrs.__setitem__("type", t)
    f[str(arr_name)].attrs.__setitem__("dtype", dt)

return`

Any idea about what is going wrong?

Thanks in advance!

@loganirado
Copy link
Author

The error was caused by a "ä" character in the folder name. I renamed the folder to fix it, bout would be interesting to know how to make the script accept "weird letters".

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant