Encoding error in pygpc/io.py #7

loganirado · 2025-01-09T14:00:47Z

Hi,

I'm trying to use pygpc to analize a custom differential equation model with two parameters and an output.

I'm running in the following error:
UnicodeEncodeError: 'ascii' codec can't encode character '\xe4' in position 71: ordinal not in range(128)

In the following script (error line is inclosed in **):

`def write_arr_to_hdf5(fn_hdf5, arr_name, data, overwrite_arr=True, verbose=False):
"""
Takes an array and adds it to an .hdf5 file

If data is list of dict, write_dict_to_hdf5() is called for each dict with adapted hdf5-folder name
Otherwise, data is casted to np.ndarray and dtype of unicode data casted to '|S'.

Parameters
----------
fn_hdf5 : str
    Filename of .hdf5 file
arr_name : str
    Complete path in .hdf5 file with array name
data : ndarray, list or dict
    Data to write
overwrite_arr : bool, optional, default: True
    Overwrite existing array
verbose : bool, optional, default: False
    Print information
"""
max_recursion_depth = 12

# dict or OrderedDict
if isinstance(data, dict) or isinstance(data, OrderedDict):
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array("None")
    else:
        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data,
                           folder=arr_name,
                           verbose=verbose)
        return

# list of dictionaries:
elif isinstance(data, list) and len(data) > 0 and (isinstance(data[0], dict) or is_instance(data[0])):
    t, dt = get_dtype(data)

    # do not save uuids in hdf5
    if dt == "uuid.UUID":
        return

    else:
        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        for idx, lst in enumerate(data):
            if len(arr_name.split("/")) >= max_recursion_depth:
                lst = np.array("None")

            write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                               data=lst,
                               folder=arr_name+"/"+str(idx),
                               verbose=verbose)
        return

# object
elif is_instance(data):
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array("None")
    else:
        t, dt = get_dtype(data)

        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data.__dict__,
                           folder=arr_name,
                           verbose=verbose)
        return

# list or tuple
elif type(data) is list or type(data) is tuple:
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array(["None"])

    t, dt = get_dtype(data)

    # do not save uuids in hdf5
    if dt == "uuid.UUID":
        return

    else:
        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        data_dict = dict()

        for idx, lst in enumerate(data):
            data_dict[idx] = lst

        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data_dict,
                           folder=arr_name,
                           verbose=verbose)

        return

elif not isinstance(data, np.ndarray):
    if len(arr_name.split("/")) >= max_recursion_depth:
        data = np.array("None")
    else:
        data = np.array(data)

# np.arrays of np.arrays
elif data.dtype == 'O' and len(data) > 1:
    if len(arr_name.split("/")) >= max_recursion_depth:
        return
    else:
        t, dt = get_dtype(data)

        # create group and set type and dtype attributes
        with h5py.File(fn_hdf5, "a") as f:
            f.create_group(str(arr_name))
            f[str(arr_name)].attrs.__setitem__("type", t)
            f[str(arr_name)].attrs.__setitem__("dtype", dt)

        data = data.tolist()
        write_dict_to_hdf5(fn_hdf5=fn_hdf5,
                           data=data,
                           folder=arr_name,
                           verbose=verbose)
        return

# do some type casting from numpy/pd -> h5py
# date column from experiment.csv is O
# plotsetting["view"] is O list of list of different length
# coil1 and coil2 columns names from experiment.csv is <U8
# coil_mean column name from experiment.csv is <U12
if data.dtype == 'O' or data.dtype.kind == 'U':
# if data.dtype == 'O':
    # data = np.array([s.encode('utf-8') for s in data])
    # pdb.set_trace()
    print('\n')
    print(data)
    print('\n')
    **data = (data.astype('|S')) # UnicodeEncodeError: 'ascii' codec can't encode character '\xe4' in position 71: ordinal not in range(128)**

    if verbose:
        print("Converting array " + arr_name + " to string")

t, dt = get_dtype(data)

with h5py.File(fn_hdf5, 'a') as f:
    # create data_set
    if overwrite_arr:
        try:
            del f[arr_name]
        except KeyError:
            pass

    f.create_dataset(arr_name, data=data)
    f[str(arr_name)].attrs.__setitem__("type", t)
    f[str(arr_name)].attrs.__setitem__("dtype", dt)

return`

Any idea about what is going wrong?

Thanks in advance!

The text was updated successfully, but these errors were encountered:

loganirado · 2025-01-09T14:18:42Z

The error was caused by a "ä" character in the folder name. I renamed the folder to fix it, bout would be interesting to know how to make the script accept "weird letters".

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Encoding error in pygpc/io.py #7

Encoding error in pygpc/io.py #7

loganirado commented Jan 9, 2025

loganirado commented Jan 9, 2025

Encoding error in pygpc/io.py #7

Encoding error in pygpc/io.py #7

Comments

loganirado commented Jan 9, 2025

loganirado commented Jan 9, 2025