Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: raw_timestamps support for writer and defragment #320

Merged
merged 9 commits into from
Jan 24, 2024
Binary file added nptdms/test/data/raw_timestamps.tdms
Binary file not shown.
12 changes: 10 additions & 2 deletions nptdms/test/test_example_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
"""

import os
import numpy as np
from nptdms import tdms
from io import BytesIO

import numpy as np
from nptdms import tdms, TdmsWriter

DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + '/data'

Expand Down Expand Up @@ -33,6 +34,13 @@ def test_raw_format():
0.2517777])


def test_defragment_raw_timestamps():
    """Test defragmenting a file with raw timestamps

    Defragments the example file into an in-memory buffer, reads the result
    back, and checks that every timestamp property on every channel has the
    same ``seconds`` and ``second_fractions`` as in the input file.
    """
    test_file_path = DATA_DIR + '/raw_timestamps.tdms'
    output_file = BytesIO()
    TdmsWriter.defragment(test_file_path, output_file, raw_timestamps=True)

    # Rewind the buffer so the defragmented content can be read back.
    output_file.seek(0)

    # Open both files with raw_timestamps=True so that timestamp properties
    # are compared at full precision instead of after conversion to a
    # numpy datetime64.
    input_tdms = tdms.TdmsFile(test_file_path, raw_timestamps=True)
    output_tdms = tdms.TdmsFile(output_file, raw_timestamps=True)

    compared = 0
    for group in input_tdms.groups():
        for channel in group.channels():
            output_channel = output_tdms[group.name][channel.name]
            for name, expected in channel.properties.items():
                # Only raw timestamp values carry a second_fractions field.
                if not hasattr(expected, 'second_fractions'):
                    continue
                actual = output_channel.properties[name]
                # Compare the fields directly so the check does not rely on
                # the timestamp type defining equality.
                assert (actual.seconds, actual.second_fractions) == \
                    (expected.seconds, expected.second_fractions)
                compared += 1

    # Guard against the test passing without checking anything.
    assert compared > 0
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to have a check that if you read in the defragmented file, there hasn't been a loss of precision of the timestamp properties

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adamreeve great idea. I've added assertions that the channel.data and channel.time_track() are the same in both the input and output data (using np.testing.assert_equal). I'm not sure if this is checking for loss of precision of the timestamps and could use your input on that.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I don't think that's enough as the time track will be cast down to a numpy datetime64. To check that this test is actually testing the fix, it should fail if we remove raw_timestamps=True from TdmsWriter.defragment.

I think what you'll want to do is pass raw_timestamps=True when opening both output_tdms and input_tdms, and then add a check like:

# verify the written timestamp matches the input
output_channel = output_tdms[group.name][channel.name]
assert output_channel.properties['wf_start_time'] == channel.properties['wf_start_time']

To make that work correctly you'll also need to implement __eq__ for TdmsTimestamp

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adamreeve BTW, I also just refactored the test_defragment to test more example files. In doing so, I noticed that one of the example files raises an error, due to scalar channel data. However, this error should probably be addressed in a separate issue and PR.



def test_big_endian_format():
"""Test reading a file that encodes data in big endian mode"""
test_file = tdms.TdmsFile(DATA_DIR + '/big_endian.tdms')
Expand Down
6 changes: 5 additions & 1 deletion nptdms/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import struct
from datetime import datetime, timedelta
import numpy as np


EPOCH = np.datetime64('1904-01-01 00:00:00', 's')

_struct_pack = struct.pack


class TdmsTimestamp(object):
""" A Timestamp from a TDMS file
Expand All @@ -18,6 +20,8 @@ class TdmsTimestamp(object):
def __init__(self, seconds, second_fractions):
self.seconds = seconds
self.second_fractions = second_fractions
self.enum_value = 0x44
self.bytes = _struct_pack('<Qq', second_fractions, seconds)
jimkring marked this conversation as resolved.
Show resolved Hide resolved

def __repr__(self):
return "TdmsTimestamp({0}, {1})".format(self.seconds, self.second_fractions)
Expand Down
7 changes: 5 additions & 2 deletions nptdms/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np
from nptdms.common import toc_properties, ObjectPath
from nptdms.timestamp import TdmsTimestamp
from nptdms.types import *
from nptdms import TdmsFile

Expand All @@ -21,7 +22,7 @@ class TdmsWriter(object):
"""

@classmethod
def defragment(cls, source, destination, version=4712, index_file=False):
def defragment(cls, source, destination, version=4712, index_file=False, raw_timestamps=False):
jimkring marked this conversation as resolved.
Show resolved Hide resolved
""" Defragments an existing TdmsFile by loading and moving each Object to a separate channel
to stream read one consecutive part of the file for faster access.

Expand All @@ -38,7 +39,7 @@ def defragment(cls, source, destination, version=4712, index_file=False):
If ``destination`` is a readable object ``index_file`` can either be a readable object or ``False``
to store a ``.tdms_index`` file inside of the submitted object or not.
"""
file = TdmsFile(source)
file = TdmsFile(source, raw_timestamps)
with cls(destination, version=version, index_file=index_file) as new_file:
new_file.write_segment([RootObject(file.properties)])
for group in file.groups():
Expand Down Expand Up @@ -375,6 +376,8 @@ def _to_tdms_value(value):
return TimeStamp(value)
if isinstance(value, np.datetime64):
return TimeStamp(value)
if isinstance(value, TdmsTimestamp):
return value
if isinstance(value, str):
return String(value)
if isinstance(value, bytes):
Expand Down
Loading