Skip to content

Commit

Permalink
Make block-sizes overridable
Browse files: browse the repository at this point in the history
  • Loading branch information
ashenoy463 committed May 3, 2024
1 parent 5d0e741 commit 5ff9a35
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions mdx/ingest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -77,7 +77,10 @@ def __init__(
# End user methods

def read_trajectory(
self, data_path: os.PathLike = None, atomic_format: str = "frame"
self,
data_path: os.PathLike = None,
atomic_format: str = "frame",
blocksize: str = None,
) -> None:
"""
Read trajectory files, parse and store in class attribute
Expand All @@ -90,7 +93,7 @@ def read_trajectory(
db.read_text(
self.__get_data_files(data_path, "trajectory"),
linedelimiter="TIMESTEP",
blocksize=self.block,
blocksize=f"{self.block if blocksize is None else blocksize}",
)
.remove(lambda x: x == "ITEM: TIMESTEP")
.map(lambda x: x.split("ITEM: "))
Expand All @@ -101,7 +104,7 @@ def read_trajectory(

self.trajectory = corpus.compute() if self.eager else corpus

def read_bonds(self, data_path: os.PathLike = None) -> None:
def read_bonds(self, data_path: os.PathLike = None, blocksize: str = None) -> None:
"""
Read bond files, parse and store in class attribute
Expand All @@ -112,7 +115,7 @@ def read_bonds(self, data_path: os.PathLike = None) -> None:
db.read_text(
self.__get_data_files(data_path, "bonds"),
linedelimiter="# Timestep",
blocksize=self.block,
blocksize=f"{self.block if blocksize is None else blocksize}",
)
.remove(lambda x: x == "# Timestep")
.map(
Expand All @@ -129,7 +132,9 @@ def read_bonds(self, data_path: os.PathLike = None) -> None:

self.bonds = corpus.compute() if self.eager else corpus

def read_species(self, data_path: os.PathLike = None) -> None:
def read_species(
self, data_path: os.PathLike = None, blocksize: str = None
) -> None:
"""
Read species files, parse and store species data in class attribute
Expand All @@ -138,7 +143,9 @@ def read_species(self, data_path: os.PathLike = None) -> None:
"""
corpus = (
db.read_text(
self.__get_data_files(data_path, "species"), linedelimiter="# Timestep"
self.__get_data_files(data_path, "species"),
linedelimiter="# Timestep",
blocksize=f"{self.block if blocksize is None else blocksize}",
)
.map(lambda x: x[1:].split("\n")[:-1])
.remove(lambda x: x == [])
Expand Down Expand Up @@ -275,15 +282,10 @@ def __process_species_step(self, step_text: str):
header, data = list(map(lambda x: x.split(), step_text))

frame["timestep"] = int(data.pop(0))

print(header)
print(data)
frame["no_moles"] = int(data[0])
frame["no_species"] = int(data[1])

for specie, amount in zip(header[2:], data[2:]):
if frame["timestep"] == 2000:
print(specie, amount, "\n")
frame["species"][specie] = int(amount)

return frame
Expand Down

0 comments on commit 5ff9a35

Please sign in to comment.