Skip to content

Commit

Permalink
Merge pull request #275 from OpenCOMPES/add_column_filter
Browse files Browse the repository at this point in the history
add filter function in processor
  • Loading branch information
steinnymir authored Nov 18, 2023
2 parents efc7c83 + ab577f5 commit 7768f11
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 2 deletions.
41 changes: 41 additions & 0 deletions sed/core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from sed.calibrator import MomentumCorrector
from sed.core.config import parse_config
from sed.core.config import save_config
from sed.core.dfops import apply_filter
from sed.core.dfops import apply_jitter
from sed.core.metadata import MetaHandler
from sed.diagnostics import grid_histogram
Expand Down Expand Up @@ -402,6 +403,46 @@ def load(
duplicate_policy="merge",
)

def filter_column(
    self,
    column: str,
    min_value: float = -np.inf,
    max_value: float = np.inf,
) -> None:
    """Filter values in a column which are outside of a given range.

    The filter is applied to the main dataframe and, if a timed dataframe
    exists and contains the column, to the timed dataframe as well. The
    requested range is then recorded in the metadata under the "filter" key.

    Args:
        column (str): Name of the column to filter.
        min_value (float, optional): Lower bound of the range to keep, passed
            to ``apply_filter`` as ``lower_bound``. Defaults to -np.inf.
        max_value (float, optional): Upper bound of the range to keep, passed
            to ``apply_filter`` as ``upper_bound``. Defaults to np.inf.

    Raises:
        ValueError: If no dataframe is loaded, or if ``min_value`` is not
            smaller than ``max_value``.
        KeyError: If ``column`` is not a column of the dataframe.
    """
    # Check for a loaded dataframe first: the column lookup below would
    # otherwise fail with an unhelpful AttributeError on ``None.columns``.
    if self._dataframe is None:
        raise ValueError(f"No dataframe loaded, cannot filter column {column}!")
    if column not in self._dataframe.columns:
        raise KeyError(f"Column {column} not found in dataframe!")
    if min_value >= max_value:
        raise ValueError("min_value has to be smaller than max_value!")

    self._dataframe = apply_filter(
        self._dataframe,
        col=column,
        lower_bound=min_value,
        upper_bound=max_value,
    )
    # The timed dataframe does not necessarily carry every column, so it is
    # only filtered when it actually contains the requested one.
    if self._timed_dataframe is not None and column in self._timed_dataframe.columns:
        self._timed_dataframe = apply_filter(
            self._timed_dataframe,
            col=column,
            lower_bound=min_value,
            upper_bound=max_value,
        )

    metadata = {
        "filter": {
            "column": column,
            "min_value": min_value,
            "max_value": max_value,
        },
    }
    self._attributes.add(metadata, "filter", duplicate_policy="merge")

# Momentum calibration workflow
# 1. Bin raw detector data for distortion correction
def bin_and_load_momentum_calibration(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_dfops.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def test_drop_column():
def test_apply_filter():
"""Test function to filter a df by a column with upper/lower bounds."""
colname = "posx"
lower_bound = -0.5
upper_bound = 0.5
lower_bound = -0.1
upper_bound = 0.1
df_filtered = apply_filter(
df,
col=colname,
Expand Down
25 changes: 25 additions & 0 deletions tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,31 @@ def test_delay_calibration_workflow():
assert "delay" in processor.dataframe.columns


def test_filter_column():
    """Test the column filtering function of the processor"""
    config = parse_config(
        config={"core": {"loader": "mpes"}},
        folder_config={},
        user_config={},
        system_config={},
    )
    processor = SedProcessor(
        folder=df_folder,
        config=config,
        folder_config={},
        user_config={},
        system_config={},
    )
    # Use the 10 % and 90 % quantiles as bounds so the filter has rows to remove.
    low, high = np.quantile(processor.dataframe["X"].compute(), [0.1, 0.9])
    processor.filter_column("X", low, high)
    # Materialize the filtered column once and check both bounds on it.
    filtered = processor.dataframe["X"].compute()
    assert filtered.min() >= low
    assert filtered.max() <= high
    # A column that does not exist must be rejected.
    with pytest.raises(KeyError):
        processor.filter_column("wrong", low, high)
    # Swapped bounds (min above max) must be rejected.
    with pytest.raises(ValueError):
        processor.filter_column("X", high, low)


def test_add_jitter():
"""Test the jittering function"""
config = parse_config(
Expand Down

0 comments on commit 7768f11

Please sign in to comment.