Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support as_int on bool series #2359

Merged
merged 6 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/includes/generated_docs/language__series.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ status = status_code.map_values(
```
</div>

<div class="attr-heading" id="BoolPatientSeries.as_int">
<tt><strong>as_int</strong>()</tt>
<a class="headerlink" href="#BoolPatientSeries.as_int" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Return each value in this Boolean series as 1 (True) or 0 (False).
</div>

</div>


Expand Down Expand Up @@ -243,6 +251,14 @@ status = status_code.map_values(
```
</div>

<div class="attr-heading" id="BoolEventSeries.as_int">
<tt><strong>as_int</strong>()</tt>
<a class="headerlink" href="#BoolEventSeries.as_int" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Return each value in this Boolean series as 1 (True) or 0 (False).
</div>

<div class="attr-heading" id="BoolEventSeries.count_distinct_for_patient">
<tt><strong>count_distinct_for_patient</strong>()</tt>
<a class="headerlink" href="#BoolEventSeries.count_distinct_for_patient" title="Permanent link">🔗</a>
Expand Down
29 changes: 29 additions & 0 deletions docs/includes/generated_docs/specs.md
Original file line number Diff line number Diff line change
Expand Up @@ -2558,6 +2558,33 @@ returns the following patient series:



### 7.2 Convert a boolean value to an integer


#### 7.2.1 Bool as int
Booleans are converted to 0 (False) or 1 (True).

This example makes use of a patient-level table named `p` containing the following data:

| patient|b1 |
| - | - |
| 1|T |
| 2| |
| 3|F |

```python
p.b1.as_int()
```
returns the following patient series:

| patient | value |
| - | - |
| 1|1 |
| 2| |
| 3|0 |



## 8 Operations on integer series


Expand Down Expand Up @@ -3030,6 +3057,8 @@ returns the following patient series:


#### 11.1.3 Case with boolean column
Note that individual boolean columns can be converted to the integers 0 and 1 using
the `as_int()` method.

This example makes use of a patient-level table named `p` containing the following data:

Expand Down
12 changes: 2 additions & 10 deletions ehrql/measures/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from ehrql.measures.measures import get_all_group_by_columns
from ehrql.query_model.column_specs import ColumnSpec, get_column_spec_from_series
from ehrql.query_model.nodes import Case, Dataset, Function, Value, get_series_type
from ehrql.query_model.nodes import Dataset, Function, Value, get_series_type
from ehrql.query_model.transforms import substitute_parameters


Expand Down Expand Up @@ -207,15 +207,7 @@ def series_as_int(series):
if series_type is int:
return series
elif series_type is bool:
# TODO: This is definitely not the most efficient way to do this. We should
# extend the `CastToInt` operation to apply to boolean as well.
return Case(
{
Function.EQ(series, Value(True)): Value(1),
Function.EQ(series, Value(False)): Value(0),
},
default=None,
)
return Function.CastToInt(series)
else:
assert False

Expand Down
15 changes: 8 additions & 7 deletions ehrql/query_engines/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,14 @@ def date_difference_in_days(self, end, start):
)

def cast_to_int(self, value):
# Trino's casting to int rounds away from zero. We need to round towards zero for
# consistency with other query engines.
rounded_towards_zero = sqlalchemy.case(
(value > 0, SQLFunction("FLOOR", value)),
else_=SQLFunction("CEILING", value),
)
return sqlalchemy.cast(rounded_towards_zero, sqlalchemy.Integer)
if isinstance(value.type, sqlalchemy.Numeric):
# Trino's casting to int rounds away from zero. We need to round towards zero for
# consistency with other query engines.
value = sqlalchemy.case(
(value > 0, SQLFunction("FLOOR", value)),
else_=SQLFunction("CEILING", value),
)
return sqlalchemy.cast(value, sqlalchemy.Integer)

def truedivide(self, lhs, rhs):
rhs_null_if_zero = SQLFunction("NULLIF", rhs, 0.0, type_=sqlalchemy.Float)
Expand Down
10 changes: 10 additions & 0 deletions ehrql/query_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,16 @@ def __invert__(self: T) -> T:
"""
return _apply(qm.Function.Not, self)

# Typing-only overloads: a patient-level series is annotated as returning an
# `IntPatientSeries`, and an event-level series as returning an
# `IntEventSeries`.
@overload
def as_int(self: "PatientSeries") -> "IntPatientSeries": ...
@overload
def as_int(self: "EventSeries") -> "IntEventSeries": ...
def as_int(self):
    """
    Return each value in this Boolean series as 1 (True) or 0 (False).
    """
    # Built on the query model's `CastToInt` operation rather than a
    # boolean-specific node.
    # NOTE(review): the accompanying spec test expects a missing (null) input
    # to give a null result, not 0 or 1 — confirm that this holds for every
    # query engine's `CastToInt` implementation.
    return _apply(qm.Function.CastToInt, self)


class BoolPatientSeries(BoolFunctions, PatientSeries):
_type = bool
Expand Down
2 changes: 1 addition & 1 deletion ehrql/query_model/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ class FloorDivide(Series[int]):

# Casting numeric types
class CastToInt(Series[int]):
source: Series[Numeric]
source: Series[Numeric] | Series[bool]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could save eight whole characters by writing this as:

Series[Numeric | bool]

Eight, Becky! What were you thinking?


class CastToFloat(Series[float]):
source: Series[Numeric]
Expand Down
25 changes: 25 additions & 0 deletions tests/spec/bool_series_ops/test_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from ..tables import p


title = "Convert a boolean value to an integer"

table_data = {
p: """
| b1
--+----
1 | T
2 |
3 | F
""",
}


def test_bool_as_int(spec_test):
    """
    Booleans are converted to 0 (False) or 1 (True).
    """
    # Patient 2 has no value for `b1` in `table_data`, so the expected result
    # for that patient is None rather than 0 or 1.
    spec_test(
        table_data,
        p.b1.as_int(),
        {1: 1, 2: None, 3: 0},
    )
4 changes: 4 additions & 0 deletions tests/spec/case_expressions/test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def test_case_with_default(spec_test):


def test_case_with_boolean_column(spec_test):
"""
Note that individual boolean columns can be converted to the integers 0 and 1 using
the `as_int()` method.
"""
table_data = {
p: """
| i1 | b1
Expand Down
1 change: 1 addition & 0 deletions tests/spec/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
],
"bool_series_ops": [
"test_logical_ops",
"test_conversion",
],
"int_series_ops": [
"test_arithmetic_ops",
Expand Down
Loading