-
Notifications
You must be signed in to change notification settings - Fork 18
/
queries.py
426 lines (308 loc) · 14 KB
/
queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
"""Constructs for different kinds of queries and managers."""
import ast
import operator
from datetime import date
from functools import reduce
from itertools import chain
from django.db import models
from django.db.models import F, Q
from lark import Lark, Transformer, v_args
from studies.fields import CONDITIONS, LANGUAGES
# Lower-case literal names mapped to the Python constants they stand for.
# (Not referenced by any other code visible in this part of the file.)
CONST_MAPPING = {"true": True, "false": False, "null": None}
# Spelled-out gender names mapped to one-letter codes; used by
# FunctionTransformer.gender_target to normalise the gender written in a query.
GENDER_MAPPING = {"male": "m", "female": "f", "other": "o"}
# Set of the first element of every entry in CONDITIONS.
CONDITION_FIELDS = {condition_tuple[0] for condition_tuple in CONDITIONS}
# Set of "speaks_<x>" names, where <x> is the first element of every entry in LANGUAGES.
LANGUAGE_FIELDS = {f"speaks_{language_tuple[0]}" for language_tuple in LANGUAGES}
# Lark grammar for the criteria query DSL.
#
# Boolean structure, loosest to tightest binding: "OR" (bool_expr), "AND"
# (bool_term), then a bool_factor, which is a "NOT" factor, a parenthesised
# expression, or one relation (gender, gestational age, age in days, a
# language flag, a condition flag, or a language count).
#
# The LANGUAGE_TARGET and CONDITION_TARGET terminals are filled in through
# str.format with one quoted alternative per member of LANGUAGE_FIELDS and
# CONDITION_FIELDS. The TRUE, FALSE and NULL terminals are declared but no
# rule in this grammar references them.
QUERY_GRAMMAR = """
?start: bool_expr
?bool_expr: bool_term ("OR" bool_term)*
?bool_term: bool_factor ("AND" bool_factor)*
?bool_factor: not_bool_factor
| "(" bool_expr ")"
| relation_expr
?relation_expr: gender_comparison
| gestational_age_comparison
| age_in_days_comparison
| language_comparison
| condition_comparison
| language_count_comparison
not_bool_factor: "NOT" bool_factor
gender_comparison: "gender" (EQ | NE) gender_target
// 24 to 40 weeks
gestational_age_comparison: "gestational_age_in_weeks" comparator GESTATIONAL_AGE_AS_WEEKS
age_in_days_comparison: "age_in_days" comparator INT
language_count_comparison: ("n_languages" | "num_languages") comparator INT
comparator: EQ | NE | LT | LTE | GT | GTE
gender_target: MALE | FEMALE | OTHER_GENDER | UNSPECIFIED
language_comparison: LANGUAGE_TARGET
condition_comparison: CONDITION_TARGET
// TERMINALS
LANGUAGE_TARGET: {language_targets}
CONDITION_TARGET: {condition_targets}
GESTATIONAL_AGE_AS_WEEKS: /(2[4-9]|3[0-9]|40)/i | UNSPECIFIED
EQ: "="
NE: "!="
LT: "<"
LTE: "<="
GT: ">"
GTE: ">="
TRUE: "true"i
FALSE: "false"i
NULL: "null"i
MALE: "male"i | "m"i
FEMALE: "female"i | "f"i
OTHER_GENDER: "other"i | "o"i
UNSPECIFIED: "na"i | "n/a"i
%import common.INT
%import common.WS
%ignore WS
""".format(
language_targets=" | ".join([f'"{target}"' for target in LANGUAGE_FIELDS]),
condition_targets=" | ".join([f'"{target}"' for target in CONDITION_FIELDS]),
)
# Parser built once at import time from the grammar above, using Lark's
# Earley algorithm; compile_expression() parses criteria strings with it.
QUERY_DSL_PARSER = Lark(QUERY_GRAMMAR, parser="earley")
def age_range_eligibility_for_study(child_age_range, study) -> bool:
    """Report whether a child age range overlaps the study's age range.

    Args:
        child_age_range: Indexable sequence whose first and last items bound
            the child's age; each is multiplied by 365 to get days
            (presumably the values are in years - confirm with callers).
        study: Object accepted by ``study_age_range``.

    Returns:
        bool: True when the two day ranges share at least one point
        (both ends are inclusive).
    """
    study_min_days, study_max_days = study_age_range(study)
    child_min_days = child_age_range[0] * 365
    child_max_days = child_age_range[-1] * 365
    # Two closed intervals overlap when each one starts before the other ends.
    return child_max_days >= study_min_days and child_min_days <= study_max_days
def get_child_eligibility_for_study(child_obj, study_obj):
    """Decide whether a child is eligible for a study.

    Three checks run in order and the first failure stops the evaluation:
    prior-participation rules, the study's age range, and the study's
    ``criteria_expression``.

    Args:
        child_obj: The child being checked.
        study_obj: The study being checked against.

    Returns:
        bool: True only if all three checks pass.
    """
    if not get_child_participation_eligibility(child_obj, study_obj):
        return False
    if not _child_in_age_range_for_study(child_obj, study_obj):
        return False
    return get_child_eligibility(child_obj, study_obj.criteria_expression)
def get_child_participation_eligibility(child, study) -> bool:
    """Check if child's participation in other studies changes their eligibility.

    Args:
        child (Child): Child model object
        study (Study): Study model object

    Returns:
        bool: True if the child is eligible based on their prior study
        participation: they have responses for every study in
        ``study.must_have_participated`` and for none of the studies in
        ``study.must_not_have_participated``.
    """
    # Study type id whose empty responses are disregarded below
    # (named after the ember frame player; the id is hard-coded here).
    ember_frame_player_id = 1

    def count_participated(related_studies):
        # Number of the given studies the child has a response for. Responses
        # that both belong to a study of the type above and have an empty
        # sequence are left out, for must-have and must-not-have alike.
        return (
            child.responses.filter(study__in=related_studies.all())
            .exclude(study__study_type_id=ember_frame_player_id, sequence=[])
            .distinct()
            .values_list("study")
            .count()
        )

    required_studies = study.must_have_participated
    if required_studies.exists():
        if count_participated(required_studies) != required_studies.count():
            return False

    forbidden_studies = study.must_not_have_participated
    if forbidden_studies.exists():
        return count_participated(forbidden_studies) == 0

    return True
def _child_in_age_range_for_study(child, study):
    """Return True when the child's current age is inside the study's age range.

    Uses the same age calculation as
    ``child_in_age_range_for_study_days_difference``; a child without a
    birthday is never in range.
    """
    if child.birthday:
        days_outside_range = child_in_age_range_for_study_days_difference(
            child, study
        )
        return days_outside_range == 0
    return False
def child_in_age_range_for_study_days_difference(child, study):
    """Measure how far (in days) the child's age is outside the study's age range.

    Args:
        child (Child): Child model object
        study (Study): Study model object

    Returns:
        int | None: the difference (in days) between the child's age and the
        study's min or max age (in days).
        0 if the child's age is within the study's age range (both bounds
        count as inside).
        Negative int if the child is too young (days below the minimum).
        Positive int if the child is too old (days above the maximum).
        None if the child has no birthday.
    """
    if not child.birthday:
        return None

    # Like _child_in_age_range_for_study, but reports the direction and size
    # of the miss instead of a plain boolean.
    # Age ranges are defined in DAYS, using shorthand of year = 365 days,
    # month = 30 days, to provide a reliable actual unit of time rather than
    # calendar "months" and "years" which vary in duration.
    # See logic used in web/studies/study-detail.html to display eligibility
    # to participant, help-text provided to researchers in
    # studies/templates/studies/_study_fields.html, and documentation for
    # researchers at
    # https://lookit.readthedocs.io/en/develop/researchers-set-study-fields.html#minimum-and-maximum-age-cutoffs
    youngest_days, oldest_days = study_age_range(study)
    child_days = (date.today() - child.birthday).days

    if child_days <= youngest_days:
        return child_days - youngest_days
    if child_days >= oldest_days:
        return child_days - oldest_days
    return 0
def study_age_range(study):
    """Return the study's (minimum, maximum) age expressed in days.

    Years count as 365 days and months as 30 days; the ``*_years``,
    ``*_months`` and ``*_days`` attributes of ``study`` are summed on that
    basis.

    Args:
        study: Object exposing ``min_age_years``, ``min_age_months``,
            ``min_age_days`` and the matching ``max_age_*`` attributes.

    Returns:
        tuple: ``(min_age_in_days, max_age_in_days)``.
    """

    def as_days(years, months, days):
        return (years * 365) + (months * 30) + days

    youngest = as_days(study.min_age_years, study.min_age_months, study.min_age_days)
    oldest = as_days(study.max_age_years, study.max_age_months, study.max_age_days)
    return youngest, oldest
def get_child_eligibility(child_obj, criteria_expr):
    """Evaluate a criteria expression against a child.

    Args:
        child_obj: The child to test; flattened with ``_get_expanded_child``
            before evaluation.
        criteria_expr: Criteria string in the query DSL. A falsy value
            (empty string, None) means "no criteria".

    Returns:
        bool: True when there are no criteria, otherwise the truthiness of
        the compiled expression applied to the flattened child.
    """
    if not criteria_expr:
        return True
    tester = compile_expression(criteria_expr)
    flattened_child = _get_expanded_child(child_obj)
    return bool(tester(flattened_child))
def compile_expression(boolean_algebra_expression: str):
    """Compile a boolean algebra expression into a Python function.

    The expression is parsed with ``QUERY_DSL_PARSER``, rewritten to Python
    source by ``FunctionTransformer`` and wrapped in a one-line function
    definition that is then compiled and executed in a fresh namespace.

    Args:
        boolean_algebra_expression: a string boolean algebra expression.
            A falsy value compiles to a function that always returns True.

    Returns:
        A function named ``property_tester`` taking one argument
        (``child_obj``) and returning the value of the expression.

    Raises:
        Any error raised by ``QUERY_DSL_PARSER.parse`` for text it cannot
        parse is propagated to the caller.
    """
    body_source = "True"
    if boolean_algebra_expression:
        tree = QUERY_DSL_PARSER.parse(boolean_algebra_expression)
        body_source = FunctionTransformer().transform(tree)

    source = " ".join(("def property_tester(child_obj): return", body_source))
    module_ast = ast.parse(source, mode="exec")
    code = compile(module_ast, filename="temp.py", mode="exec")

    # NOTE: the executed text is assembled only from FunctionTransformer
    # output, i.e. from fragments the grammar allowed.
    namespace = {}
    exec(code, namespace)
    return namespace["property_tester"]
def _get_expanded_child(child_object):
    """Flatten a child object into a dict that is easy to evaluate.

    The result is what the functions produced by ``compile_expression``
    receive as ``child_obj``: every value they look up is a first-level key.

    Args:
        child_object: a accounts.models.Child instance.

    Returns:
        A dict representing the child, where
        ``birthday`` is replaced by ``age_in_days``,
        ``existing_conditions`` is replaced by its own key/value pairs,
        ``languages_spoken`` is replaced by ``speaks_<code>`` keys, and
        ``gestational_age_at_birth`` is replaced by ``gestational_age_in_weeks``.
    """
    flat = _to_dict(child_object)

    # Birthday becomes an age in whole days as of today.
    today = date.today()
    flat["age_in_days"] = (today - flat.pop("birthday")).days

    # Each condition becomes its own top-level key.
    conditions = flat.pop("existing_conditions")
    flat.update(dict(conditions.items()))

    # Each language becomes a top-level "speaks_<code>" key.
    languages = flat.pop("languages_spoken")
    language_flags = {}
    for langcode, spoken in languages.items():
        language_flags[f"speaks_{langcode}"] = spoken
    flat.update(language_flags)

    # The gestational-age enum value becomes a number of weeks (or None).
    flat["gestational_age_in_weeks"] = _gestational_age_enum_value_to_weeks(
        flat.pop("gestational_age_at_birth")
    )
    return flat
def _to_dict(model_instance):
    """Build a plain dict from a model instance's fields.

    Covers both the concrete and the private fields listed on the
    instance's ``_meta``.

    Args:
        model_instance: A django model instance.

    Returns:
        A dictionary mapping each field's name to the value the field
        reads from the instance.
    """
    meta = model_instance._meta
    every_field = chain(meta.concrete_fields, meta.private_fields)
    return {
        field.name: field.value_from_object(model_instance)
        for field in every_field
    }
def _gestational_age_enum_value_to_weeks(enum_value: int):
    """Convert the enum value stored on a child into a number of weeks.

    This lets the expanded child be queried with a scalar value. The result
    is ``enum_value + 23`` clamped to the range 23..40. To see the
    enumerated values, please reference studies/fields.py.

    Args:
        enum_value: Stored gestational-age enum value, or None.

    Returns:
        int | None: weeks between 23 and 40, or None for any falsy input.
    """
    # NOTE(review): 0 is falsy and therefore also yields None, although the
    # original description said 0 == "under 24 weeks" (and 17 == "Over 40
    # weeks"). Confirm against studies/fields.py which meaning is intended.
    if not enum_value:
        return None
    at_least_23 = max(23, enum_value + 23)
    return min(at_least_23, 40)
@v_args(inline=True)
class FunctionTransformer(Transformer):
    """Rewrite a parsed criteria expression as Python expression source.

    Every callback returns a string fragment that reads from a dict named
    ``child_obj``; ``compile_expression`` places the final string after
    ``return`` in a generated function. Because fragments are spliced into
    larger ``and``/``or``/``not`` expressions, any fragment whose top-level
    operator binds more loosely than those operators must be parenthesised.
    """

    def bool_expr(self, bool_term, *others):
        """Join terms with ``or`` inside one pair of parentheses."""
        or_clauses = " ".join(f"or {other}" for other in others)
        return f"({bool_term} {or_clauses})"

    def bool_term(self, bool_factor, *others):
        """Join factors with ``and`` inside one pair of parentheses."""
        and_clauses = " ".join(f"and {other}" for other in others)
        return f"({bool_factor} {and_clauses})"

    def gender_comparison(self, comparator, target_gender):
        """Compare the child's gender with an already-quoted target."""
        # The grammar hands over the raw EQ/NE token here, so "=" still has
        # to be turned into Python's "==".
        python_operator = "==" if comparator == "=" else comparator
        return f"child_obj.get('gender') {python_operator} {target_gender}"

    def gestational_age_comparison(self, comparator, num_weeks):
        """Compare gestational age in weeks; False if no answer was provided.

        With an "na"/"n/a" target the value is compared against None.
        Otherwise the comparison is guarded so that a missing gestational age
        yields False instead of comparing None with a number.
        """
        weeks_lookup = "child_obj.get('gestational_age_in_weeks')"
        if num_weeks.lower() in ("na", "n/a"):
            # TODO: enhance validation layer so that a non-equals comparator will provide a sensible
            # error message.
            return f"{weeks_lookup} {comparator} None"
        # The parentheses are required: a conditional expression binds more
        # loosely than and/or/not, so without them "X if Y else False and Z"
        # would be read as "X if Y else (False and Z)" and Z would be ignored
        # whenever Y is truthy. They also make "NOT <comparison>" negate the
        # whole guarded comparison.
        return (
            f"({weeks_lookup} {comparator} {num_weeks} "
            f"if {weeks_lookup} else False)"
        )

    def age_in_days_comparison(self, comparator, num_days):
        """Compare the child's age in days with an integer."""
        return f"child_obj.get('age_in_days') {comparator} {num_days}"

    def language_comparison(self, lang_target):
        """Look up a ``speaks_*`` flag, defaulting to False."""
        return f"child_obj.get('{lang_target}', False)"

    def condition_comparison(self, condition_target):
        """Look up a condition flag, defaulting to False."""
        return f"child_obj.get('{condition_target}', False)"

    def language_count_comparison(self, comparator, num_langs):
        """Compare the number of truthy ``speaks_*`` entries with an integer."""
        return (
            f"len({{k: v for k, v in child_obj.items() if k.startswith('speaks_') and v}}) "
            f"{comparator} {num_langs}"
        )

    def gender_target(self, gender):
        """Return the target gender as a quoted, lower-cased code.

        Names found in GENDER_MAPPING are replaced by their one-letter code;
        anything else is passed through lower-cased.
        """
        # NOTE(review): "na" and "n/a" pass through as two different strings;
        # confirm which spelling the stored gender value uses.
        gender = gender.lower()
        return f"'{GENDER_MAPPING.get(gender, gender)}'"

    def comparator(self, relation):
        """Translate the DSL's ``=`` into ``==``; other operators pass through."""
        return "==" if relation == "=" else relation

    def not_bool_factor(self, bool_factor):
        """Negate a factor."""
        # ``not`` binds tighter than and/or, and every factor is either a
        # parenthesised group or a comparison/lookup, so no extra parentheses
        # are needed here.
        return f"not {bool_factor}"
class BitfieldQuerySet(models.QuerySet):
    """A QuerySet with helpers for bitwise membership queries.

    Both helpers rest on the expression
    ``F(field_name) + F(field_name).bitand(mask)``, which might produce a SQL
    query like so:

        WHERE ...
            "accounts_child"."existing_conditions" <
            (("accounts_child"."existing_conditions" + (1 * ("accounts_child"."existing_conditions" & 12))))

    This is a "bit hack": a bit state ANDed with a mask is greater than zero
    if any of the bits match between the mask and the state. So the filter
    says "give me rows where my_field is less than my_field + (my_field AND
    some_mask)", which can only hold when my_field and some_mask share a set
    bit.

    For has_one_of, all the bits of interest are ORed into a single mask
    (e.g., 01101).
    For has_all_of, the individual bits of interest (e.g. 01000, 00100, 00001 -
    only powers of 2 in decimal) each get their own condition, and the
    conditions are ANDed together in the WHERE clause.
    """

    def has_one_of(self, field_name: str, bitmasks: list):
        """Keep rows whose field has at least one of the bits in bitmasks.

        Args:
            field_name: The field which we will be querying against - usually a BigInt
            bitmasks: the list of integers which will serve as bitmasks

        Returns:
            A filtered queryset.
        """
        combined_mask = reduce(operator.or_, bitmasks, 0)
        lookups = {
            f"{field_name}__gt": 0,
            # True only when the field shares a set bit with the combined mask.
            f"{field_name}__lt": F(field_name) + F(field_name).bitand(combined_mask),
        }
        return self.filter(**lookups)

    def has_all_of(self, field_name: str, bitmasks: list):
        """Keep rows whose field matches every mask in bitmasks.

        Args:
            field_name: The field which we will be querying against - usually a BigInt
            bitmasks: the list of integers which will serve as bitmasks

        Returns:
            A filtered queryset.
        """
        # Start from "field > 0" and AND in one condition per mask.
        condition = Q(**{f"{field_name}__gt": 0})
        for mask in bitmasks:
            shares_bit_with_mask = Q(
                **{f"{field_name}__lt": F(field_name) + F(field_name).bitand(mask)}
            )
            condition = condition & shares_bit_with_mask
        return self.filter(condition)