lookit-api/accounts/queries.py at develop · lookit/lookit-api

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

"""Constructs for different kinds of queries and managers."""

import ast

import operator

from datetime import date

from functools import reduce

from itertools import chain

from django.db import models

from django.db.models import F, Q

from lark import Lark, Transformer, v_args

from studies.fields import CONDITIONS, LANGUAGES

# Lower-case literal names mapped to the Python constants they stand for.
# NOTE(review): nothing in the visible part of this module reads CONST_MAPPING —
# confirm whether it is still used elsewhere.
CONST_MAPPING = {"true": True, "false": False, "null": None}

# Long-form gender names mapped to single-letter codes. FunctionTransformer.gender_target
# looks the (lower-cased) gender word up here and falls back to the word itself.
GENDER_MAPPING = {"male": "m", "female": "f", "other": "o"}

# Set of the first element of every CONDITIONS entry; these names become the
# alternatives of the CONDITION_TARGET terminal in QUERY_GRAMMAR.
CONDITION_FIELDS = {condition_tuple[0] for condition_tuple in CONDITIONS}

# Set of "speaks_<x>" names, one per LANGUAGES entry (x = first element of the entry);
# these names become the alternatives of the LANGUAGE_TARGET terminal in QUERY_GRAMMAR.
LANGUAGE_FIELDS = {f"speaks_{language_tuple[0]}" for language_tuple in LANGUAGES}

# Lark grammar for the study-criteria expression language.
#   * Boolean layer: OR-separated terms, each an AND-separated list of factors; a factor
#     is a NOT-prefixed factor, a parenthesised expression, or a single relation.
#   * Relations: gender (= / != only), gestational_age_in_weeks, age_in_days,
#     n_languages / num_languages, or a bare language / condition name.
#   * The LANGUAGE_TARGET and CONDITION_TARGET terminals are filled in by .format()
#     below from LANGUAGE_FIELDS and CONDITION_FIELDS (both sets, so the order of the
#     generated alternatives is not fixed).
# NOTE(review): the TRUE, FALSE and NULL terminals are declared but no rule in this
# grammar references them.
QUERY_GRAMMAR = """

?start: bool_expr

?bool_expr: bool_term ("OR" bool_term)*

?bool_term: bool_factor ("AND" bool_factor)*

?bool_factor: not_bool_factor

| "(" bool_expr ")"

| relation_expr

?relation_expr: gender_comparison

| gestational_age_comparison

| age_in_days_comparison

| language_comparison

| condition_comparison

| language_count_comparison

not_bool_factor: "NOT" bool_factor

gender_comparison: "gender" (EQ | NE) gender_target

// 24 to 40 weeks

gestational_age_comparison: "gestational_age_in_weeks" comparator GESTATIONAL_AGE_AS_WEEKS

age_in_days_comparison: "age_in_days" comparator INT

language_count_comparison: ("n_languages" | "num_languages") comparator INT

comparator: EQ | NE | LT | LTE | GT | GTE

gender_target: MALE | FEMALE | OTHER_GENDER | UNSPECIFIED

language_comparison: LANGUAGE_TARGET

condition_comparison: CONDITION_TARGET

// TERMINALS

LANGUAGE_TARGET: {language_targets}

CONDITION_TARGET: {condition_targets}

GESTATIONAL_AGE_AS_WEEKS: /(2[4-9]|3[0-9]|40)/i | UNSPECIFIED

EQ: "="

NE: "!="

LT: "<"

LTE: "<="

GT: ">"

GTE: ">="

TRUE: "true"i

FALSE: "false"i

NULL: "null"i

MALE: "male"i | "m"i

FEMALE: "female"i | "f"i

OTHER_GENDER: "other"i | "o"i

UNSPECIFIED: "na"i | "n/a"i

%import common.INT

%import common.WS

%ignore WS

""".format(

language_targets=" | ".join([f'"{target}"' for target in LANGUAGE_FIELDS]),

condition_targets=" | ".join([f'"{target}"' for target in CONDITION_FIELDS]),

)

# Module-level parser built once from the grammar above, using lark's Earley algorithm;
# compile_expression() calls QUERY_DSL_PARSER.parse() on each criteria expression.
QUERY_DSL_PARSER = Lark(QUERY_GRAMMAR, parser="earley")

def age_range_eligibility_for_study(child_age_range, study) -> bool:
    """Report whether a child age bracket overlaps a study's age window.

    Args:
        child_age_range: Indexable sequence; its first and last items are taken
            as the bracket's lower and upper bound and multiplied by 365 to get
            days (presumably the bounds are in years — confirm against callers).
        study: Object accepted by ``study_age_range``.

    Returns:
        bool: True when the study window and the child bracket, both in days
        and both treated as closed intervals, share at least one day.
    """
    window_min_days, window_max_days = study_age_range(study)

    bracket_min_days = child_age_range[0] * 365
    bracket_max_days = child_age_range[-1] * 365

    # Two closed intervals overlap when each one starts no later than the other ends.
    window_starts_in_time = window_min_days <= bracket_max_days
    return window_starts_in_time and window_max_days >= bracket_min_days

def get_child_eligibility_for_study(child_obj, study_obj):
    """Decide whether a child may take part in a study.

    Three checks run in order and evaluation stops at the first one that fails:
    prior-participation rules, the study's age window, and the study's
    criteria expression.

    Args:
        child_obj: Child model object.
        study_obj: Study model object; its ``criteria_expression`` is evaluated last.

    Returns:
        The first falsy check result, otherwise the result of the criteria check.
    """
    participation_ok = get_child_participation_eligibility(child_obj, study_obj)
    if not participation_ok:
        return participation_ok

    age_ok = _child_in_age_range_for_study(child_obj, study_obj)
    if not age_ok:
        return age_ok

    return get_child_eligibility(child_obj, study_obj.criteria_expression)

def get_child_participation_eligibility(child, study) -> bool:
    """Check whether a child's history in other studies allows this study.

    Args:
        child (Child): Child model object; its ``responses`` are queried.
        study (Study): Study model object; its ``must_have_participated`` and
            ``must_not_have_participated`` relations are read.

    Returns:
        bool: False when a required study has no counted response from the child
        or when any excluded study has one; True otherwise (including when the
        study sets neither list).
    """
    ember_frame_player_id = 1

    def count_studies_with_responses(related_studies):
        # Number of distinct studies among ``related_studies`` the child has responded to.
        # Responses with an empty sequence from studies whose type id is
        # ``ember_frame_player_id`` (internal studies) are ignored.
        return (
            child.responses.filter(study__in=related_studies.all())
            .exclude(study__study_type_id=ember_frame_player_id, sequence=[])
            .distinct()
            .values_list("study")
            .count()
        )

    required = study.must_have_participated
    if required.exists():
        # Every required study must be represented among the child's responses.
        if count_studies_with_responses(required) != required.count():
            return False

    excluded = study.must_not_have_participated
    if excluded.exists():
        # A single counted response to any excluded study disqualifies the child.
        return count_studies_with_responses(excluded) == 0

    return True

def _child_in_age_range_for_study(child, study):
    """Tell whether the child's current age falls inside the study's age window.

    Uses ``child_in_age_range_for_study_days_difference`` so the answer matches
    the age arithmetic used for the study detail page and response data.

    Returns:
        bool: False when the child has no birthday; otherwise True exactly when
        the day difference reported for the child is 0.
    """
    if child.birthday:
        days_outside_window = child_in_age_range_for_study_days_difference(child, study)
        return days_outside_window == 0
    return False

def child_in_age_range_for_study_days_difference(child, study):
    """Measure how many days a child's current age lies outside a study's age window.

    This is the graded counterpart of ``_child_in_age_range_for_study``: instead of
    a yes/no answer it says in which direction, and by how much, the child misses
    the window.

    Age windows are expressed in DAYS using the shorthand year = 365 days and
    month = 30 days, so that they are a fixed span of time rather than calendar
    months/years of varying length. The same convention is described in
    web/studies/study-detail.html (eligibility shown to participants), in the
    researcher help text in studies/templates/studies/_study_fields.html, and at
    https://lookit.readthedocs.io/en/develop/researchers-set-study-fields.html#minimum-and-maximum-age-cutoffs

    Args:
        child (Child): Child model object; only ``birthday`` is read.
        study (Study): Study model object, handed to ``study_age_range``.

    Returns:
        None if the child has no birthday; otherwise an int:
            0 when the age is within the window (bounds included),
            negative when the child is too young (days below the minimum),
            positive when the child is too old (days above the maximum).
    """
    if not child.birthday:
        return None

    youngest_allowed, oldest_allowed = study_age_range(study)
    days_old = (date.today() - child.birthday).days

    # Landing exactly on either bound yields a difference of 0, i.e. "in range".
    if days_old <= youngest_allowed:
        return days_old - youngest_allowed
    if days_old >= oldest_allowed:
        return days_old - oldest_allowed
    return 0

def study_age_range(study):
    """Return a study's age window as ``(minimum, maximum)`` in estimated days.

    Each bound is built from its years, months and days parts using the
    shorthand year = 365 days and month = 30 days.

    Args:
        study: Object exposing ``min_age_years``, ``min_age_months``,
            ``min_age_days`` and the matching ``max_age_*`` attributes.

    Returns:
        tuple: ``(min_age_in_days, max_age_in_days)``.
    """
    lower_bound_days = (
        study.min_age_years * 365 + study.min_age_months * 30 + study.min_age_days
    )
    upper_bound_days = (
        study.max_age_years * 365 + study.max_age_months * 30 + study.max_age_days
    )
    return lower_bound_days, upper_bound_days

def get_child_eligibility(child_obj, criteria_expr):
    """Evaluate a criteria expression against one child.

    Args:
        child_obj: Child model object.
        criteria_expr: Criteria expression string; an empty or missing
            expression places no restriction on the child.

    Returns:
        bool: True when there is no expression, otherwise the truth value of
        the compiled expression applied to the flattened child.
    """
    if not criteria_expr:
        return True

    tester = compile_expression(criteria_expr)
    flattened_child = _get_expanded_child(child_obj)
    return bool(tester(flattened_child))

def compile_expression(boolean_algebra_expression: str):
    """Build a Python predicate from a boolean algebra expression.

    A non-empty expression is parsed with ``QUERY_DSL_PARSER`` and turned into
    Python expression source by ``FunctionTransformer``; an empty expression
    becomes the constant ``True``. The source is then wrapped in a one-line
    function definition, compiled and executed in a throwaway namespace.

    Args:
        boolean_algebra_expression: a string boolean algebra expression.

    Returns:
        A function ``property_tester(child_obj)`` returning the value of the expression.

    Raises:
        Whatever the lark parser raises when the expression cannot be parsed
        (e.g. lark.exceptions.ParseError).
    """
    if boolean_algebra_expression:
        tree = QUERY_DSL_PARSER.parse(boolean_algebra_expression)
        body_source = FunctionTransformer().transform(tree)
    else:
        body_source = "True"

    function_source = "def property_tester(child_obj): return" + " " + body_source

    module_ast = ast.parse(function_source, mode="exec")
    code = compile(module_ast, filename="temp.py", mode="exec")

    # Run the definition in its own dict so the only name it leaves behind is the new function.
    scope = {}
    exec(code, scope)
    return scope["property_tester"]

def _get_expanded_child(child_object):
    """Flatten a child into a single-level dict for criteria evaluation.

    The functions produced by ``compile_expression`` only do first-level
    ``.get`` lookups, so every nested value is lifted to the top level here.

    Args:
        child_object: a accounts.models.Child instance.

    Returns:
        A dict representing the child, in which
          * ``birthday`` is replaced by ``age_in_days`` (relative to today),
          * the entries of ``existing_conditions`` are merged in as-is,
          * each entry of ``languages_spoken`` is merged in as ``speaks_<code>``,
          * ``gestational_age_at_birth`` is replaced by ``gestational_age_in_weeks``.
    """
    flat = _to_dict(child_object)

    # Birthday becomes an age in whole days.
    flat["age_in_days"] = (date.today() - flat.pop("birthday")).days

    # Condition flags move up to the top level under their own names.
    condition_flags = dict(flat.pop("existing_conditions").items())
    flat.update(condition_flags)

    # Language flags move up too, prefixed so they match the grammar's "speaks_*" names.
    spoken = flat.pop("languages_spoken")
    flat.update({f"speaks_{langcode}": boolean for langcode, boolean in spoken.items()})

    # The stored enum value is converted to a number of weeks.
    flat["gestational_age_in_weeks"] = _gestational_age_enum_value_to_weeks(
        flat.pop("gestational_age_at_birth")
    )

    return flat

def _to_dict(model_instance):
    """Dump a model instance's field values into a plain dict.

    Walks the instance's concrete fields followed by its private fields (as
    listed on ``_meta``) and records each field's value under the field name.

    Args:
        model_instance: A django model instance.

    Returns:
        A dictionary mapping field name to that field's value on the instance.
    """
    meta = model_instance._meta
    every_field = chain(meta.concrete_fields, meta.private_fields)
    return {
        field.name: field.value_from_object(model_instance) for field in every_field
    }

def _gestational_age_enum_value_to_weeks(enum_value: int):
    """Convert enum value on child object to actual # of weeks.

    This enables us to directly query the expanded child object with a
    scalar value. 0 == "under 24 weeks"; 17 = "Over 40 weeks". To see
    enumerated values, please reference studies/fields.py.

    Args:
        enum_value: The stored enum value, or None when no answer was given.

    Returns:
        None when ``enum_value`` is None; otherwise ``enum_value + 23`` clamped
        to the range 23..40, so 0 maps to 23 and 17 (or more) maps to 40.
    """
    # Compare against None explicitly: 0 is a real answer ("under 24 weeks") and a
    # plain truthiness test would wrongly report it as "no answer".
    if enum_value is None:
        return None
    return min(max(23, enum_value + 23), 40)

@v_args(inline=True)
class FunctionTransformer(Transformer):
    """Translate a parsed criteria expression into Python expression source.

    Each method handles the grammar rule of the same name and returns a string
    of Python source that reads values from a dict named ``child_obj``.
    ``compile_expression`` splices the final string into
    ``def property_tester(child_obj): return <source>``.

    Because fragments are glued together textually with ``not`` / ``and`` /
    ``or``, every fragment must still mean the same thing when used as an
    operand of those operators. Plain comparisons and calls already do; a
    conditional expression (``x if c else y``) does not, since it binds more
    loosely than the boolean operators, so it is always emitted in parentheses.
    """

    def bool_expr(self, bool_term, *others):
        """Join OR-separated terms into one parenthesised ``or`` chain."""
        or_clauses = " ".join(f"or {other}" for other in others)
        return f"({bool_term} {or_clauses})"

    def bool_term(self, bool_factor, *others):
        """Join AND-separated factors into one parenthesised ``and`` chain."""
        and_clauses = " ".join(f"and {other}" for other in others)
        return f"({bool_factor} {and_clauses})"

    def gender_comparison(self, comparator, target_gender):
        """Compare the child's gender with an already-quoted gender code.

        This rule uses the EQ / NE terminals directly rather than the
        ``comparator`` rule, so "=" is translated to "==" here.
        """
        return f"child_obj.get('gender') {'==' if comparator == '=' else comparator} {target_gender}"

    def gestational_age_comparison(self, comparator, num_weeks):
        """Compare gestational age in weeks; False if no_answer is provided.

        When the target itself is "na" / "n/a", the comparison is made against
        None instead of a number.
        """
        if num_weeks.lower() in ("na", "n/a"):
            # TODO: enhance validation layer so that a non-equals comparator will provide a sensible
            # error message.
            return f"child_obj.get('gestational_age_in_weeks') {comparator} None"
        else:
            # Parenthesised on purpose: without the parentheses a following
            # "and ..." / "or ..." would be absorbed into the else-branch and a
            # preceding "not" would apply to the comparison only.
            return (
                f"(child_obj.get('gestational_age_in_weeks') {comparator} {num_weeks} "
                "if child_obj.get('gestational_age_in_weeks') else False)"
            )

    def age_in_days_comparison(self, comparator, num_days):
        """Compare the child's age in days with an integer."""
        return f"child_obj.get('age_in_days') {comparator} {num_days}"

    def language_comparison(self, lang_target):
        """Look up a ``speaks_*`` flag, treating a missing flag as False."""
        return f"child_obj.get('{lang_target}', False)"

    def condition_comparison(self, condition_target):
        """Look up a condition flag, treating a missing flag as False."""
        return f"child_obj.get('{condition_target}', False)"

    def language_count_comparison(self, comparator, num_langs):
        """Compare the number of truthy ``speaks_*`` flags with an integer."""
        return (
            f"len({{k: v for k, v in child_obj.items() if k.startswith('speaks_') and v}}) "
            f"{comparator} {num_langs}"
        )

    def gender_target(self, gender):
        """Return the quoted gender code for a gender word (case-insensitive).

        Words found in ``GENDER_MAPPING`` become its single-letter code; any
        other word is used as-is, lower-cased.
        """
        gender = gender.lower()
        return f"'{GENDER_MAPPING.get(gender, gender)}'"

    def comparator(self, relation):
        """Map the grammar's "=" to Python's "=="; other comparators pass through."""
        return "==" if relation == "=" else relation

    def not_bool_factor(self, bool_factor):
        """Negate a factor; the operand is parenthesised so ``not`` covers all of it."""
        return f"not ({bool_factor})"

class BitfieldQuerySet(models.QuerySet):
    """QuerySet with helpers for filtering on integer bit-flag columns.

    Both helpers build on one comparison:

        field < field + (field & mask)

    which, for example, can come out in SQL as

        WHERE ...
          "accounts_child"."existing_conditions" <
          (("accounts_child"."existing_conditions" + (1 * ("accounts_child"."existing_conditions" & 12))))

    ANDing the stored bits with a mask gives a result greater than zero when
    ~any~ bit is set in both, so the comparison only holds for rows that share
    at least one set bit with the mask.

    * has_one_of ORs all requested bits into a single mask (e.g. 01101) and
      applies the comparison once.
    * has_all_of keeps the requested bits separate (e.g. 01000, 00100, 00001 -
      powers of 2 in decimal) and ANDs together one comparison per bit.
    """

    def has_one_of(self, field_name: str, bitmasks: list):
        """Keep rows whose ``field_name`` has at least one of the bits in ``bitmasks``.

        Args:
            field_name: The field which we will be querying against - usually a BigInt
            bitmasks: the list of integers which will serve as bitmasks

        Returns:
            A filtered queryset.
        """
        combined_mask = reduce(operator.or_, bitmasks, 0)
        # True only when the stored value shares a set bit with the combined mask.
        shares_a_bit = F(field_name) + F(field_name).bitand(combined_mask)
        return self.filter(
            **{
                f"{field_name}__gt": 0,
                f"{field_name}__lt": shares_a_bit,
            }
        )

    def has_all_of(self, field_name: str, bitmasks: list):
        """Keep rows whose ``field_name`` has every one of the bits in ``bitmasks``.

        Args:
            field_name: The field which we will be querying against - usually a BigInt
            bitmasks: the list of integers which will serve as bitmasks

        Returns:
            A filtered queryset.
        """
        # Start from "field > 0" and AND on one bit test per requested mask.
        condition = Q(**{f"{field_name}__gt": 0})
        for single_mask in bitmasks:
            has_this_bit = Q(
                **{
                    f"{field_name}__lt": F(field_name)
                    + F(field_name).bitand(single_mask)
                }
            )
            condition = condition & has_this_bit
        return self.filter(condition)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

queries.py

queries.py

Files

queries.py

Latest commit

History

queries.py

File metadata and controls