core

Module containing a variety of functions that use regex to extract data from text, such as emails, phone numbers, and addresses.

init

setupLogger

 setupLogger (name:str='log', serialize:bool=False)

Add handler attached to <name>.log at trace level and up.

	Type	Default	Details
name	str	log	str
serialize	bool	False	bool
Returns	None

loadSerializedLog

 loadSerializedLog (log_fp:str, incl_full_record:bool=False,
                    incl_unix_ts:bool=False)

Convert serialized log to dataframe for excel export.

	Type	Default	Details
log_fp	str		str
incl_full_record	bool	False	bool
incl_unix_ts	bool	False	bool
Returns	DataFrame		pd.DataFrame

yaml_helper

 yaml_helper (fpath:str='./config.yaml', mode:str='r',
              data:Optional[dict]=None)

Helper function to read, write, or append to files in YAML format. Checks for duplicate keys when reading or appending.

	Type	Default	Details
fpath	str	./config.yaml	str
mode	str	r	str - r / a / w
data	Optional	None	cannot be None if writing or appending
Returns	dict		dict - data if reading, {‘r’: 0} if writing/appending

extract emails

extract_emails

 extract_emails (t_str:str)

Extract all emails from the note using regex. If multiple emails are found, they are joined with " // ".

	Type	Details
t_str	str	str
Returns	str	email address

testExtractEmails

assert extract_emails("Some text here bob@gmail.ca some text here") == "bob@gmail.ca"
assert (
    extract_emails("sdasd asd a232 very.common@example.com //")
    == "very.common@example.com"
)
assert (
    extract_emails("sdasd asd FirstName.LastName@EasierReading.org")
    == "FirstName.LastName@EasierReading.org"
)
assert extract_emails("sds extract_emails x@example.com asda ") == "x@example.com"
assert (
    extract_emails(
        "long.email-address-with-hyphens@and.subdomains.example.com asda dasd asdasd "
    )
    == "long.email-address-with-hyphens@and.subdomains.example.com"
)
assert (
    extract_emails("dasd adasd user.name+tag+sorting@example.com asd asd ")
    == "user.name+tag+sorting@example.com"
)
assert (
    extract_emails("dasd adasd user.name+tag+sorting@s.example    asd asd ")
    == "user.name+tag+sorting@s.example"
)
assert (
    extract_emails(
        "dasd bob@gmail.com adasd user.name+tag+sorting@s.example    asd asd "
        + "dasd adasd user.name+tag+sorting@example.com asd asd "
    )
    == "bob@gmail.com // user.name+tag+sorting@s.example // user.name+tag+sorting@example.com"
)

Supposedly, an address whose domain has no dot (e.g. admin@exampl) is a valid email, but the script won’t pick it up. Unlikely to encounter.

t_str = "admin@exampl sdasd "
extract_emails(t_str)

''

A forward slash in the part before the @ is supposedly valid as well, but the script only picks up the portion after the slash. Unlikely to encounter.

t_str = "dasd name/surname@example.com sdasd "
extract_emails(t_str)

'surname@example.com'

extract phone numbers

extract_phone1

 extract_phone1 (t_str:str)

Extract all phone numbers matching the xxx-xxx-xxxx pattern, or the contents of [ph:...] when an alternate format is used (e.g., [ph:123-456-7890 ext. 8001]). If a [ph:...] tag is found, the xxx-xxx-xxxx pattern is not checked.

	Type	Details
t_str	str	str
Returns	str	str

testExtractHomePhone

assert extract_phone1("dasda  123-123-1234 aasdasd ... ") == "123-123-1234"
assert (
    extract_phone1("dasda  123-123-1234 // 123-343-4521 ")
    == "123-123-1234 // 123-343-4521"
)
assert (
    extract_phone1("asdasd as [ph: 903-123-4365 ext 8001] // sadasd ")
    == "903-123-4365 ext 8001"
)
assert (
    extract_phone1("asdasd as [pH: 903-123-4365 ext 8001] // sadasd ")
    == "903-123-4365 ext 8001"
)
assert (
    extract_phone1("asdasd as 123-123-1234 // [pH: 903-123-4365 ext 8001] // sadasd ")
    == "903-123-4365 ext 8001"
)
assert (
    extract_phone1(
        "asdasd as  // [pH: 903-123-4365 ext 8001 // 123-123-1234] // sadasd "
    )
    == "903-123-4365 ext 8001 // 123-123-1234"
)

extract_phone2

 extract_phone2 (t_str:str)

Extract the work or cell number. The number must be placed inside [cell:...]; otherwise an empty string is returned.

	Type	Details
t_str	str	str
Returns	str	str

testExtractCellPhone

assert extract_phone2("asdsad 123-213-4521") == ""
assert extract_phone2("asdsad [cell: 123-213-4521  ]") == "123-213-4521"
assert extract_phone2("asdsad [CELL:  123-213-4521  ]") == "123-213-4521"
assert (
    extract_phone2("asdsad [CELL:  123-213-4521 // 999-99999999999 ]")
    == "123-213-4521 // 999-99999999999"
)

extract_phone_numbers

 extract_phone_numbers (t_str:str)

Main function to extract phone numbers from a note. To remove all phone numbers on file, use [remove phone].

	Type	Details
t_str	str	str
Returns	tuple	tuple[str, str]

extract_phone_numbers(".. [remove phone] asdsad")

('<<<blank>>>', '<<<blank>>>')

<<<blank>>> tells the update_xl module to clear existing data.

testExtractPhoneNumbers

assert extract_phone_numbers("[remove phone]") == ("<<<blank>>>", "<<<blank>>>")
assert extract_phone_numbers(
    "123 sesame st ... 123-345-3245 // [cell: 905-123-5453]"
) == ("123-345-3245", "905-123-5453")
assert extract_phone_numbers(
    "... [ph: 123-432-5643 ext 900] // [cell: 123-345-4452 ext 9000 ]"
) == ("123-432-5643 ext 900", "123-345-4452 ext 9000")

assert extract_phone_numbers("234 123-123-1234 // 234-234-2345") == (
    "123-123-1234 // 234-234-2345",
    "",
)
assert extract_phone_numbers(
    "223423434 sadasd 123-123-1234 // 234-234-2345 // [cell: 123-543-9999]"
) == ("123-123-1234 // 234-234-2345", "123-543-9999")
assert extract_phone_numbers(
    "223423434 sadasd [ph: 123-123-1234 // 234-234-2345 (POA)] 456-234-1231 // [cell: 123-543-9999]"
) == ("123-123-1234 // 234-234-2345 (POA)", "123-543-9999")
assert extract_phone_numbers("[cell:306-535-5490]") == ("", "306-535-5490")
assert extract_phone_numbers("... 306-535-5490 ...") == ("306-535-5490", "")

extract address

full_prov_name

 full_prov_name (t_str:str)

Address file uses full name, not abbreviation

	Type	Details
t_str	str	str
Returns	str	str

testFullProv

assert full_prov_name("FL") == "FL"  # if not found, returns input string
assert full_prov_name("on") == "Ontario"
assert full_prov_name("yt") == "Yukon"
assert full_prov_name("Bc") == "British Columbia"

extract_address1

 extract_address1 (t_str:str)

For Canadian addresses.

	Type	Details
t_str	str	str
Returns	tuple	tuple[str, str, str, str, str]: (street, city, abbreviated province, postal code, country)

Address can be formatted in a variety of ways. See below tests for examples.

testExtractCdnAddress

t_str = "... // c/o Jim Bob, 123 Sesame St, Toronto, ON  M5X 2D1  // ... "
assert extract_address1(t_str) == (
    "c/o Jim Bob, 123 Sesame St",
    "Toronto",
    "ON",
    "M5X 2D1",
    "Canada",
)
t_str = "... // c/o Jim Bob, 123 Sesame St, Toronto, ON   M5X2D1  // ... "
assert extract_address1(t_str) == (
    "c/o Jim Bob, 123 Sesame St",
    "Toronto",
    "ON",
    "M5X 2D1",
    "Canada",
)
t_str = "... // c/o Jim Bob, 123 Sesame St, Toronto, ON.   M5X2D1  // ... "
assert extract_address1(t_str) == (
    "c/o Jim Bob, 123 Sesame St",
    "Toronto",
    "ON",
    "M5X 2D1",
    "Canada",
)
t_str = "... // c/o Jim Bob, 123 Sesame St, Quebec,  QC.   M5X2D1  // ... "
assert extract_address1(t_str) == (
    "c/o Jim Bob, 123 Sesame St",
    "Quebec",
    "QC",
    "M5X 2D1",
    "Canada",
)
t_str = "... // c/o Jim Bob, 123 Sesame St, Quebec,  qc.   M5X2D1  // ... "
assert extract_address1(t_str) == (
    "c/o Jim Bob, 123 Sesame St",
    "Quebec",
    "QC",
    "M5X 2D1",
    "Canada",
)
t_str = "... // c/o Jim Bob, 123 Sesame St, Some City,  pe,   M5X2D1  // ... "
assert extract_address1(t_str) == (
    "c/o Jim Bob, 123 Sesame St",
    "Some City",
    "PE",
    "M5X 2D1",
    "Canada",
)

If the pattern indicates a Canadian address but the province isn’t a valid abbreviation, a warning is logged and the parsed address is still returned:

t_str = "... // c/o Jim Bob, 123 Sesame St, Some City,  Manitoba,   M5X2D1  // ... "
extract_address1(t_str)

2025-07-10 16:17:49.899 | WARNING  | __main__:extract_address1:49 - Check province - s/b abbreviated; input str: ... // c/o Jim Bob, 123 Sesame St, Some City,  Manitoba,   M5X2D1  // ...

('c/o Jim Bob, 123 Sesame St', 'Some City', 'MA', 'M5X 2D1', 'Canada')

extract_address2

 extract_address2 (t_str:str)

extract_address1 only works for Canadian addresses. This function processes non-Canadian addresses.

	Type	Details
t_str	str	str
Returns	tuple	tuple[str, str, str, str, str]

testExtractIntlAddress

t_str = "[street:123 Sesame St][city:Miami][prov:FL][pc:02345][country:US]"
assert extract_address2(t_str) == ("123 Sesame St", "Miami", "FL", "02345", "US")
t_str = "[street:][city:][prov:][pc:][country:US]"
assert extract_address2(t_str) == (
    "",
    "",
    "",
    "",
    "US",
)

name updates

extract_names

 extract_names (t_str:str)

[fn:...] / [ln:...] for member. [sfn:...] / [sln:...] for spouse.

	Type	Details
t_str	str	str
Returns	tuple	tuple[str, str, str, str]

testExtractNames

t_str = "[FN:Bob][Ln: Jim] [sfn:Mary][sln:Thomas]"
assert extract_names(t_str) == ("Bob", "Jim", "Mary", "Thomas")

t_str = "[FN:Bob][sfn:Mary][sln:Thomas]"
assert extract_names(t_str) == ("Bob", "", "Mary", "Thomas")

t_str = "[fn:  Sam ]"
assert extract_names(t_str) == ("Sam", "", "", "")

deaths

check_mbr_death

 check_mbr_death (t_str:str)

Checks for either [mdod:<dod>] or [md:<dod>] in the note. <dod> should be a string that can be parsed by the pandas.to_datetime function, preferably in the format dd-mmm-yyyy.

If there’s proof of death, add [dc] to the note.

	Type	Details
t_str	str	str
Returns	tuple	tuple[str, str] - (dod formatted as str, proof of death)

testMbrDeath

assert check_mbr_death(" // [mdod:1-jan-2020][dc] // ") == (
    ("2020-01-01"),
    "Yes",
)
assert check_mbr_death(" // [mdod:1-jan-2020] // ") == (
    ("2020-01-01"),
    "",
)
assert check_mbr_death(" // [md:1-jan-2020][DC] // ") == (
    ("2020-01-01"),
    "Yes",
)
assert check_mbr_death(" // [md:1-feb-2023] // ") == (
    ("2023-02-01"),
    "",
)
assert check_mbr_death(" // [mdob:1-feb-2023] // ") == (
    "",
    "",
)
assert check_mbr_death(" // [dc] // ") == (
    "",
    "Yes",
)

check_spouse_death

 check_spouse_death (t_str:str)

Checks for either [sdod:<dod>] or [sd:<dod>] in the note. <dod> should be a string that can be parsed by the pandas.to_datetime function, preferably in the format dd-mmm-yyyy.

If there’s proof of death, add [sdc] to the note.

	Type	Details
t_str	str	str
Returns	tuple	tuple[str,str] - (dod formatted as str, proof of death)

testSpouseDeath

assert check_spouse_death(" // [sdod:1-jan-2020][sdc] // ") == (
    ("2020-01-01"),
    "Yes",
)
assert check_spouse_death(" // [sdod:1-jan-2020] // ") == (
    ("2020-01-01"),
    "",
)
assert check_spouse_death(" // [sd:1-jan-2020][sdc] // ") == (
    ("2020-01-01"),
    "Yes",
)
assert check_spouse_death(" // [sd:1-feb-2023] // ") == (
    ("2023-02-01"),
    "",
)
assert check_spouse_death(" // [sdob:1-feb-2023] // ") == (
    "",
    "",
)
assert check_spouse_death(" // [sdc] // ") == (
    "",
    "Yes",
)

get_cs

 get_cs (t_str:str)

Get preliminary values for Current Status and Date of Current Status.

	Type	Details
t_str	str	str
Returns	tuple	(current status, curr status date formatted as str)

testCurrentStatus

t_str = "[sd:1-jan-2023][md:1-feb-2024]"
assert get_cs(t_str) == ("N/A (died)", ("2024-02-01"))

t_str = "[sd:1-jan-2023][md:1-feb-2024]"
assert get_cs(t_str) == ("N/A (died)", ("2024-02-01"))

t_str = "[md:1-feb-2024]"
assert get_cs(t_str) == ("N/A (died)", ("2024-02-01"))

t_str = "[sd:1-feb-2024]"
assert get_cs(t_str) == ("N/A (died)", ("2024-02-01"))

payee status, banking

A payee status of 04 means the payee is deceased.
If both a death notification and banking info are received:
- Member death notification + banking
  - no need to add banking to maintenance file
- Spouse death notification + banking
  - Pre-deceased spouse: add banking to maintenance file
  - Survivor death: no need to add banking to maintenance file

payee_status_and_banking

 payee_status_and_banking (t_str:str, validate:bool=True)

Get payee status for PIN: 00 for banking update, 01 for suspend, 04 for payee death. If 00, 2nd element of returned tuple will be new banking info.

	Type	Default	Details
t_str	str		str
validate	bool	True	check length of institution / transit numbers
Returns	tuple		tuple[str, str]: `(new status code, new banking)` // `("", "")` if no change

testPayeeStatusAndBanking

assert payee_status_and_banking("// [bank:123-12334-312312312312] asdasd ") == (
    "00",
    "123-12334-312312312312",
)
assert payee_status_and_banking("[sus]") == ("01", "")

with pytest.raises(
    Exception, match="Can't suspend payments and update banking info at the same time"
):
    payee_status_and_banking("[sus][bank:...]")

with pytest.raises(AssertionError, match="Invalid transit"):
    payee_status_and_banking("// [bank:123-132334-312312312312] asdasd ")
with pytest.raises(AssertionError, match="Invalid institution"):
    payee_status_and_banking("// [bank:23-13234-312312312312] asdasd ")

# preliminary values only
# later function determines final payee status / banking info
assert payee_status_and_banking(
    "// [sdod:1-jan-2024][bank:123-12334-312312312312]"
) == ("04", "")

assert payee_status_and_banking(
    "// [mdod:1-jan-2024][bank:123-12334-312312312312]"
) == ("04", "")

assert payee_status_and_banking("[mdod:1-jan-2020]") == ("04", "")
assert payee_status_and_banking("[sdod:1-jan-2020]") == ("04", "")

assert payee_status_and_banking(
    "// [sdod:1-jan-2020][bank:123-12334-312312312312]"
) == ("04", "")
assert payee_status_and_banking(
    "// [mdod:1-jan-2020][bank:123-12334-312312312312]"
) == ("04", "")

DOB

dob_updates

 dob_updates (t_str:str)

Update DOB in both address file and PIN using the following patterns: [mdob:<dob>], [sdob:<sdob>]

	Type	Details
t_str	str	str
Returns	tuple	tuple[str,str] - (mdob, sdob)

testDobUpdates

t_str = "[mdob:1-jan-1960]"
assert dob_updates(t_str) == ("1960-01-01", "")
t_str = "[sdob:1-jan-1960]"
assert dob_updates(t_str) == ("", "1960-01-01")
t_str = "[mdob:28-feb-1954][sdob:1-jan-1960]"
assert dob_updates(t_str) == (
    "1954-02-28",
    "1960-01-01",
)