Testing some tidbits

Wednesday 4 December 2024

I posted a Python tidbit about checking if a string consists entirely of zeros and ones:

Python expressions checking if a string is only zeros and ones

I got a bunch of replies suggesting other ways. I wanted to post those, but I also wanted to check if they were right. A classic testing structure would have required putting them all in functions, etc, which I didn’t want to bother with.

So I cobbled together a test harness for them (also in a gist if you want):

# Sample strings that consist only of "0" and "1" characters; the empty
# string is included and is expected to pass as well.
GOOD = [
    "",
    "0",
    "1",
    "000000000000000000",
    "111111111111111111",
    "101000100011110101010000101010101001001010101",
]

# Sample strings that contain at least one character other than "0" or "1".
BAD = [
    "x",
    "nedbat",
    "x000000000000000000000000000000000000",
    "111111111111111111111111111111111111x",
    # Every code point from 0 through 9999, concatenated into one string.
    "".join(map(chr, range(10000))),
]

# The candidate expressions, one per line. Each is evaluated by the loop
# below with `s` bound to a sample string, and may also use `re` and
# `Counter`. The "# ..." lines inside the string are section labels; they
# and the blank lines are skipped when the string is processed.
TESTS = """
    # The original checks
    all(c in "01" for c in s)
    set(s).issubset({"0", "1"})
    set(s) <= {"0", "1"}
    re.fullmatch(r"[01]*", s)
    s.strip("01") == ""
    not s.strip("01")

    # Using min/max
    "0" <= min(s or "0") <= max(s or "1") <= "1"
    not s or (min(s) in "01" and max(s) in "01")
    ((ss := sorted(s or "0")) and ss[0] in "01" and ss[-1] in "01")

    # Using counting
    s.count("0") + s.count("1") == len(s)
    (not (ctr := Counter(s)) or (ctr["0"] + ctr["1"] == len(s)))

    # Using numeric tests
    all(97*c - c*c > 2351 for c in s.encode())
    max((abs(ord(c) - 48.5) for c in "0"+s)) < 1
    all(map(lambda x: (ord(x) ^ 48) < 2, s))

    # Removing all the 0 and 1
    re.sub(r"[01]", "", s) == ""
    len((s).translate(str.maketrans("", "", "01"))) == 0
    len((s).replace("0", "").replace("1", "")) == 0
    "".join(("1".join((s).split("0"))).split("1")) == ""

    # A few more for good measure
    set(s + "01") == set("01")
    not (set(s) - set("01"))
    not any(filter(lambda x: x not in {"0", "1"}, s))
    all(map(lambda x: x in "01", s))
"""

import re
from collections import Counter
from inspect import cleandoc

# Extra names, besides `s`, that the expressions in TESTS may refer to.
g = dict(re=re, Counter=Counter)

# Evaluate every expression line of TESTS against each sample string and
# report any whose truthiness disagrees with the expected outcome.
for line in cleandoc(TESTS).splitlines():
    # Keep only what precedes a "#"; section-label lines and blank lines
    # reduce to the empty string and are skipped.
    test, _, _ = line.partition("#")
    if test == "":
        continue
    for expected, samples in ((True, GOOD), (False, BAD)):
        for s in samples:
            # A fresh namespace per evaluation: `s` plus the shared helpers.
            result = eval(test, {"s": s, **g})
            if bool(result) == expected:
                continue
            print(
                "OOPS:",
                f"   {s = }",
                f"   {test}",
                f"   {expected = }",
                sep="\n",
            )

It’s a good thing I did this because a few of the suggestions needed adjusting, especially for dealing with the empty string. But now they all work, and are checked!

More Python expressions checking if a string is only zeros and ones

BTW, if you prefer Mastodon to BlueSky, the posts are there too: first and second.

Also BTW: Brian Okken adapted these tests to pytest, showing some interesting pytest techniques.

Comments

[gravatar]

Here are some timings from running the test cases 100 times:

 4.46 ms: not s.strip("01")
 4.98 ms: s.strip("01") == ""
 6.38 ms: re.fullmatch(r"[01]*", s)
 8.41 ms: s.count("0") + s.count("1") == len(s)
10.25 ms: len((s).replace("0", "").replace("1", "")) == 0
10.63 ms: all(map(lambda x: x in "01", s))
11.06 ms: "".join(("1".join((s).split("0"))).split("1")) == ""
12.94 ms: re.sub(r"[01]", "", s) == ""
13.08 ms: not any(filter(lambda x: x not in {"0", "1"}, s))
13.39 ms: all(map(lambda x: (ord(x) ^ 48) < 2, s))
14.16 ms: all(97*c - c*c > 2351 for c in s.encode())
14.95 ms: all(c in "01" for c in s)
26.20 ms: not s or (min(s) in "01" and max(s) in "01")
26.51 ms: "0" <= min(s or "0") <= max(s or "1") <= "1"
30.43 ms: ((ss := sorted(s or "0")) and ss[0] in "01" and ss[-1] in "01")
43.01 ms: set(s) <= {"0", "1"}
43.44 ms: set(s + "01") == set("01")
46.81 ms: set(s).issubset({"0", "1"})
58.14 ms: (not (ctr := Counter(s)) or (ctr["0"] + ctr["1"] == len(s)))
60.78 ms: not (set(s) - set("01"))
72.19 ms: len((s).translate(str.maketrans("", "", "01"))) == 0
84.00 ms: max((abs(ord(c) - 48.5) for c in "0"+s)) < 1

And the modified code:

# Sample strings that consist only of "0" and "1" characters; the empty
# string is included and is expected to pass as well.
GOOD = [
    "",
    "0",
    "1",
    "000000000000000000",
    "111111111111111111",
    "101000100011110101010000101010101001001010101",
]

# Sample strings that contain at least one character other than "0" or "1".
BAD = [
    "x",
    "nedbat",
    "x000000000000000000000000000000000000",
    "111111111111111111111111111111111111x",
    # Every code point from 0 through 9999, concatenated into one string.
    "".join(map(chr, range(10000))),
]

# The candidate expressions, one per line. Each is evaluated by the timing
# loop below with `s` bound to a sample string, and may also use `re` and
# `Counter`. The "# ..." lines inside the string are section labels; they
# are skipped when the string is processed.
TESTS = """
    # The original checks
    all(c in "01" for c in s)
    set(s).issubset({"0", "1"})
    set(s) <= {"0", "1"}
    re.fullmatch(r"[01]*", s)
    s.strip("01") == ""
    not s.strip("01")
    # Using min/max
    "0" <= min(s or "0") <= max(s or "1") <= "1"
    not s or (min(s) in "01" and max(s) in "01")
    ((ss := sorted(s or "0")) and ss[0] in "01" and ss[-1] in "01")
    # Using counting
    s.count("0") + s.count("1") == len(s)
    (not (ctr := Counter(s)) or (ctr["0"] + ctr["1"] == len(s)))
    # Using numeric tests
    all(97*c - c*c > 2351 for c in s.encode())
    max((abs(ord(c) - 48.5) for c in "0"+s)) < 1
    all(map(lambda x: (ord(x) ^ 48) < 2, s))
    # Removing all the 0 and 1
    re.sub(r"[01]", "", s) == ""
    len((s).translate(str.maketrans("", "", "01"))) == 0
    len((s).replace("0", "").replace("1", "")) == 0
    "".join(("1".join((s).split("0"))).split("1")) == ""
    # A few more for good measure
    set(s + "01") == set("01")
    not (set(s) - set("01"))
    not any(filter(lambda x: x not in {"0", "1"}, s))
    all(map(lambda x: x in "01", s))
"""

import re
import sys
import time
from collections import Counter
from inspect import cleandoc

g = {
    "re": re,
    "Counter": Counter,
}

times = []
for test in cleandoc(TESTS).splitlines():
    test = test.partition("#")[0]
    if not test:
        continue
    before = time.time()
    for ss, expected in 100 * [(GOOD, True), (BAD, False)]:
        for s in ss:
            result = eval(test, {"s": s} | g)
            if bool(result) != expected:
                print("OOPS:")
                print(f"   {s = }")
                print(f"   {test}")
                print(f"   {expected = }")
    after = time.time()
    milliseconds = (after - before) * 1000
    times.append((milliseconds, test))

print("Python", sys.version)
print()
print(
    "\n".join(
        f"{milliseconds:>5.2f} ms: {test}" for milliseconds, test in sorted(times)
    )
)
print()
[gravatar]

This is fun, and I learned a few basic Python tricks.
But I also wanted to try to get all of these tests to run with pytest. :)

Here’s my approach to testing these methods with pytest:
Testing some tidbits with pytest

[gravatar]
Ruud van der Ham 4:10 PM on 20 Dec 2024

I think the results of the benchmark (even the relative ones) are highly dependent on the composition of the test set. So one should always try to define a relevant test suite. E.g. if strings are always 10000+ characters long and contain hardly any characters other than 0 or 1, the results might be quite different from the situation where strings are 10 bytes long and contain many characters other than 0 or 1.

Add a comment:

Ignore this:
Leave this empty:
Name is required. Either email or web are required. Email won't be displayed and I won't spam you. Your web site won't be indexed by search engines.
Don't put anything here:
Leave this empty:
Comment text is Markdown.