ctf-writeups

Unicode bypass

Common bypass

Reduce length

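Sometimes we can shorten a payload with a Unicode bypass: Python applies NFKC normalization to identifiers, so a single compatibility character (for example a ligature or a Roman numeral) can stand in for several ASCII letters: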

import builtins
import string
import unicodedata
from collections import defaultdict

# Build a reverse NFKC table: printable-ASCII text -> every single code point
# that normalizes to that text and is still accepted by the parser when it
# directly follows an identifier character.
# Python normalizes identifiers with NFKC while parsing, which is what makes
# these characters usable as stand-ins for the ASCII spelling.
printable = frozenset(string.printable)
mapping = defaultdict(list)
for ch in range(0x110000):
    char = chr(ch)
    nfkc_text = unicodedata.normalize("NFKC", char)
    if not printable.issuperset(nfkc_text):
        continue
    # Some characters normalize to ASCII but are rejected by the tokenizer
    # (e.g. superscript digits), so probe them. Compiling is enough: only a
    # SyntaxError matters here, and nothing needs to be executed.
    try:
        compile("_" + char, "<probe>", "eval")
    except SyntaxError:
        continue
    mapping[nfkc_text].append(char)


def find_shortest(s, table=None):
    """Return the shortest rewriting of ``s`` found by substituting substrings.

    Every substring of length two or more that is a key of ``table`` may be
    replaced by the first candidate stored for it. A dynamic program over the
    prefixes of ``s`` picks the combination of replacements that yields the
    shortest result; on ties the variant found first is kept (no replacement,
    then shorter suffixes before longer ones).

    Args:
        s: The text to shorten, e.g. the name of a builtin.
        table: Mapping from a substring to a sequence of replacement
            strings. Defaults to the module-level ``mapping``.

    Returns:
        The shortened text, or ``s`` itself when no replacement helps. An
        empty ``s`` yields an empty string.
    """
    if table is None:
        table = mapping

    # best[k] holds the shortest known rewriting of the prefix s[:k].
    best = [""]
    for end in range(1, len(s) + 1):
        # Case 1: the last character is kept as is.
        candidate = best[end - 1] + s[end - 1]
        # Case 2: a suffix of length >= 2 is replaced by a single entry.
        for start in range(end - 2, -1, -1):
            replacements = table.get(s[start:end])
            if not replacements:
                # Unknown substring, or a key with no usable candidates.
                continue
            rewritten = best[start] + replacements[0]
            if len(rewritten) < len(candidate):
                candidate = rewritten
        best.append(candidate)
    return best[-1]


# Report every builtin name that can be written with fewer characters.
# The names come from the ``builtins`` module rather than ``__builtins__``:
# the latter is an implementation detail that is a module in ``__main__`` but
# a plain dict in imported modules, where ``.__dict__`` would fail.
for name in vars(builtins):
    shortest = find_shortest(name)
    if len(shortest) < len(name):
        print(name, shortest, f"{len(shortest)} < {len(name)}")

Result:

ascii ascⅱ 4 < 5
divmod dⅳmod 5 < 6
isinstance isinﬅance 9 < 10
memoryview memoryⅵew 9 < 10
filter ﬁlter 5 < 6
float ﬂoat 4 < 5
list liﬅ 3 < 4
staticmethod ﬅaticmethod 11 < 12
str ﬅr 2 < 3
GeneratorExit GeneratorEⅺt 12 < 13
SystemExit SyﬅemEⅺt 8 < 10
EnvironmentError EnⅵronmentError 15 < 16
OverflowError OverﬂowError 12 < 13
ZeroDivisionError ZeroDⅳisionError 16 < 17
SystemError SyﬅemError 10 < 11
BufferError BuﬀerError 10 < 11
FileExistsError FileEⅺﬅsError 13 < 15
exit eⅺt 3 < 4

Full mapping

Print every character that differs from its NFKC form, normalizes to printable ASCII, and still parses when appended to an identifier:

import unicodedata
import string
from collections import defaultdict

# Build a reverse NFKC table: printable-ASCII text -> every *other* single
# code point that normalizes to that text and is still accepted by the parser
# when it directly follows an identifier character.
# Python normalizes identifiers with NFKC while parsing, which is what makes
# these characters usable as stand-ins for the ASCII spelling.
printable = frozenset(string.printable)
mapping = defaultdict(list)
for ch in range(0x110000):
    char = chr(ch)
    nfkc_text = unicodedata.normalize("NFKC", char)
    if not printable.issuperset(nfkc_text):
        continue
    if nfkc_text == char:
        # Already plain ASCII; only genuine look-alikes are listed.
        continue
    # Some characters normalize to ASCII but are rejected by the tokenizer
    # (e.g. superscript digits), so probe them. Compiling is enough: only a
    # SyntaxError matters here, and nothing needs to be executed.
    try:
        compile("_" + char, "<probe>", "eval")
    except SyntaxError:
        continue
    mapping[nfkc_text].append(char)

# Emit one Markdown bullet per ASCII target, listing each substitute character
# together with its code point in hexadecimal.
for key in sorted(mapping):
    entries = [f"`{ch}` (U+{ord(ch):X})" for ch in mapping[key]]
    print("-", f"`{key}`:", ",".join(entries))

Result: