Hints kitten: validate IPs with ipaddress

On the initial commit of this feature, IPs were just matched with a
very simple regex that prioritised simplicity/readability over
accuracy.

This commit adds a postprocessor for ip matches that makes use of
Python's `ipaddress` in the standard library to validate all the IP
matches.

This way we don't need huge and complex regex patterns to match _and_
validate the IPs, and we can just use `ipaddress` to abstract us from
implementing all the validation logic into the regex pattern.
This commit is contained in:
Sighery 2020-10-04 22:46:00 +02:00
parent b4415c90f9
commit 10533c3eba
2 changed files with 29 additions and 4 deletions

View File

@ -2,6 +2,7 @@
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
import ipaddress
import os
import re
import string
@ -236,6 +237,11 @@ def postprocessor(func: PostprocessorFunc) -> PostprocessorFunc:
return func
class InvalidMatch(Exception):
"""Raised when a match turns out to be invalid."""
pass
@postprocessor
def url(text: str, s: int, e: int) -> Tuple[int, int]:
if s > 4 and text[s - 5:s] == 'link:': # asciidoc URLs
@ -280,11 +286,28 @@ def quotes(text: str, s: int, e: int) -> Tuple[int, int]:
return s, e
@postprocessor
def ip(text: str, s: int, e: int) -> Tuple[int, int]:
# Check validity of IPs (or raise InvalidMatch)
ip = text[s:e]
try:
ipaddress.ip_address(ip)
except ValueError:
raise InvalidMatch("Invalid IP")
return s, e
def mark(pattern: str, post_processors: Iterable[PostprocessorFunc], text: str, args: HintsCLIOptions) -> Generator[Mark, None, None]:
pat = re.compile(pattern)
for idx, (s, e, groupdict) in enumerate(regex_finditer(pat, args.minimum_match_length, text)):
for func in post_processors:
s, e = func(text, s, e)
try:
for func in post_processors:
s, e = func(text, s, e)
except InvalidMatch:
continue
mark_text = text[s:e].replace('\n', '').replace('\0', '')
yield Mark(idx, s, e, mark_text, groupdict)
@ -333,6 +356,7 @@ def functions_for(args: HintsCLIOptions) -> Tuple[str, List[PostprocessorFunc]]:
# # IPv6 with no validation
r"(?:[a-fA-F0-9]{0,4}:){2,7}[a-fA-F0-9]{1,4})"
)
post_processors.append(ip)
elif args.type == 'word':
chars = args.word_characters
if chars is None:

View File

@ -48,8 +48,9 @@ class TestHints(BaseTest):
('2001:DB8::FF00:42:8329', ['2001:DB8::FF00:42:8329']),
('0000:0000:0000:0000:0000:0000:0000:0001', ['0000:0000:0000:0000:0000:0000:0000:0001']),
('::1', ['::1']),
# The regex doesn't check for validity
('255.255.255.256', ['255.255.255.256']),
# Invalid IPs won't match
('255.255.255.256', []),
(':1', []),
)
for testcase, expected in testcases: