From b4415c90f98b32c92e2166a056331f5f885e55d8 Mon Sep 17 00:00:00 2001 From: Sighery Date: Sun, 4 Oct 2020 22:04:00 +0200 Subject: [PATCH 1/2] Hints kitten: add support for IPs (v4 and v6) It selects v4 and v6 IPs with a simple regex that doesn't actually check for the validity of the IPs. --- kittens/hints/main.py | 10 +++++++++- kitty_tests/hints.py | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/kittens/hints/main.py b/kittens/hints/main.py index d16381945..ecf15e97f 100644 --- a/kittens/hints/main.py +++ b/kittens/hints/main.py @@ -325,6 +325,14 @@ def functions_for(args: HintsCLIOptions) -> Tuple[str, List[PostprocessorFunc]]: pattern = '(?m)^\\s*(.+)[\\s\0]*$' elif args.type == 'hash': pattern = '[0-9a-f]{7,128}' + elif args.type == 'ip': + pattern = ( + # # IPv4 with no validation + r"((?:\d{1,3}\.){3}\d{1,3}" + r"|" + # # IPv6 with no validation + r"(?:[a-fA-F0-9]{0,4}:){2,7}[a-fA-F0-9]{1,4})" + ) elif args.type == 'word': chars = args.word_characters if chars is None: @@ -482,7 +490,7 @@ programs. --type default=url -choices=url,regex,path,line,hash,word,linenum,hyperlink +choices=url,regex,path,line,hash,word,linenum,hyperlink,ip The type of text to search for. A value of :code:`linenum` is special, it looks for error messages using the pattern specified with :option:`--regex`, which must have the named groups, :code:`path` and :code:`line`. 
If not specified, diff --git a/kitty_tests/hints.py b/kitty_tests/hints.py index 3a59dbace..1876a579b 100644 --- a/kitty_tests/hints.py +++ b/kitty_tests/hints.py @@ -30,3 +30,30 @@ class TestHints(BaseTest): t('link:{}[xxx]'.format(u), u) t('`xyz <{}>`_.'.format(u), u) t('moo'.format(u), u) + + def test_ip_hints(self): + from kittens.hints.main import parse_hints_args, functions_for, mark, convert_text + args = parse_hints_args(['--type', 'ip'])[0] + pattern, post_processors = functions_for(args) + + def create_marks(text, cols=60): + text = convert_text(text, cols) + return tuple(mark(pattern, post_processors, text, args)) + + testcases = ( + ('100.64.0.0', ['100.64.0.0']), + ('2001:0db8:0000:0000:0000:ff00:0042:8329', ['2001:0db8:0000:0000:0000:ff00:0042:8329']), + ('2001:db8:0:0:0:ff00:42:8329', ['2001:db8:0:0:0:ff00:42:8329']), + ('2001:db8::ff00:42:8329', ['2001:db8::ff00:42:8329']), + ('2001:DB8::FF00:42:8329', ['2001:DB8::FF00:42:8329']), + ('0000:0000:0000:0000:0000:0000:0000:0001', ['0000:0000:0000:0000:0000:0000:0000:0001']), + ('::1', ['::1']), + # The regex doesn't check for validity + ('255.255.255.256', ['255.255.255.256']), + ) + + for testcase, expected in testcases: + with self.subTest(testcase=testcase, expected=expected): + marks = create_marks(testcase) + ips = [m.text for m in marks] + self.ae(ips, expected) From 10533c3ebadff69cf910105e77a4c1022618fb42 Mon Sep 17 00:00:00 2001 From: Sighery Date: Sun, 4 Oct 2020 22:46:00 +0200 Subject: [PATCH 2/2] Hints kitten: validate IPs with ipaddress In the initial commit of this feature, IPs were just matched with a very simple regex that prioritised simplicity/readability over accuracy. This commit adds a postprocessor for IP matches that uses the `ipaddress` module from Python's standard library to validate all the IP matches.
This way we don't need huge and complex regex patterns to match _and_ validate the IPs; `ipaddress` does the validation for us, so none of that logic has to be implemented in the regex pattern. --- kittens/hints/main.py | 28 ++++++++++++++++++++++++++-- kitty_tests/hints.py | 5 +++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/kittens/hints/main.py b/kittens/hints/main.py index ecf15e97f..4eaa29da1 100644 --- a/kittens/hints/main.py +++ b/kittens/hints/main.py @@ -2,6 +2,7 @@ # vim:fileencoding=utf-8 # License: GPL v3 Copyright: 2018, Kovid Goyal +import ipaddress import os import re import string @@ -236,6 +237,11 @@ def postprocessor(func: PostprocessorFunc) -> PostprocessorFunc: return func +class InvalidMatch(Exception): + """Raised when a match turns out to be invalid.""" + pass + + @postprocessor def url(text: str, s: int, e: int) -> Tuple[int, int]: if s > 4 and text[s - 5:s] == 'link:': # asciidoc URLs @@ -280,11 +286,28 @@ def quotes(text: str, s: int, e: int) -> Tuple[int, int]: return s, e +@postprocessor +def ip(text: str, s: int, e: int) -> Tuple[int, int]: + # Check validity of IPs (or raise InvalidMatch) + ip = text[s:e] + + try: + ipaddress.ip_address(ip) + except ValueError: + raise InvalidMatch("Invalid IP") + + return s, e + + def mark(pattern: str, post_processors: Iterable[PostprocessorFunc], text: str, args: HintsCLIOptions) -> Generator[Mark, None, None]: pat = re.compile(pattern) for idx, (s, e, groupdict) in enumerate(regex_finditer(pat, args.minimum_match_length, text)): - for func in post_processors: - s, e = func(text, s, e) + try: + for func in post_processors: + s, e = func(text, s, e) + except InvalidMatch: + continue + mark_text = text[s:e].replace('\n', '').replace('\0', '') yield Mark(idx, s, e, mark_text, groupdict) @@ -333,6 +356,7 @@ def functions_for(args: HintsCLIOptions) -> Tuple[str, List[PostprocessorFunc]]: # # IPv6 with no validation r"(?:[a-fA-F0-9]{0,4}:){2,7}[a-fA-F0-9]{1,4})" ) +
post_processors.append(ip) elif args.type == 'word': chars = args.word_characters if chars is None: diff --git a/kitty_tests/hints.py b/kitty_tests/hints.py index 1876a579b..64d003a58 100644 --- a/kitty_tests/hints.py +++ b/kitty_tests/hints.py @@ -48,8 +48,9 @@ class TestHints(BaseTest): ('2001:DB8::FF00:42:8329', ['2001:DB8::FF00:42:8329']), ('0000:0000:0000:0000:0000:0000:0000:0001', ['0000:0000:0000:0000:0000:0000:0000:0001']), ('::1', ['::1']), - # The regex doesn't check for validity - ('255.255.255.256', ['255.255.255.256']), + # Invalid IPs won't match + ('255.255.255.256', []), + (':1', []), ) for testcase, expected in testcases: