From f345ac1bdd5f23f95370b2db8fa42085fa13a0b5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 17 Oct 2016 09:11:10 +0530
Subject: [PATCH] A function to efficiently check for the common case pure ascii strings

---
 kitty/utils.py           | 22 ++++++++++++++++++++++
 kitty_tests/datatypes.py | 11 +++++++++++
 2 files changed, 33 insertions(+)

diff --git a/kitty/utils.py b/kitty/utils.py
index c2068cb06..4aaa3fea5 100644
--- a/kitty/utils.py
+++ b/kitty/utils.py
@@ -3,6 +3,7 @@
 # License: GPL v3 Copyright: 2016, Kovid Goyal
 
 import os
+import sys
 import termios
 import struct
 import shlex
@@ -82,3 +83,24 @@ def hangup():
     pgrp = os.getpgid(pid)
     os.killpg(pgrp, signal.SIGHUP)
     os.close(create_pty()[0])
+
+base_size = sys.getsizeof('')
+
+
+def is_simple_string(x):
+    """Return True if the str ``x`` consists solely of ASCII characters.
+
+    Prefers ``str.isascii()`` (Python 3.7+), which also gives the right
+    answer for str subclasses. On older interpreters it falls back to a
+    CPython implementation detail: a pure-ASCII string is stored with one
+    byte per character behind the smallest object header, so its size is
+    exactly that of the empty string plus its length. Latin-1 text also
+    uses one byte per character but carries a larger header, so it is
+    (correctly) reported as not simple. Non-str input returns False.
+    """
+    if not isinstance(x, str):
+        return False
+    try:
+        return x.isascii()
+    except AttributeError:  # Python < 3.7 has no str.isascii()
+        return sys.getsizeof(x) == base_size + len(x)
diff --git a/kitty_tests/datatypes.py b/kitty_tests/datatypes.py
index 75cf16eda..a90abf3d7 100644
--- a/kitty_tests/datatypes.py
+++ b/kitty_tests/datatypes.py
@@ -2,9 +2,12 @@
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2016, Kovid Goyal
 
+import codecs
+
 from . import BaseTest, set_text_in_line
 
 from kitty.data_types import Line, Cursor
+from kitty.utils import is_simple_string, wcwidth
 
 
 class TestDataTypes(BaseTest):
@@ -72,3 +75,11 @@ class TestDataTypes(BaseTest):
         l.set_decoration(0, q.decoration)
         c = l.cursor_from(0)
         self.ae(c, q)
+
+    def test_utils(self):
+        d = codecs.getincrementaldecoder('utf-8')('strict').decode
+        self.ae(tuple(map(wcwidth, 'a1\0コ')), (1, 1, 0, 2))
+        for s in ('abd38453*(+\n\t\f\r !\0~[]{}()"\':;<>/?ASD`',):
+            self.assertTrue(is_simple_string(s))
+            self.assertTrue(is_simple_string(d(s.encode('utf-8'))))
+        self.assertFalse(is_simple_string('a1コ'))