More URL detection tests

This commit is contained in:
Kovid Goyal 2017-09-13 23:15:27 +05:30
parent 88d896e745
commit 271b623f82
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 26 additions and 6 deletions

View File

@ -108,6 +108,16 @@ has_url_prefix_at(Line *self, index_type at, index_type min_prefix_len, index_ty
#define MAX_URL_SCHEME_LEN 5
#define MIN_URL_LEN 5
// Check that a run of URL characters starts at cell x.
// Returns true only when the line has more than x + MIN_URL_LEN + 3 cells and
// every cell in [x, x + MIN_URL_LEN + 3) passes is_url_char() after its
// character is masked with CHAR_MASK; returns false otherwise.
static inline bool
has_url_beyond(Line *self, index_type x) {
    const index_type limit = x + MIN_URL_LEN + 3;
    // The line must extend strictly past the window being examined, so the
    // scan below never needs to be clamped to the line width.
    if (limit >= self->xnum) return false;
    index_type pos = x;
    while (pos < limit) {
        if (!is_url_char(self->cells[pos].ch & CHAR_MASK)) return false;
        pos++;
    }
    return true;
}
index_type
line_url_start_at(Line *self, index_type x) {
// Find the starting cell for a URL that contains the position x. A URL is defined as
@ -116,11 +126,11 @@ line_url_start_at(Line *self, index_type x) {
index_type ds_pos = 0, t;
// First look for :// ahead of x
if (self->xnum - x > MAX_URL_SCHEME_LEN + 3) ds_pos = find_colon_slash(self, x + MAX_URL_SCHEME_LEN + 3, x < 2 ? 0 : x - 2);
if (ds_pos != 0) {
if (ds_pos != 0 && has_url_beyond(self, ds_pos)) {
if (has_url_prefix_at(self, ds_pos, ds_pos > x ? ds_pos - x: 0, &t)) return t;
}
ds_pos = find_colon_slash(self, x, 0);
if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3) return self->xnum;
if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3 || !has_url_beyond(self, ds_pos)) return self->xnum;
if (has_url_prefix_at(self, ds_pos, 0, &t)) return t;
return self->xnum;
}

View File

@ -222,20 +222,30 @@ class TestDataTypes(BaseTest):
l.set_text(t, 0, len(t), C())
return l
def lspace_test(n):
l = create(' ' * n + 'http://acme.com')
def lspace_test(n, scheme='http'):
l = create(' ' * n + scheme + '://acme.com')
for i in range(0, n):
self.ae(l.url_start_at(i), len(l))
for i in range(n, len(l)):
self.ae(l.url_start_at(i), n)
for i in range(5):
lspace_test(i)
# Run the leading-space check for every listed scheme with 0..6 leading
# spaces. The scheme has to be forwarded explicitly; otherwise lspace_test
# falls back to its default ('http') and the other schemes are never tested.
for i in range(7):
    for scheme in 'http https ftp file'.split():
        lspace_test(i, scheme)
l = create('b https://testing.me a')
for s in (0, 1, len(l) - 1, len(l) - 2):
self.ae(l.url_start_at(s), len(l), 'failed with start at: %d' % s)
for s in range(2, len(l) - 2):
self.ae(l.url_start_at(s), 2, 'failed with start at: %d (%s)' % (s, str(l)[s:]))
def no_url(t):
    # Build a line from t and assert that url_start_at() returns the line
    # length for every position in it.
    line = create(t)
    width = len(line)
    for pos in range(width):
        self.ae(line.url_start_at(pos), width)
no_url('https:// testing.me a')
no_url('h ttp://acme.com')
no_url('http: //acme.com')
no_url('http:/ /acme.com')
def rewrap(self, lb, lb2):
hb = HistoryBuf(lb2.ynum, lb2.xnum)
cy = lb.rewrap(lb2, hb)