mirror of https://github.com/morpheus65535/bazarr
35 lines
844 B
Python
35 lines
844 B
Python
|
#!/usr/bin/env python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
"""
|
||
|
Words utils
|
||
|
"""
|
||
|
from collections import namedtuple
|
||
|
|
||
|
from . import seps
|
||
|
|
||
|
# Record yielded by iter_words: ``span`` is the (start, end) index pair of the
# word inside the scanned string and ``value`` is the corresponding substring.
_Word = namedtuple('_Word', ['span', 'value'])
|
||
|
|
||
|
|
||
|
def iter_words(string):
    """
    Iterate on all words in a string.

    A word is a maximal run of consecutive characters that are not separators.
    A character is treated as a separator only when it is ASCII (code point
    below 128) and is contained in ``seps``.

    :param string: text to split into words
    :type string: str
    :return: one ``_Word`` per word, in order of appearance; ``span`` is the
        half-open ``(start, end)`` index pair of the word in ``string`` and
        ``value`` is the matching substring
    :rtype: iterable[_Word]
    """
    # Index of the first character of the word currently being scanned,
    # or None while we are between words.
    word_start = None
    for index, char in enumerate(string):
        # Make sure we don't exclude unicode characters.
        if ord(char) < 128 and char in seps:
            if word_start is not None:
                # A separator closes the word that was in progress.
                yield _Word(span=(word_start, index), value=string[word_start:index])
                word_start = None
        elif word_start is None:
            word_start = index
    if word_start is not None:
        # The string ended in the middle of a word: flush it.
        end = len(string)
        yield _Word(span=(word_start, end), value=string[word_start:end])
|