module documentation
Undocumented
Function | cli |
Undocumented |
Function | tokenize_file |
This command tokenizes a text stream using nltk.word_tokenize |
Constant | CONTEXT |
Undocumented |
@cli.command( 'tokenize')
@click.option( '--language', '-l', default='en', help='The language for the Punkt sentence tokenization.')
@click.option( '--preserve-line', '-l', default=True, is_flag=True, help='An option to keep the preserve the sentence and not sentence tokenize it.')
@click.option( '--processes', '-j', default=1, help='No. of processes.')
@click.option( '--encoding', '-e', default='utf8', help='Specify encoding of file.')
@click.option( '--delimiter', '-d', default=' ', help='Specify delimiter to join the tokens.')
def tokenize_file(language, preserve_line, processes, encoding, delimiter): (source) ¶
@click.option(
@click.option(
@click.option(
@click.option(
@click.option(
def tokenize_file(language, preserve_line, processes, encoding, delimiter): (source) ¶
This command tokenizes a text stream using nltk.word_tokenize.