2016-07-02 16:53:53 +02:00
|
|
|
#! /usr/bin/env python3
|
|
|
|
|
2016-12-27 21:13:41 +01:00
|
|
|
# This file is under GNU General Public License 3.0
|
|
|
|
# see LICENSE.txt
|
|
|
|
|
|
|
|
|
2016-07-02 16:53:53 +02:00
|
|
|
from argparse import ArgumentParser
|
|
|
|
from fileinput import FileInput, hook_encoded
|
|
|
|
import re, itertools, sys
|
|
|
|
|
|
|
|
# Matches any line whose very first character is whitespace. A line that
# consists only of a newline also matches, because "\n" counts as whitespace.
space = re.compile(pattern=r'^\s', flags=0)
|
|
|
|
|
|
|
|
# Command-line interface of the script: the file to read and the length
# threshold that is later compared against a field of every row.
p = ArgumentParser(
    description="re-write re-order csv and strip lines with too long words",
)
p.add_argument(
    '--input', '-i',
    default="somefile.cvs",
    type=str,
    help='input file',
)
p.add_argument(
    '--length', '-l',
    default=100,
    type=int,
    help='min word length to stripp a line',
)

# Parsed at module level, so the process arguments are read as soon as the
# script starts.
args = p.parse_args()
|
|
|
|
|
|
|
|
# PyICU is an optional dependency: when it cannot be imported the script only
# emits a warning on stderr and carries on; when it is available an ICU
# Locale object is created.
# NOTE(review): neither `locale` nor `UnicodeString` is referenced anywhere
# else in the visible part of this script -- confirm whether they are needed.
try:
    from icu import UnicodeString, Locale
except ImportError:
    sys.stderr.write("warning: PyICU not installed, using fallback" + "\n")
else:
    locale = Locale("utf-8")
|
|
|
|
|
|
|
|
def _read_rows(path):
    """Lazily yield the rows of the comma-separated file at *path*.

    The file is decoded as UTF-8. Lines whose first character is whitespace
    (this includes empty lines, which consist of a bare newline) are skipped.
    Every remaining line is yielded as a list of its comma-separated fields.

    The line terminator is removed before splitting. Without that, the
    newline stays glued to the last field of every row, so a row with exactly
    three fields would carry "\\n" in its third field -- inflating its length
    and breaking the output record across two lines.
    """
    # The context manager guarantees the input is closed even when the
    # consumer stops iterating early or an error interrupts the run.
    with FileInput(path, openhook=hook_encoded("utf-8")) as lines:
        for line in lines:
            if space.match(line):
                continue
            yield line.rstrip('\n').split(',')


# Generator over all usable rows of the input file; nothing is read until it
# is iterated.
_all = _read_rows(args.input)
|
|
|
|
|
2016-12-09 15:16:48 +01:00
|
|
|
# Keep only the rows whose third field is shorter than the --length
# threshold; evaluation stays lazy.
_some = (
    row
    for row in _all
    if args.length > len(row[2])
)
|
2016-07-02 16:53:53 +02:00
|
|
|
|
2016-12-09 15:16:48 +01:00
|
|
|
# Emit one record per surviving row on stdout: first field, running index
# (counted from 0 over the surviving rows), third field and a constant 0.
for index, fields in enumerate(_some, start=0):
    record = "{l},{i},{w},0".format(l=fields[0], i=index, w=fields[2])
    print(record)
|