p≡p engine FORK
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

40 lines
1.0 KiB

  1. #! /usr/bin/env python3
  2. # This file is under GNU General Public License 3.0
  3. # see LICENSE.txt
  4. from argparse import ArgumentParser
  5. from fileinput import FileInput, hook_encoded
  6. import re, itertools, sys
  7. space = re.compile(r'^\s')
  8. p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
  9. p.add_argument('--input', '-i', type=str, default="somefile.cvs",
  10. help='input file')
  11. p.add_argument('--length', '-l', type=int, default=100,
  12. help='min word length to stripp a line')
  13. args = p.parse_args()
  14. try:
  15. from icu import UnicodeString, Locale
  16. except ImportError:
  17. print("warning: PyICU not installed, using fallback", file=sys.stderr)
  18. else:
  19. locale = Locale("utf-8")
  20. _all = (
  21. line.split(',')
  22. for line in FileInput(
  23. args.input,
  24. openhook=hook_encoded("utf-8")
  25. )
  26. if not space.match(line)
  27. )
  28. _some = (line for line in _all if len(line[2]) < args.length)
  29. for i, w in enumerate(_some):
  30. print("{l},{i},{w},0".format(l=w[0], i=i, w=w[2]))