2017-10-16 19:42:43 +00:00
|
|
|
#!/usr/bin/env python3
|
2017-10-08 16:12:27 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
#
|
|
|
|
# SSnR.py
|
|
|
|
#
|
|
|
|
# Copyright 2017 Rémi BERTHO <remi.bertho@dalan.fr>
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
# MA 02110-1301, USA.
|
|
|
|
#
|
|
|
|
#
|
|
|
|
|
|
|
|
import sys
|
2017-10-17 21:43:32 +00:00
|
|
|
import os.path
|
2017-10-21 12:57:10 +00:00
|
|
|
from os import walk
|
2017-10-08 16:25:04 +00:00
|
|
|
import argparse
|
2017-10-08 16:12:27 +00:00
|
|
|
import regex
|
2017-11-03 20:32:19 +00:00
|
|
|
import pyperclip
|
2017-10-08 16:12:27 +00:00
|
|
|
|
2017-10-08 16:25:04 +00:00
|
|
|
def _build_parser():
    """
    Build the argparse parser describing every SSnR command-line option.

    :return: The configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(description='Search and replace tool for UTF-8 files',
                                     prog='SSnR', allow_abbrev=False)

    regex_group = parser.add_argument_group('Regular expression', "Search and replace regular expression")
    regex_group.add_argument('-ex', '--regex', help='Regex', required=True)
    regex_group.add_argument('-rex', '--replace', help='Replace', required=False)

    input_group = parser.add_argument_group('Input', "Input arguments, if none set use stdin")
    input_group.add_argument('-if', '--input_file', help='Input file', required=False, nargs='+')
    input_group.add_argument('-iex', '--input_regex', help='Regex input file', required=False)
    input_group.add_argument('-str', '--input_string', help='Input string', required=False)
    input_group.add_argument('-ic', '--input_clipboard', help='Use the clipboard as input',
                             required=False, action='store_true')

    output_group = parser.add_argument_group('Output', "In replace mode, ouput arguments, if none set use stdout")
    output_group.add_argument('-oex', '--output_regex', help='Regex output file', required=False)
    output_group.add_argument('-of', '--output_file', help='Output file', required=False)
    output_group.add_argument('-oc', '--output_clipboard', help='Use the clipboard as output',
                              required=False, action='store_true')

    option_group = parser.add_argument_group('Options', "Some options")
    option_group.add_argument('-pm', '--print_nb_match', help='Print the number of match in replace',
                              required=False, action='store_true')
    option_group.add_argument('-igc', '--ignore_case', help='Ignore the case',
                              required=False, action='store_true')
    option_group.add_argument('-r', '--recursive', help='Use the regex input recursivly in the folders',
                              required=False, action='store_true')
    return parser


def _write_output_file(path, content):
    """
    Write content to path as UTF-8, closing the file in every case.

    :param path: Path of the file to (over)write
    :param content: Text to write
    :return: True on success, False when the file could not be written
             (the error has then already been printed)
    """
    try:
        with open(path, "w", encoding="utf8") as output_file:
            output_file.write(content)
    except OSError as exception:
        print("Error: file not found: " + str(exception))
        return False
    return True


def main():
    """
    Entry point of the SSnR command-line tool.

    Parses the command line, compiles the search regex, picks the input
    (files matched by a regex, explicit files, a string, the clipboard or
    stdin) and either lists the matches or, when a replacement is given,
    writes the replaced text to the chosen output (files, the clipboard or
    stdout).

    :return: 0 on success, -1 when an error was reported
    """
    # Parse arguments
    args = vars(_build_parser().parse_args())

    # Compile regex
    try:
        ex = compile_regex(args["regex"], args["ignore_case"])
    except SyntaxError as exception:
        print("Error when compiling regex: " + str(exception))
        return -1
    except regex.error as exception:
        print("Error when compiling regex: " + exception.msg)
        return -1

    # Get input
    input_filenames = []
    # Stays None unless --input_regex is used; the output selection below
    # relies on that to reject --output_regex without --input_regex.
    input_ex = None
    string = None
    is_file = False
    if args["input_regex"] is not None:
        try:
            input_ex = compile_regex(args["input_regex"], False)
        except SyntaxError as exception:
            print("Error when compiling input regex: " + str(exception))
            return -1
        except regex.error as exception:
            print("Error when compiling input regex: " + exception.msg)
            return -1
        for dirpath, _dirnames, dir_filenames in walk("."):
            for filename in dir_filenames:
                if input_ex.fullmatch(filename):
                    input_filenames.append(os.path.join(dirpath, filename))
            # Without --recursive only the first directory yielded by walk() is scanned
            if not args["recursive"]:
                break
        if not input_filenames:
            print("Error: no input file")
            return -1
        is_file = True
    elif args["input_file"] is not None:
        for input_file in args["input_file"]:
            if os.path.isfile(input_file):
                input_filenames.append(input_file)
            else:
                print("Error: file not found: " + str(input_file))
        if not input_filenames:
            print("Error: no input file")
            return -1
        is_file = True
    elif args["input_string"] is not None:
        string = args["input_string"]
    elif args["input_clipboard"]:
        string = pyperclip.paste()
    else:
        string = sys.stdin.read()

    # Get output
    output_filenames = []
    use_output_file = False
    use_output_clipboard = False
    if args["output_regex"] is not None:
        if input_ex is None:
            print("Error: You need a regex input file to use a regex output file")
            return -1
        # One output name per input file, derived from the input path
        for input_filename in input_filenames:
            output_filenames.append(input_ex.subn(args["output_regex"], input_filename)[0])
        use_output_file = True
    elif args["output_file"] is not None:
        output_filenames.append(args["output_file"])
        use_output_file = True
    elif args["output_clipboard"]:
        use_output_clipboard = True

    # Search or replace
    if is_file:
        for file_index, filename in enumerate(input_filenames):
            try:
                with open(filename, "r", encoding="utf8") as file:
                    string = file.read()
            except OSError as exception:
                print("Error: file \"" + filename + "\" not found: " + str(exception))
                continue
            except UnicodeDecodeError as exception:
                print("Error: the file \"" + filename + "\" is not UTF-8 encoded: " + str(exception))
                continue

            if args["replace"] is not None:
                replace_string, nb_replace = replace(ex, string, args["replace"])
                if use_output_file:
                    # With several output names they are paired with the inputs by
                    # position, so a skipped input never shifts the pairing; a single
                    # output name is reused for every input.
                    if len(output_filenames) > 1:
                        output_filename = output_filenames[file_index]
                    else:
                        output_filename = output_filenames[0]
                    if not _write_output_file(output_filename, replace_string):
                        return -1
                    print("File: " + filename)
                    print(" - Number of replace: " + str(nb_replace))
                elif use_output_clipboard:
                    pyperclip.copy(replace_string)
                else:
                    print(replace_string)
                    # Only print the count on request so that stdout otherwise
                    # carries just the replaced text
                    if args["print_nb_match"]:
                        print("File: " + filename)
                        print(" - Number of replace: " + str(nb_replace))
            else:
                nb_match, str_found = search(ex, string, is_file)
                if nb_match > 0:
                    print("File: " + filename)
                    print(" - Number of match: " + str(nb_match))
                    print(str_found)
    else:
        if args["replace"] is not None:
            replace_string, nb_replace = replace(ex, string, args["replace"])
            if use_output_file:
                print("Number of replace: " + str(nb_replace))
                if not _write_output_file(output_filenames[0], replace_string):
                    return -1
            elif use_output_clipboard:
                pyperclip.copy(replace_string)
            else:
                print(replace_string)
                if args["print_nb_match"]:
                    print("Number of replace: " + str(nb_replace))
        else:
            nb_match, str_found = search(ex, string, is_file)
            print("Number of match: " + str(nb_match))
            print(str_found)
    return 0
|
|
|
|
|
2017-10-21 14:49:43 +00:00
|
|
|
def compile_regex(ex, ignore_case):
    """
    Compile a regular expression in multi-line mode.

    :param ex: Regular expression
    :param ignore_case: When true, the pattern is also compiled with the
                        IGNORECASE flag
    :return: The compiled pattern
    :raises regex.error: Propagated from regex.compile when the pattern is invalid
    """
    flags = regex.MULTILINE
    if ignore_case:
        flags |= regex.IGNORECASE
    # regex.compile() reports an invalid pattern by raising regex.error and
    # never returns None, so no extra check of the result is needed.
    return regex.compile(ex, flags)
|
|
|
|
|
2017-10-14 09:46:09 +00:00
|
|
|
def search(ex, string, is_file):
    """
    Find every match of a compiled regular expression in a string.

    :param ex: Compiled regular expression
    :param string: The text to search
    :param is_file: When true, each match is reported with its line number
                    and its position inside that line; otherwise with its
                    offsets in the whole string
    :return: A tuple (number of matches, report text with one line per match)
    """
    # Line starts are only needed for the per-line report
    line_starts = get_line_pos(string) if is_file else None

    report = []
    for hit in ex.finditer(string):
        if is_file:
            line_no, col_begin, col_end = find_line(hit.start(0), hit.end(0), line_starts)
            report.append(" - Found \"%s\" at line %d [%d:%d]\n" % (hit.group(0), line_no, col_begin, col_end))
        else:
            report.append(" - Found \"%s\" at [%d:%d]\n" % (hit.group(0), hit.start(0), hit.end(0)))
    return len(report), "".join(report)
|
2017-10-08 16:12:27 +00:00
|
|
|
|
|
|
|
|
2017-10-21 14:09:06 +00:00
|
|
|
def replace(ex, string, replace_string):
    """
    Replace every match of a compiled regular expression in a string.

    :param ex: Compiled regular expression
    :param string: The text in which to replace
    :param replace_string: The replacement passed to the pattern's subn()
    :return: A tuple (new text, number of replacements made)
    """
    new_string, nb_replace = ex.subn(replace_string, string)
    return new_string, nb_replace
|
2017-10-13 16:36:54 +00:00
|
|
|
|
2017-10-14 09:46:09 +00:00
|
|
|
def get_line_pos(string):
    """
    Get the positions at which the lines of a string start.

    :param string: a string
    :return: The list of offsets where "^" matches in multi-line mode,
             in the order they are found
    """
    line_start = regex.compile("^", regex.MULTILINE)
    return [hit.start(0) for hit in line_start.finditer(string)]
|
|
|
|
|
|
|
|
def find_line(begin_pos, end_pos, new_lines):
    """
    Find the line number of a span and its position inside that line.

    :param begin_pos: offset of the start of the span in the whole string
    :param end_pos: offset of the end of the span in the whole string
    :param new_lines: the offsets at which the lines start
    :return: A tuple (line number, begin position in the line counted
             from 1, end position in the line)
    """
    line_number = 0
    line_start = 0
    for candidate in new_lines:
        # The first line starting after the span ends the scan
        if candidate > begin_pos:
            break
        line_number += 1
        line_start = candidate
    return line_number, begin_pos - line_start + 1, end_pos - line_start
|
2017-10-14 09:46:09 +00:00
|
|
|
|
|
|
|
|
2017-10-08 16:12:27 +00:00
|
|
|
if __name__ == '__main__':
    # Run the tool only when executed as a script and hand the value
    # returned by main() to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)
|