31
31
import sys
32
32
import tempfile
33
33
import shutil
34
+ import codecs
34
35
35
36
if sys .version_info .major == 2 :
36
37
# Pythontidy is only supported on Python2
46
47
47
48
DEFAULT_CONFIG_PATHS = ['~/.codevalidatorrc' , '/etc/codevalidatorrc' ]
48
49
50
+ # The first rule name which matches a registered encoding is used
51
+ # both as a check that the file can be read with that encoding,
52
+ # as well as an encoding filter for those rules which support fixing.
53
+
49
54
DEFAULT_RULES = [
50
55
'utf8' ,
51
56
'nobom' ,
77
82
'*.php' : DEFAULT_RULES + ['phpcs' ],
78
83
'*.phtml' : DEFAULT_RULES ,
79
84
'*.pp' : DEFAULT_RULES + ['puppet' ],
80
- '*.properties' : DEFAULT_RULES + ['ascii' ],
85
+ '*.properties' : ['ascii' ] + DEFAULT_RULES ,
81
86
'*.py' : DEFAULT_RULES + ['pyflakes' , 'pythontidy' ],
82
87
'*.rst' : DEFAULT_RULES ,
83
88
'*.rb' : DEFAULT_RULES + ['ruby' , 'rubocop' ],
113
118
114
119
STDIN_CONTENTS = None
115
120
121
+ ENCODING_BY_FILE = dict ()
116
122
117
123
class BaseException (Exception ):
118
124
@@ -164,6 +170,35 @@ def wrap(f):
164
170
165
171
return wrap
166
172
173
def needs_unicode(fix_function):
    """
    Decorator for a _fix_... function which routes its two file arguments
    through `codecs.EncodedFile` for the encoding chosen by the caller.

    The decorated function is called as `f(src, dst, encoding_or_options)`.
    The third argument is either the name of the encoding, or an options
    dict which must contain an 'encoding' key.  When a non-empty options
    dict is given it is forwarded to `fix_function` as third argument,
    otherwise `fix_function` is called with `(src, dst)` only.

    If the encoding is None (no encoding rule was found for the file) the
    file objects are handed through unwrapped.

    The returned function has an attribute `needs_encoding` which tells
    the calling function that it needs an encoding argument (the name of
    the encoding to use).

    NOTE(review): `codecs.EncodedFile(f, enc)` recodes bytes to bytes
    (decode + encode with the same codec), so the fix function still reads
    and writes byte strings -- confirm that this is what is intended.
    """
    import functools

    try:
        string_types = basestring  # Python 2: matches str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring

    @functools.wraps(fix_function)
    def wrapped_fix(src, dst, encoding_or_options):
        if encoding_or_options is None or isinstance(encoding_or_options, string_types):
            encoding = encoding_or_options
            options = None
        else:
            encoding = encoding_or_options['encoding']
            options = encoding_or_options
        if encoding is not None:
            # decode + encode; this also rejects data invalid in `encoding`
            src = codecs.EncodedFile(src, encoding)
            dst = codecs.EncodedFile(dst, encoding)
        if options:
            return fix_function(src, dst, options)
        return fix_function(src, dst)

    wrapped_fix.needs_encoding = True
    return wrapped_fix
201
+
167
202
168
203
def is_python3 (fd ):
169
204
'''check first line of file object whether it contains "python3" (shebang)'''
@@ -183,39 +218,33 @@ def _validate_notabs(fd):
183
218
return b'\t ' not in fd .read ()
184
219
185
220
221
@needs_unicode
def _fix_notabs(src, dst):
    """Copy `src` to `dst`, replacing every tab character with four spaces.

    `src` and `dst` are the file objects handed in by the `needs_unicode`
    wrapper; the data is handled as byte strings.
    """
    original = src.read()
    # match a bare TAB (not TAB followed by a space)
    fixed = original.replace(b'\t', b' ' * 4)
    dst.write(fixed)
190
226
191
227
192
228
@message('contains carriage return (CR)')
def _validate_nocr(fd):
    """Return True if the contents read from `fd` contain no CR byte.

    `fd` must yield bytes from `read()`.  The text given to `@message` is
    presumably what gets reported when this returns False -- confirm
    against the `message` decorator.
    """
    # match a bare CR (not CR followed by a space)
    return b'\r' not in fd.read()
195
231
196
232
233
@needs_unicode
def _fix_nocr(src, dst):
    """Copy `src` to `dst` with every carriage return (CR) removed.

    `src` and `dst` are the file objects handed in by the `needs_unicode`
    wrapper.  Byte literals are used, like in `_fix_notabs`, because
    `codecs.EncodedFile.read()` returns byte strings; text literals would
    raise a TypeError on Python 3.
    """
    original = src.read()
    fixed = original.replace(b'\r', b'')
    dst.write(fixed)
210
238
211
239
212
- @message ('is not ASCII encoded' )
213
- def _validate_ascii (fd ):
214
- try :
215
- fd .read ().decode ('ascii' )
216
- except UnicodeDecodeError :
217
- return False
218
- return True
240
def encoding_validator(encoding):
    """Build a validation function for the codec named `encoding`.

    The returned function takes a file object whose `read()` yields bytes.
    It returns True when the whole content decodes with `encoding`, and
    otherwise the text "is not <ENCODING>-encoded" (name upper-cased).
    """

    def validate_encoding(fd):
        try:
            fd.read().decode(encoding)
        except UnicodeDecodeError:
            verdict = "is not %s-encoded" % encoding.upper()
        else:
            verdict = True
        return verdict

    return validate_encoding
219
248
220
249
221
250
@message ('has UTF-8 byte order mark (BOM)' )
@@ -245,6 +274,7 @@ def _validate_notrailingws(fd):
245
274
return True
246
275
247
276
277
+ @needs_unicode
248
278
def _fix_notrailingws (src , dst ):
249
279
for line in src :
250
280
dst .write (line .rstrip ())
@@ -773,15 +803,29 @@ def notify(*args):
773
803
print (* args )
774
804
775
805
806
def get_encoding_rule(rules):
    """Return the first entry of `rules` that names a registered codec.

    Each rule name is probed with `codecs.lookup`; names unknown to the
    codec registry are skipped.  Returns None when no rule is a codec name.
    """
    for candidate in rules:
        try:
            codecs.lookup(candidate)
        except LookupError:
            # not an encoding name, just an ordinary rule
            continue
        else:
            return candidate
    return None
813
+
814
+
776
815
def validate_file_with_rules (fname , rules ):
816
+ encoding = get_encoding_rule (rules )
817
+ ENCODING_BY_FILE [fname ] = encoding
777
818
with open_file_for_read (fname ) as fd :
778
819
for rule in rules :
779
820
logging .debug ('Validating %s with %s..' , fname , rule )
780
821
fd .seek (0 )
781
822
func = globals ().get ('_validate_' + rule )
782
823
if not func :
783
- notify (rule , 'does not exist' )
784
- continue
824
+ if rule == encoding :
825
+ func = encoding_validator (encoding )
826
+ else :
827
+ notify (rule , 'does not exist' )
828
+ continue
785
829
options = CONFIG .get ('options' , {}).get (rule )
786
830
try :
787
831
if options :
@@ -837,6 +881,7 @@ def fix_file(fname, rules):
837
881
if CONFIG .get ('create_backup' , True ):
838
882
dirname , basename = os .path .split (fname )
839
883
shutil .copy2 (fname , os .path .join (dirname , CONFIG ['backup_filename' ].format (original = basename ))) # creates a backup
884
+ encoding = ENCODING_BY_FILE [fname ]
840
885
with open_file_for_read (fname ) as fd :
841
886
dst = fd
842
887
for rule in rules :
@@ -849,7 +894,10 @@ def fix_file(fname, rules):
849
894
src .seek (0 )
850
895
try :
851
896
if options :
897
+ options ['encoding' ] = encoding
852
898
func (src , dst , options )
899
+ elif func .needs_encoding :
900
+ func (src , dst , encoding )
853
901
else :
854
902
func (src , dst )
855
903
was_fixed &= True
@@ -863,7 +911,7 @@ def fix_file(fname, rules):
863
911
# b) some fix functions destroyed the code
864
912
if was_fixed and len (fixed ) > 0 :
865
913
with open_file_for_write (fname ) as fd :
866
- fd .write (fixed . encode () )
914
+ fd .write (fixed )
867
915
return True
868
916
else :
869
917
notify ('{0}: ERROR fixing file. File remained unchanged' .format (fname ))
0 commit comments