Index: tools/clang-format/clang-format-test.el =================================================================== --- tools/clang-format/clang-format-test.el +++ tools/clang-format/clang-format-test.el @@ -31,8 +31,8 @@ (with-current-buffer stdout (insert " - - + + ")) 0))))) @@ -59,14 +59,14 @@ '("-output-replacements-xml" "-assume-filename" "foo.cpp" "-style" "file" ;; Length of the UTF-8 byte-order mark. - "-offset" "3" + "-offset" "0" ;; We have two lines with 2×2 bytes for the umlauts, - ;; 2 bytes for the line ending, and 3 bytes for the + ;; 1 byte for the line ending, and 3 bytes for the ;; other ASCII characters each. - "-length" "18" + "-length" "16" ;; Length of a single line (without line ending) plus ;; BOM. - "-cursor" "10"))))))) + "-cursor" "7"))))))) (ert-deftest clang-format-buffer--process-encoding () "Tests that text is sent to the clang-format process in the @@ -105,6 +105,23 @@ (clang-format-buffer)) (should (equal (buffer-string) "ä\n")) (should (eobp))) - (should (equal call-process-inputs '("ef bb bf c3 a4 0d 0a "))))) + (should (equal call-process-inputs '("c3 a4 0a "))))) + +(ert-deftest clang-format-buffer--end-to-end () + "End-to-end test for ‘clang-format-buffer’. +Actually calls the clang-format binary." + (skip-unless (file-executable-p clang-format-executable)) + (with-temp-buffer + (let ((buffer-file-name "foo.cpp") + (buffer-file-coding-system 'utf-8-with-signature-dos) + (default-process-coding-system 'latin-1-unix)) + (insert "ä =ö;\nü= ß;\n") + (goto-char (point-min)) + (end-of-line) + (clang-format-buffer)) + (should (equal (buffer-string) "ä = ö;\nü = ß;\n")) + (should (eolp)) + (should (equal (buffer-substring (point) (point-max)) + "\nü = ß;\n")))) ;;; clang-format-test.el ends here Index: tools/clang-format/clang-format.el =================================================================== --- tools/clang-format/clang-format.el +++ tools/clang-format/clang-format.el @@ -95,9 +95,10 @@ (defun clang-format--replace (offset length &optional text) "Replace the region defined by OFFSET and LENGTH with TEXT. OFFSET and LENGTH are measured in bytes, not characters. OFFSET -is a zero-based file offset." - (let ((start (clang-format--filepos-to-bufferpos offset 'exact)) - (end (clang-format--filepos-to-bufferpos (+ offset length) 'exact))) +is a zero-based file offset, assuming ‘utf-8-unix’ coding." + (let ((start (clang-format--filepos-to-bufferpos offset 'exact 'utf-8-unix)) + (end (clang-format--filepos-to-bufferpos (+ offset length) 'exact + 'utf-8-unix))) (goto-char start) (delete-region start end) (when text @@ -130,15 +131,18 @@ (unless style (setq style clang-format-style)) - (let ((file-start (clang-format--bufferpos-to-filepos start 'approximate)) - (file-end (clang-format--bufferpos-to-filepos end 'approximate)) - (cursor (clang-format--bufferpos-to-filepos (point) 'exact)) + (let ((file-start (clang-format--bufferpos-to-filepos start 'approximate + 'utf-8-unix)) + (file-end (clang-format--bufferpos-to-filepos end 'approximate + 'utf-8-unix)) + (cursor (clang-format--bufferpos-to-filepos (point) 'exact 'utf-8-unix)) (temp-buffer (generate-new-buffer " *clang-format-temp*")) (temp-file (make-temp-file "clang-format")) - (default-process-coding-system - ;; Output is XML, which is always UTF-8. Input encoding should match - ;; the file encoding, otherwise the offsets calculated above are off. - (cons 'utf-8-unix buffer-file-coding-system))) + ;; Output is XML, which is always UTF-8. Input encoding should match + ;; the encoding used to convert between buffer and file positions, + ;; otherwise the offsets calculated above are off. For simplicity, we + ;; always use ‘utf-8-unix’ and ignore the buffer coding system. + (default-process-coding-system '(utf-8-unix . utf-8-unix))) (unwind-protect (let ((status (call-process-region nil nil clang-format-executable @@ -168,7 +172,8 @@ (dolist (rpl replacements) (apply #'clang-format--replace rpl))) (when cursor - (goto-char (clang-format--filepos-to-bufferpos cursor 'exact))) + (goto-char (clang-format--filepos-to-bufferpos cursor 'exact + 'utf-8-unix))) (if incomplete-format (message "(clang-format: incomplete (syntax errors)%s)" stderr) (message "(clang-format: success%s)" stderr))))