【Rust】Shift-JIS ファイルを UTF-8 へ変換

encoding_rs クレートを利用。

バイトバッファ経由での処理となるが、1文字あたり Shift-JIS は1〜2バイト、UTF-8 は1〜4バイトで、変換後のバイト数を事前に見積もれないため、意外と面倒。

use std::process;
use std::error::Error;
use std::io::Write;
use std::io::Read;
use std::fs::File;
use encoding_rs;

/// Program entry point: runs the conversion and reports any failure.
///
/// On error the message is written to standard error (so it does not mix with
/// regular output) and the process exits with status 1.
fn main() {
    if let Err(err) = run() {
        eprintln!("{}", err);
        process::exit(1);
    }
}

/// Converts the Shift-JIS encoded file `input.txt` into the UTF-8 file `output.txt`.
///
/// The input is streamed through fixed-size buffers, so the whole file is never
/// held in memory. The decoder's "had errors" flag is ignored, so malformed
/// Shift-JIS input does not make this function fail.
///
/// # Errors
///
/// Returns an error if `input.txt` cannot be opened, `output.txt` cannot be
/// created, or any read, write, or flush fails.
fn run() -> Result<(), Box<dyn Error>> {
    let mut file = File::open("input.txt")?;
    let mut output = File::create("output.txt")?;

    let mut decoder = encoding_rs::SHIFT_JIS.new_decoder();

    let mut input_buffer = [0u8; 2048];
    // `decode_to_utf8` writes straight into a byte slice, so no `unsafe`
    // reinterpretation of the buffer as `&mut str` is required.
    let mut output_buffer = [0u8; 4096];

    loop {
        // Read the next chunk; a read interrupted by a signal is simply retried.
        let bytes_read = match file.read(&mut input_buffer) {
            Ok(n) => n,
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e.into()),
        };

        // A zero-length read means end of file. The decoder must be told so it
        // can flush any pending state (e.g. a dangling lead byte).
        let is_last = bytes_read == 0;
        let mut pending = &input_buffer[..bytes_read];

        loop {
            // Decode as much of the pending input as fits in the output buffer.
            let (result, consumed, written, _had_errors) =
                decoder.decode_to_utf8(pending, &mut output_buffer, is_last);
            pending = &pending[consumed..];

            // Persist whatever was produced before asking the decoder for more.
            output.write_all(&output_buffer[..written])?;

            match result {
                // All pending input was consumed: fetch the next chunk.
                encoding_rs::CoderResult::InputEmpty => break,
                // Output buffer filled up: it has been written out, decode again.
                encoding_rs::CoderResult::OutputFull => continue,
            }
        }

        if is_last {
            break;
        }
    }
    output.flush()?;

    Ok(())
}