file operation

Posted by neverset on November 18, 2022

There are libraries that are more efficient than the plain `with open(...)` idiom for dealing with files in the file system.

Path.open

# pathlib ships with the standard library (Python 3.4+), so the pathlib2
# backport is not needed on any interpreter that can run the f-strings below.
from pathlib import Path

# Path.open() is used as a context manager, so the file is closed on exit.
example_path = Path('./info.csv')
with example_path.open() as f:
    print(f.readline())  # first line only
    print(f.read())      # everything after the first line

Path.read_text(): read a file in text format. Path.read_bytes(): read a file in binary format. Path.write_text(): write a file in text format. Path.write_bytes(): write a file in binary format.

# Store a bytes payload in the file named 'my_binary_file'.
payload = b'Binary file contents'
p = Path('my_binary_file')
p.write_bytes(payload)

fileinput

fileinput.input

By default, fileinput.input() reads the files named in sys.argv[1:] and falls back to stdin when none are given.

import fileinput
# Every line already ends with its own newline, so suppress the extra one
# that print() would otherwise append (avoids double-spaced output).
for line in fileinput.input():
    print(line, end="")

To read from specific files, pass them with the files argument:

import fileinput
# Walk through both CSV files as a single stream and prefix every line with
# the file it came from and its line number.
with fileinput.input(files=('info1.csv', 'info2.csv')) as stream:
    for text in stream:
        name, number = fileinput.filename(), fileinput.lineno()
        print(f'{name} 第{number}行: {text}', end='')

it works well with glob

import fileinput
import glob

csv_files = glob.glob("*.csv")
# fileinput falls back to stdin when it is handed an empty file list, so only
# start reading when the pattern actually matched something.
if csv_files:
    for line in fileinput.input(csv_files):
        if fileinput.isfirstline():
            # Banner printed once per file, padded with '-' to 50 columns.
            print(f'Reading {fileinput.filename()}...'.center(50, '-'))
        # filelineno() restarts at 1 for each file; the line keeps its own newline.
        print(f'{fileinput.filelineno()}: {line.upper()}', end="")

codecs

It handles encoding and decoding automatically, which helps when the source format is unknown.

import codecs

# Placeholder paths: the CSV file to convert and the UTF-8 copy to create.
src = "......\\xxxx.csv"
dst = "......\\xxx_utf8.csv"

def ReadFile(filePath, encoding=None):
    """Return the whole content of ``filePath`` as text.

    Args:
        filePath: Path of the file to read.
        encoding: Name of the codec used to decode the file. ``None`` (the
            default) keeps the previous behaviour of decoding with the
            platform's default encoding.

    Returns:
        The decoded content of the file as a string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in the encoding used.
    """
    # NOTE: when an explicit encoding is passed, codecs.open() opens the file
    # in binary mode, so line endings are returned untranslated.
    with codecs.open(filePath, "r", encoding=encoding) as f:
        return f.read()

def WriteFile(filePath, u, encoding="utf-8"):
    """Encode the text ``u`` and write the resulting bytes to ``filePath``.

    The target is opened in binary write mode, so any existing content is
    replaced. Characters that ``encoding`` cannot represent are dropped
    (``errors="ignore"``) rather than raising an error.
    """
    with codecs.open(filePath, "wb") as out:
        encoded = u.encode(encoding, errors="ignore")
        out.write(encoded)

def CSV_2_UTF8(src, dst):
    """Read the text of ``src`` and save it to ``dst`` encoded as UTF-8."""
    WriteFile(dst, ReadFile(src), encoding="utf-8")
    
# Convert the file at src and write the UTF-8 result to dst.
CSV_2_UTF8(src, dst)

lazylines

It is used to read large files lazily, without needing to load all of the data into memory.

import lazylines

# Print each line of the file with surrounding whitespace removed.
# NOTE(review): lazylines is a third-party package; confirm that it provides
# an open() context manager as used here.
with lazylines.open('large_file.txt') as reader:
    for entry in reader:
        stripped = entry.strip()
        print(stripped)