files python you code example

Example 1: python read file

# Basic syntax:
with open('/path/to/filename.extension', 'open_mode') as filename:
  file_data = filename.readlines()	# Or filename.read()
# Where:
#	- open() returns a file object (bound here to the name "filename"),
#		which then needs to be read with one of the read options
#	- readlines() returns each line of the file as an element in a list
#		(each element keeps its trailing newline character)
#	- read() returns the file contents as one long new-line-separated
#		string
#	- open_mode can be one of:
#		- "r" = Read which opens a file for reading (error if the file
#			doesn't exist)
#		- "a" = Append which opens a file for appending (creates the
#			file if it doesn't exist)
#		- "w" = Write which opens a file for writing (creates the file
#			if it doesn't exist, and erases its contents if it does)
#		- "x" = Create which creates the specified file (returns an error
#			if the file exists)
# Note, "with open() as" is recommended because the file is closed
#	automatically so you don't have to remember to use file.close()

# Basic syntax for a delimited file with multiple fields:
import csv
with open('/path/to/filename.extension', 'open_mode', newline='') as filename:
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation recommends for file objects passed to csv.reader
    file_data = csv.reader(filename, delimiter='delimiter')
    # Read every row while the file is still open; each row is a list of
    # field strings
    data_as_list = list(file_data)
# Where:
#	- csv.reader can be used for files that use any delimiter, not just
#		commas, e.g.: '\t', '|', ';', etc. (It's a bit of a misnomer)
#	- csv.reader() returns a csv.reader object which can be iterated 
#		over, directly converted to a list, and etc. 

# Importing data using Numpy:
import numpy as np
data = np.loadtxt(
    '/path/to/filename.extension',
    delimiter=',',    # String that separates the values on each line
    skiprows=2,       # How many rows at the top of the file to skip
    usecols=[0, 2],   # Which columns to read
    dtype=str,        # Data type of the resulting array
)

# Importing data using Pandas:
import pandas as pd
data = pd.read_csv(
    '/path/to/filename.extension',
    nrows=5,           # Number of rows of the file to read
    header=None,       # No header row; columns are numbered instead
    sep='\t',          # Delimiter to use
    comment='#',       # Character that starts a comment; rest of line is skipped
    na_values=[""],    # Additional strings to recognize as NA/NaN
)

# Note, pandas can also import excel files with pd.read_excel()

Example 2: read files and write into other files python

import sys
import glob
import os.path

list_of_files = glob.glob('/Users/Emily/Topics/*.txt')  # 500 files

# For every topic file, write a cleaned copy (newline characters and "//"
# removed) under the same base name into the UpdatedTopics folder.
for file_name in list_of_files:
    print(file_name)

    # Reading needs to be done *inside the loop*, once per file. The `with`
    # block closes the handle even if reading raises.
    with open(file_name, 'r') as f:
        lst = [line.replace("\n", '').replace("//", '') for line in f]

    output_path = os.path.join('/Users/Emily/UpdatedTopics',
                               os.path.basename(file_name))

    # NOTE: the newline characters were removed above, so the cleaned lines
    # are written back-to-back with no line breaks between them.
    with open(output_path, 'w') as f:
        f.writelines(lst)