data import code example
Example: python import data
# Basic syntax:
# Open the file and read its lines into a list. Replace 'open_mode' with a
# real mode string (e.g. 'r'); the literal placeholder is not a valid mode.
with open('/path/to/filename.extension', 'open_mode') as filename:
    # The read must happen inside the with-block, while the file is open.
    file_data = filename.readlines()  # Or filename.read()
# Where:
# - open imports the file as a file object which then needs to be read
# with one of the read options
# - readlines() imports each line of the file as an element in a list
# - read() imports the file contents as one long new-line-separated
# string
# - open_mode can be one of:
# - "r" = Read which opens a file for reading (error if the file
# doesn't exist)
# - "a" = Append which opens a file for appending (creates the
# file if it doesn't exist)
# - "w" = Write which opens a file for writing (creates the file
# if it doesn't exist, and erases its existing contents if it does)
# - "x" = Create which creates the specified file (raises an error
# if the file already exists)
# Note, "with open() as" is recommended because the file is closed
# automatically so you don't have to remember to use file.close()
# Note, if you're getting unwanted newline characters with this approach,
# you can run: file_data = filename.read().splitlines() instead
# Basic syntax for a delimited file with multiple fields:
import csv
# Parse a delimited text file into a list of rows (each row is a list of
# strings). Replace 'open_mode' with a real mode (e.g. 'r') and 'delimiter'
# with the actual one-character separator.
# newline='' lets the csv module handle line endings itself, so newlines
# embedded in quoted fields are parsed correctly.
with open('/path/to/filename.extension', 'open_mode', newline='') as filename:
    file_data = csv.reader(filename, delimiter='delimiter')
    # csv.reader reads lazily from the open file, so it must be consumed
    # before the with-block closes the file.
    data_as_list = list(file_data)
# Where:
# - csv.reader can be used for files that use any delimiter, not just
# commas, e.g.: '\t', '|', ';', etc. (It's a bit of a misnomer)
# - csv.reader() returns a csv.reader object which can be iterated
# over, converted directly to a list, etc.
# Importing data using Numpy:
import numpy as np
# Load selected columns of a comma-separated text file into a NumPy array.
data = np.loadtxt(
    '/path/to/filename.extension',
    dtype=str,        # Data type of the returned array
    usecols=[0, 2],   # Indices of the columns to keep
    skiprows=2,       # Skip this many lines at the top of the file
    delimiter=',',    # Separator between values
)
# Importing data using Pandas:
import pandas as pd
# Load the first rows of a tab-separated text file into a pandas DataFrame.
data = pd.read_csv(
    '/path/to/filename.extension',
    sep='\t',          # Field delimiter (here: tab)
    header=None,       # File has no header row; columns get integer labels
    comment='#',       # Ignore the rest of a line after this character
    na_values=[""],    # Additional strings to treat as NA/NaN
    nrows=5,           # Read only this many rows of the file
)
# Note, pandas can also import Excel files with pd.read_excel()