Read a csv file from aws s3 using boto and pandas
Here is what I have done to successfully read the df
from a csv
on S3.
import pandas as pd
import boto3
bucket = "yourbucket"
file_name = "your_file.csv"
s3 = boto3.client('s3')
# 's3' is a key word. create connection to S3 using default config and all buckets within S3
obj = s3.get_object(Bucket= bucket, Key= file_name)
# get object and file (key) from bucket
initial_df = pd.read_csv(obj['Body']) # 'Body' is a key word
Maybe you can try to use pandas read_sql and pyathena:
from pyathena import connect
import pandas as pd
conn = connect(s3_staging_dir='s3://bucket/folder',region_name='region')
df = pd.read_sql('select * from database.table', conn)
This worked for me.
import pandas as pd
import boto3
import io
s3_file_key = 'data/test.csv'
bucket = 'data-bucket'
s3 = boto3.client('s3')
obj = s3.get_object(Bucket=bucket, Key=s3_file_key)
initial_df = pd.read_csv(io.BytesIO(obj['Body'].read()))