Accessing folders, subfolders and subfiles using PyDrive (Python)
Here's my take on getting all the files within the subfolders... This lets you query by the path you set. This is different because it doesn't make 1 request for each folder. It creates batches of folders to query.
Batch Snippet:
'some_id_1234' in parents or 'some_id_1235' in parents or 'some_id_1236' in parents or 'some_id_1237' in parents or 'some_id_1238' in parents or 'some_id_1239' in parents or 'some_id_1240' in parents and trashed=false
You can then query the files in more than one folder at a time. The query string cannot be too long, though: with more than about 300 folder clauses (`'some_id_1234' in parents`) you will start to get errors, so keep the batch size around 250.
Say the folder you want to check has 1,110 folders and you set your batch size to 250. It will then make 5 separate requests to query all the folders.
-Request 1 queries 250 folders
-Request 2 queries 250 folders
-Request 3 queries 250 folders
-Request 4 queries 250 folders
-Request 5 queries 110 folders
Any subfolders found in those results are then grouped into batches and queried recursively.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
def parse_gdrive_path(gd_path):
    """Split a Google Drive path into its folder-name components.

    Forward and backward slashes are both accepted as separators. An
    optional ``prefix:`` at the start (for example a drive letter, as in
    ``'G:/Docs/Notes'``) is discarded.

    Args:
        gd_path: Path string such as ``'/Docs/Notes'``.

    Returns:
        List of folder names ordered from outermost to innermost. The list
        is empty when the path names no folder at all (``''`` or ``'/'``).
    """
    if ':' in gd_path:
        # Split only once so that colons later in the path are preserved.
        gd_path = gd_path.split(':', 1)[1]
    # Dropping empty components handles leading, trailing and any number of
    # repeated separators in a single pass.
    return [part for part in gd_path.replace('\\', '/').split('/') if part]
def resolve_path_to_id(folder_path):
    """Resolve a Drive folder path to the ID of its innermost folder.

    The path is split with ``parse_gdrive_path`` and walked from the Drive
    root, issuing one query per component through the module-level
    ``gdrive`` client. When several untrashed folders share a title under
    the same parent, the first one returned by the API is used.

    Args:
        folder_path: Path string such as ``'/Docs/Notes'``.

    Returns:
        The folder ID of the last path component, or ``'root'`` when the
        path has no components.

    Raises:
        IndexError: If a component has no matching untrashed folder under
            its parent. The exception type matches what an empty result
            list raised before; it now carries a descriptive message.
    """
    _id = 'root'
    for folder in parse_gdrive_path(folder_path):
        # The title sits inside a single-quoted query literal, so backslashes
        # and single quotes in the folder name must be escaped.
        escaped = folder.replace('\\', '\\\\').replace("'", "\\'")
        folder_list = gdrive.ListFile({
            'q': f"'{_id}' in parents and title='{escaped}' and trashed=false and mimeType='application/vnd.google-apps.folder'",
            'fields': 'items(id, title, mimeType)',
        }).GetList()
        if not folder_list:
            raise IndexError(
                f"No folder named {folder!r} found under parent id {_id!r}"
            )
        _id = folder_list[0]['id']
    return _id
def get_folder_files(folder_ids, batch_size=100):
    """Yield the untrashed children of every folder in *folder_ids*.

    The folder IDs are grouped into batches of at most *batch_size*, and
    each batch is fetched with a single ``gdrive.ListFile`` request whose
    query ORs together one ``'<id>' in parents`` clause per folder.

    This is a generator, so nothing (including argument validation) runs
    until it is first iterated.

    Args:
        folder_ids: Iterable of parent folder IDs.
        batch_size: Maximum number of folder IDs combined into one query.

    Yields:
        The items returned by ``GetList()`` for each batch, restricted to
        the ``id``, ``title``, ``mimeType`` and ``version`` fields.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")
    folder_ids = list(folder_ids)
    # Slicing keeps every batch at exactly batch_size ids (the last may be
    # shorter) and can never leave a dangling " or " at the end of a query.
    for start in range(0, len(folder_ids), batch_size):
        batch = folder_ids[start:start + batch_size]
        parents_clause = " or ".join(
            f"'{folder_id}' in parents" for folder_id in batch
        )
        # Parenthesize the OR chain so the trashed filter applies to every
        # folder in the batch rather than depending on operator precedence.
        query = f"({parents_clause}) and trashed=false"
        yield from gdrive.ListFile({
            'q': query,
            'fields': 'items(id, title, mimeType, version)',
        }).GetList()
def get_files(folder_path=None, target_ids=None, files=None):
    """Collect the titles of all files under a folder, subfolders included.

    The tree is walked one level at a time: every folder found at the
    current level is queried together through ``get_folder_files`` (in
    batches of 250), and the subfolders discovered become the next level.

    Args:
        folder_path: Drive path of the starting folder. Only used when
            *target_ids* is not given; resolved via ``resolve_path_to_id``.
        target_ids: Folder IDs to start from instead of a path.
        files: Optional list to append titles to. A fresh list is created
            for each call when omitted.

    Returns:
        The list of file titles (the same object as *files* when one was
        passed in), in level-by-level order.
    """
    # A fresh list per call: a mutable default would keep accumulating
    # titles from earlier, unrelated calls.
    if files is None:
        files = []
    if target_ids is None:
        target_ids = [resolve_path_to_id(folder_path)]
    pending = list(target_ids)
    while pending:
        subfolder_ids = []
        for f in get_folder_files(folder_ids=pending, batch_size=250):
            if f['mimeType'] == 'application/vnd.google-apps.folder':
                subfolder_ids.append(f['id'])
            else:
                files.append(f['title'])
        # Descend one level; the loop ends once no subfolders remain.
        pending = subfolder_ids
    return files
# Authenticate, then build the Drive client.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
# Module-level client: resolve_path_to_id() and get_folder_files() read
# this global, so it must exist before get_files() is called.
gdrive = GoogleDrive(gauth)
# Collect the titles of every file under the given path, subfolders included.
file_list = get_files('/Some/Folder/Path')
for f in file_list:
    print(f)
So for example:
Your google drive contains this:
(folder) Root
(folder) Docs
(subfolder) Notes
(subfolder) School
(file) notes_1.txt
(file) notes_2.txt
(file) notes_3.txt
(file) notes_4.txt
(file) notes_5.txt
(subfolder) Important
(file) important_notes_1.txt
(file) important_notes_2.txt
(file) important_notes_3.txt
(subfolder) Old Notes
(file) old_1.txt
(file) old_2.txt
(file) old_3.txt
(subfolder) Secrets
(file) secret_1.txt
(file) secret_2.txt
(file) secret_3.txt
(folder) Stuff
(file) nothing.txt
(file) this-will-not-be-found.txt
And you want to get all the files from the "Notes" folder/subfolders
You would do:
# List the title of every file under /Docs/Notes, nested subfolders included.
file_list = get_files('/Docs/Notes')
for f in file_list:
    print(f)
Output:
>> notes_1.txt
>> notes_2.txt
>> notes_3.txt
>> notes_4.txt
>> notes_5.txt
>> important_notes_1.txt
>> important_notes_2.txt
>> important_notes_3.txt
>> old_1.txt
>> old_2.txt
>> old_3.txt
>> secret_1.txt
>> secret_2.txt
>> secret_3.txt
Hopefully this helps someone :)
This requires iterating over the list of files. Based on this, the code fetches the title and URL link of each file within the folder. The code can be adjusted to list a specific folder by supplying the folder's id, such as ListFolder('id'). The example given below queries the root.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()  # Creates a local webserver and handles authentication automatically
# Make a GoogleDrive instance from the authenticated GoogleAuth instance.
# ListFolder() reads this module-level `drive`, so it must be created first.
drive = GoogleDrive(gauth)
def ListFolder(parent):
    """Return a nested listing of everything under the folder *parent*.

    The untrashed children of *parent* are fetched through the module-level
    ``drive`` client. Each child becomes one dict in the returned list:

    * folders: ``{"id", "title", "list"}``, where ``"list"`` is the result
      of calling ``ListFolder`` on that folder's id;
    * anything else: ``{"title", "title1"}``, where ``"title1"`` holds the
      item's ``alternateLink``.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    children = drive.ListFile({'q': "'%s' in parents and trashed=false" % parent}).GetList()

    def describe(entry):
        # Non-folders are leaves: record the title and link only.
        if entry['mimeType'] != folder_mime:
            return {"title": entry['title'], "title1": entry['alternateLink']}
        # Folders recurse to list their own contents.
        return {"id": entry['id'], "title": entry['title'], "list": ListFolder(entry['id'])}

    return [describe(entry) for entry in children]
# Build the nested listing starting from the Drive root folder.
# NOTE: the returned list is discarded here; assign or print it to use it.
ListFolder('root')
Your code is absolutely correct. But with the default settings of PyDrive, you have access to only the root-level files and folders. Changing oauth_scope in the settings.yaml file fixes this issue.
# PyDrive settings.yaml
# Read the client configuration from this file (the client_config mapping below).
client_config_backend: settings
client_config:
  client_id: XXX
  client_secret: XXXX
# Persist credentials to a local file between runs.
save_credentials: True
save_credentials_backend: file
save_credentials_file: credentials.json
get_refresh_token: True
# Requesting these scopes is what gives access beyond root-level files and folders.
oauth_scope:
  - https://www.googleapis.com/auth/drive
  - https://www.googleapis.com/auth/drive.metadata