diff --git a/README.md b/README.md index 3b34c6d..0cb7bfb 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # slack-backup-python Exporting slack channels, conversation using Web API -pip3 install requests +```pip3 install requests``` +```pip3 install slack-sdk``` ```python3 backup.py --token 'xoxp-8910951619-266447157124-433342496065-687a1ceaea9046688ff28bde02bada5c' --outDir './out'``` diff --git a/app_constants.py b/app_constants.py index 1a16033..966d356 100644 --- a/app_constants.py +++ b/app_constants.py @@ -1,6 +1,8 @@ CHANNEL_LIST_FILE = '/channels.json' CHANNELS_DIRECTORY = '/channels' +PRIVATE_CHANNELS_DIRECTORY = '/private-channels' CHANNEL_HISTORY_FILE = CHANNELS_DIRECTORY + '/{0}.json' +PRIVATE_CHANNEL_HISTORY_FILE = PRIVATE_CHANNELS_DIRECTORY + '/{0}.json' GROUP_LIST_FILE = '/groups.json' GROUPS_DIRECTORY = '/groups' GROUP_HISTORY_FILE = GROUPS_DIRECTORY + '/{0}.json' @@ -8,3 +10,6 @@ USER_LIST_FILE = '/users.json' ONE_TO_ONE_CONVERSATION_DIRECTORY = '/one-to-one' ONE_TO_ONE_CONVERSATION_HISTORY_FILE = ONE_TO_ONE_CONVERSATION_DIRECTORY+'/{0}-{1}.json' +FILE_LIST_FILE = '/files.json' +FILES_DIRECTORY = '/files' +FILES_FILENAME = FILES_DIRECTORY + '/{id} - {author} ({user}) at {date}: {name}' diff --git a/backup.py b/backup.py index 9b9de9b..94684cc 100644 --- a/backup.py +++ b/backup.py @@ -1,9 +1,11 @@ import os import argparse +import datetime import json import requests -import web_constants as WEB_CONSTANTS import app_constants as APP_CONSTANTS +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError parser = argparse.ArgumentParser( description='Backup Slack channel, conversation, Users, and direct messages.') @@ -17,8 +19,10 @@ args = parser.parse_args() token = args.token +auth_headers = {'Authorization': 'Bearer ' + token} outDir = args.outDir +client = WebClient(token=token) def getOutputPath(relativePath): return outDir+relativePath @@ -39,58 +43,110 @@ def readRequestJsonFile(): jsonObj = json.load(file) return jsonObj - -def writeJSONFile(jsonObj, filePath): - outputPath = getOutputPath(filePath) +def makedirPath(outputPath): dirPath = os.path.dirname(outputPath) if not os.path.exists(dirPath): os.makedirs(dirPath) +def writeJSONFile(jsonObj, filePath): + outputPath = getOutputPath(filePath) + makedirPath(outputPath) with open(outputPath, 'w+') as file: json.dump(jsonObj, file, indent=True) +def getUsers(): + response = client.users_list() + return response['members'] -def getChannels(): - response = requests.get(WEB_CONSTANTS.CHANNEL_LIST, - params={'token': token}) - return response.json()['channels'] - - -def getChannelHistory(channelId): - response = requests.get(WEB_CONSTANTS.CHANNEL_HISTORY, params={ - 'token': token, 'channel': channelId}) - return response.json() +def lookupUser(users, userID): + for u in users: + if u['id'] == userID: + return u +def getChannels(): + response = client.conversations_list(types='public_channel,private_channel') + return response['channels'] def getGroups(): - response = requests.get(WEB_CONSTANTS.GROUP_LIST, params={'token': token}) - return response.json()['groups'] - - -def getGroupHistory(groupId): - response = requests.get(WEB_CONSTANTS.GROUP_HISTORY, params={ - 'token': token, 'channel': groupId}) - return response.json() - + response = client.conversations_list(types='mpim') + return response['channels'] def getOneToOneConversations(): - # im for one to one conv. - response = requests.get(WEB_CONSTANTS.CONVERSATION_LIST, params={ - 'token': token, 'types': 'im'}) - return response.json()['channels'] - - -def getUsers(): - # im for one to one conv. - response = requests.get(WEB_CONSTANTS.USERS_LIST, params={'token': token}) - return response.json()['members'] - - -def getConversationHistory(conversationId): - response = requests.get(WEB_CONSTANTS.CONVERSATION_HISTORY, params={ - 'token': token, 'channel': conversationId}) - return response.json() - + response = client.conversations_list(types='im') + return response['channels'] + +def getConversationHistory(channelId): + params = { 'channel': channelId, 'count': 1000} + msgs = [] + while True: + response = client.conversations_history(**params) + msgs.extend(response['messages']) + if not response['has_more']: + break + + params['latest'] = msgs[-1]['ts'] + + for m in msgs: + if not ('reply_count' in m and m['reply_count'] > 0): + continue + + m['replies'] = [] + params = { 'channel': channelId, 'ts': m['ts'] } + while True: + response = client.conversations_replies(**params) + if not response['ok']: + break + + m['replies'].extend(response['messages']) + + if not response['has_more']: + break + params['cursor'] = response['response_metadata']['next_cursor'] + + return msgs + +def getFileList(): + params = { 'count': 100, 'show_files_hidden_by_limit': True, 'page': 1} + files = [] + while True: + response = client.files_list(**params) + if not response['ok']: + break + + files.extend(response['files']) + + if response['paging']['pages'] <= params['page']: + break + params['page'] += 1 + + return files + +def downloadFiles(users): + files = getFileList() + writeJSONFile(files, APP_CONSTANTS.FILE_LIST_FILE) + + skipped_files=0 + for file in files: + if file['mode'] == 'hidden_by_limit': + skipped_files+=1 + continue + + url = file['url_private_download'] + r = requests.get(url, headers={'Authorization': 'Bearer ' + token}, stream=True) + r.raise_for_status() + + file['date'] = datetime.datetime.fromtimestamp(file['timestamp']).strftime('%Y-%m-%d %H:%M:%S') + file['author'] = lookupUser(users, file['user'])['name'] + filename = APP_CONSTANTS.FILES_FILENAME.format(**file) + + print('Downloading to ' + filename) + + outputPath = getOutputPath(filename) + makedirPath(outputPath) + with open(outputPath, 'wb') as f: + for chunk in r.iter_content(chunk_size=32*1024): + f.write(chunk) + print(f"{skipped_files} files hidden by limit") def run(): channels = getChannels() @@ -99,9 +155,12 @@ def run(): for channel in channels: channelId = channel['id'] channelName = channel['name'] - channelHistory = getChannelHistory(channelId) - channelHistoryFilename = parseTemplatedFileName( - APP_CONSTANTS.CHANNEL_HISTORY_FILE, channelName) + channelHistory = getConversationHistory(channelId) + if channel['is_private']: + template = APP_CONSTANTS.PRIVATE_CHANNEL_HISTORY_FILE + else: + template = APP_CONSTANTS.CHANNEL_HISTORY_FILE + channelHistoryFilename = parseTemplatedFileName(template, channelName) writeJSONFile(channelHistory, channelHistoryFilename) groups = getGroups() @@ -111,7 +170,7 @@ def run(): groupId = group['id'] groupName = group['name'] - groupHistory = getGroupHistory(groupId) + groupHistory = getConversationHistory(groupId) groupHistoryFilename = parseTemplatedFileName( APP_CONSTANTS.GROUP_HISTORY_FILE, groupName) @@ -122,7 +181,7 @@ def run(): userIdToNameDict = {user['id']: user['name'] for user in users} - # Getting one to one conversation list + # Get one to one conversation list oneToOneConversations = getOneToOneConversations() writeJSONFile(oneToOneConversations, APP_CONSTANTS.ONE_TO_ONE_CONVERSATION_LIST_FILE) @@ -137,6 +196,7 @@ def run(): APP_CONSTANTS.ONE_TO_ONE_CONVERSATION_HISTORY_FILE, userName, userId) writeJSONFile(conversationHistory, conversationHistoryFilename) + downloadFiles(users) if __name__ == '__main__': run() diff --git a/web_constants.py b/web_constants.py deleted file mode 100644 index 093b1db..0000000 --- a/web_constants.py +++ /dev/null @@ -1,8 +0,0 @@ -BASE_URL = 'https://slack.com/api' -CHANNEL_LIST = BASE_URL + '/channels.list' -CHANNEL_HISTORY = BASE_URL + '/channels.history' -GROUP_LIST = BASE_URL + '/groups.list' -GROUP_HISTORY = BASE_URL + '/groups.history' -CONVERSATION_LIST = BASE_URL + '/conversations.list' -USERS_LIST = BASE_URL + '/users.list' -CONVERSATION_HISTORY = BASE_URL + '/conversations.history' \ No newline at end of file