From a1e4a1507dc44fe8bb1bcea875cdcfcebc1b342b Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Fri, 20 Nov 2020 02:48:35 -0800 Subject: [PATCH 1/7] Update to Authorization header and cookie for authentication --- backup.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/backup.py b/backup.py index 9b9de9b..6b9adac 100644 --- a/backup.py +++ b/backup.py @@ -11,12 +11,20 @@ parser.add_argument('-t', '--token', dest='token',required=True, help='Slack api Access token') +parser.add_argument('-c', '--cookie', dest='cookie',required=False, + help='Slack user cookie') + parser.add_argument('-od', '--outDir', dest='outDir',required=False,default='./output', help='Output directory to store JSON backup files.') args = parser.parse_args() token = args.token +auth_headers = {'Authorization': 'Bearer ' + token} +if args.cookie: + auth_cookies = {'d': args.cookie} +else: + auth_cookies = {} outDir = args.outDir @@ -52,43 +60,44 @@ def writeJSONFile(jsonObj, filePath): def getChannels(): response = requests.get(WEB_CONSTANTS.CHANNEL_LIST, - params={'token': token}) + headers=auth_headers, cookies=auth_cookies) + print(response.json()); return response.json()['channels'] def getChannelHistory(channelId): response = requests.get(WEB_CONSTANTS.CHANNEL_HISTORY, params={ - 'token': token, 'channel': channelId}) + 'channel': channelId}, headers=auth_headers, cookies=auth_cookies) return response.json() def getGroups(): - response = requests.get(WEB_CONSTANTS.GROUP_LIST, params={'token': token}) + response = requests.get(WEB_CONSTANTS.GROUP_LIST, headers=auth_headers, cookies=auth_cookies) return response.json()['groups'] def getGroupHistory(groupId): response = requests.get(WEB_CONSTANTS.GROUP_HISTORY, params={ - 'token': token, 'channel': groupId}) + 'channel': groupId}, headers=auth_headers, cookies=auth_cookies) return response.json() def getOneToOneConversations(): # im for one to one conv. response = requests.get(WEB_CONSTANTS.CONVERSATION_LIST, params={ - 'token': token, 'types': 'im'}) + 'types': 'im'}, headers=auth_headers, cookies=auth_cookies) return response.json()['channels'] def getUsers(): # im for one to one conv. - response = requests.get(WEB_CONSTANTS.USERS_LIST, params={'token': token}) + response = requests.get(WEB_CONSTANTS.USERS_LIST, headers=auth_headers, cookies=auth_cookies) return response.json()['members'] def getConversationHistory(conversationId): response = requests.get(WEB_CONSTANTS.CONVERSATION_HISTORY, params={ - 'token': token, 'channel': conversationId}) + 'channel': conversationId}, headers=auth_headers, cookies=auth_cookies) return response.json() From baffbb2d81982651c5e80cbfbc841f279eed02cb Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Fri, 20 Nov 2020 03:14:07 -0800 Subject: [PATCH 2/7] Save entire scrollback --- backup.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/backup.py b/backup.py index 6b9adac..53ca41f 100644 --- a/backup.py +++ b/backup.py @@ -66,9 +66,18 @@ def getChannels(): def getChannelHistory(channelId): - response = requests.get(WEB_CONSTANTS.CHANNEL_HISTORY, params={ - 'channel': channelId}, headers=auth_headers, cookies=auth_cookies) - return response.json() + params = { 'channel': channelId, 'count': 1000} + msgs = [] + while True: + response = requests.get(WEB_CONSTANTS.CHANNEL_HISTORY, params=params, + headers=auth_headers, cookies=auth_cookies) + rsp = response.json() + msgs.extend(rsp['messages']) + if not rsp['has_more']: + break + + params['latest'] = msgs[-1]['ts'] + return msgs def getGroups(): @@ -77,10 +86,18 @@ def getGroups(): def getGroupHistory(groupId): - response = requests.get(WEB_CONSTANTS.GROUP_HISTORY, params={ - 'channel': groupId}, headers=auth_headers, cookies=auth_cookies) - return response.json() - + params = { 'channel': groupId, 'count': 1000} + msgs = [] + while True: + response = requests.get(WEB_CONSTANTS.GROUP_HISTORY, params=params, + headers=auth_headers, cookies=auth_cookies) + rsp = response.json() + msgs.extend(rsp['messages']) + if not rsp['has_more']: + break + + params['latest'] = msgs[-1]['ts'] + return msgs def getOneToOneConversations(): # im for one to one conv. @@ -96,9 +113,18 @@ def getUsers(): def getConversationHistory(conversationId): - response = requests.get(WEB_CONSTANTS.CONVERSATION_HISTORY, params={ - 'channel': conversationId}, headers=auth_headers, cookies=auth_cookies) - return response.json() + params = { 'channel': conversationId, 'limit': 1000} + msgs = [] + while True: + response = requests.get(WEB_CONSTANTS.CONVERSATION_HISTORY, params=params, + headers=auth_headers, cookies=auth_cookies) + rsp = response.json() + msgs.extend(rsp['messages']) + if not rsp['has_more']: + break + + params['cursor'] = rsp['response_metadata']['next_cursor'] + return msgs def run(): From 498ec4148a6dd351ea563ae782ed5848ba811227 Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Mon, 5 Jul 2021 01:41:03 -0700 Subject: [PATCH 3/7] Update to latest API --- app_constants.py | 2 + backup.py | 96 +++++++++++++----------------------------------- web_constants.py | 8 ---- 3 files changed, 28 insertions(+), 78 deletions(-) delete mode 100644 web_constants.py diff --git a/app_constants.py b/app_constants.py index 1a16033..59d7869 100644 --- a/app_constants.py +++ b/app_constants.py @@ -1,6 +1,8 @@ CHANNEL_LIST_FILE = '/channels.json' CHANNELS_DIRECTORY = '/channels' +PRIVATE_CHANNELS_DIRECTORY = '/private-channels' CHANNEL_HISTORY_FILE = CHANNELS_DIRECTORY + '/{0}.json' +PRIVATE_CHANNEL_HISTORY_FILE = PRIVATE_CHANNELS_DIRECTORY + '/{0}.json' GROUP_LIST_FILE = '/groups.json' GROUPS_DIRECTORY = '/groups' GROUP_HISTORY_FILE = GROUPS_DIRECTORY + '/{0}.json' diff --git a/backup.py b/backup.py index 53ca41f..18d097a 100644 --- a/backup.py +++ b/backup.py @@ -2,8 +2,9 @@ import argparse import json import requests -import web_constants as WEB_CONSTANTS import app_constants as APP_CONSTANTS +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError parser = argparse.ArgumentParser( description='Backup Slack channel, conversation, Users, and direct messages.') @@ -11,9 +12,6 @@ parser.add_argument('-t', '--token', dest='token',required=True, help='Slack api Access token') -parser.add_argument('-c', '--cookie', dest='cookie',required=False, - help='Slack user cookie') - parser.add_argument('-od', '--outDir', dest='outDir',required=False,default='./output', help='Output directory to store JSON backup files.') @@ -21,12 +19,9 @@ token = args.token auth_headers = {'Authorization': 'Bearer ' + token} -if args.cookie: - auth_cookies = {'d': args.cookie} -else: - auth_cookies = {} outDir = args.outDir +client = WebClient(token=token) def getOutputPath(relativePath): return outDir+relativePath @@ -57,73 +52,32 @@ def writeJSONFile(jsonObj, filePath): with open(outputPath, 'w+') as file: json.dump(jsonObj, file, indent=True) +def getUsers(): + response = client.users_list() + return response['members'] def getChannels(): - response = requests.get(WEB_CONSTANTS.CHANNEL_LIST, - headers=auth_headers, cookies=auth_cookies) - print(response.json()); - return response.json()['channels'] - - -def getChannelHistory(channelId): - params = { 'channel': channelId, 'count': 1000} - msgs = [] - while True: - response = requests.get(WEB_CONSTANTS.CHANNEL_HISTORY, params=params, - headers=auth_headers, cookies=auth_cookies) - rsp = response.json() - msgs.extend(rsp['messages']) - if not rsp['has_more']: - break - - params['latest'] = msgs[-1]['ts'] - return msgs - + response = client.conversations_list(types='public_channel,private_channel') + return response['channels'] def getGroups(): - response = requests.get(WEB_CONSTANTS.GROUP_LIST, headers=auth_headers, cookies=auth_cookies) - return response.json()['groups'] - - -def getGroupHistory(groupId): - params = { 'channel': groupId, 'count': 1000} - msgs = [] - while True: - response = requests.get(WEB_CONSTANTS.GROUP_HISTORY, params=params, - headers=auth_headers, cookies=auth_cookies) - rsp = response.json() - msgs.extend(rsp['messages']) - if not rsp['has_more']: - break - - params['latest'] = msgs[-1]['ts'] - return msgs + response = client.conversations_list(types='mpim') + return response['channels'] def getOneToOneConversations(): - # im for one to one conv. - response = requests.get(WEB_CONSTANTS.CONVERSATION_LIST, params={ - 'types': 'im'}, headers=auth_headers, cookies=auth_cookies) - return response.json()['channels'] - + response = client.conversations_list(types='im') + return response['channels'] -def getUsers(): - # im for one to one conv. - response = requests.get(WEB_CONSTANTS.USERS_LIST, headers=auth_headers, cookies=auth_cookies) - return response.json()['members'] - - -def getConversationHistory(conversationId): - params = { 'channel': conversationId, 'limit': 1000} +def getConversationHistory(channelId): + params = { 'channel': channelId, 'count': 1000} msgs = [] while True: - response = requests.get(WEB_CONSTANTS.CONVERSATION_HISTORY, params=params, - headers=auth_headers, cookies=auth_cookies) - rsp = response.json() - msgs.extend(rsp['messages']) - if not rsp['has_more']: + response = client.conversations_history(**params) + msgs.extend(response['messages']) + if not response['has_more']: break - params['cursor'] = rsp['response_metadata']['next_cursor'] + params['latest'] = msgs[-1]['ts'] return msgs @@ -134,9 +88,12 @@ def run(): for channel in channels: channelId = channel['id'] channelName = channel['name'] - channelHistory = getChannelHistory(channelId) - channelHistoryFilename = parseTemplatedFileName( - APP_CONSTANTS.CHANNEL_HISTORY_FILE, channelName) + channelHistory = getConversationHistory(channelId) + if channel['is_private']: + template = APP_CONSTANTS.PRIVATE_CHANNEL_HISTORY_FILE + else: + template = APP_CONSTANTS.CHANNEL_HISTORY_FILE + channelHistoryFilename = parseTemplatedFileName(template, channelName) writeJSONFile(channelHistory, channelHistoryFilename) groups = getGroups() @@ -146,7 +103,7 @@ def run(): groupId = group['id'] groupName = group['name'] - groupHistory = getGroupHistory(groupId) + groupHistory = getConversationHistory(groupId) groupHistoryFilename = parseTemplatedFileName( APP_CONSTANTS.GROUP_HISTORY_FILE, groupName) @@ -157,7 +114,7 @@ def run(): userIdToNameDict = {user['id']: user['name'] for user in users} - # Getting one to one conversation list + # Get one to one conversation list oneToOneConversations = getOneToOneConversations() writeJSONFile(oneToOneConversations, APP_CONSTANTS.ONE_TO_ONE_CONVERSATION_LIST_FILE) @@ -172,6 +129,5 @@ def run(): APP_CONSTANTS.ONE_TO_ONE_CONVERSATION_HISTORY_FILE, userName, userId) writeJSONFile(conversationHistory, conversationHistoryFilename) - if __name__ == '__main__': run() diff --git a/web_constants.py b/web_constants.py deleted file mode 100644 index 093b1db..0000000 --- a/web_constants.py +++ /dev/null @@ -1,8 +0,0 @@ -BASE_URL = 'https://slack.com/api' -CHANNEL_LIST = BASE_URL + '/channels.list' -CHANNEL_HISTORY = BASE_URL + '/channels.history' -GROUP_LIST = BASE_URL + '/groups.list' -GROUP_HISTORY = BASE_URL + '/groups.history' -CONVERSATION_LIST = BASE_URL + '/conversations.list' -USERS_LIST = BASE_URL + '/users.list' -CONVERSATION_HISTORY = BASE_URL + '/conversations.history' \ No newline at end of file From d065ea99d7e0756c3f9b68b14e1d98f2b9c8921b Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Wed, 2 Feb 2022 14:43:39 -0800 Subject: [PATCH 4/7] Don't need requests anymore --- README.md | 2 +- backup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 3b34c6d..85695b6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # slack-backup-python Exporting slack channels, conversation using Web API -pip3 install requests +```pip3 install slack-sdk``` ```python3 backup.py --token 'xoxp-8910951619-266447157124-433342496065-687a1ceaea9046688ff28bde02bada5c' --outDir './out'``` diff --git a/backup.py b/backup.py index 18d097a..6db568c 100644 --- a/backup.py +++ b/backup.py @@ -1,7 +1,6 @@ import os import argparse import json -import requests import app_constants as APP_CONSTANTS from slack_sdk import WebClient from slack_sdk.errors import SlackApiError From 2edc90b8910e5349aa13f37c08601587a54e3d38 Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Fri, 2 Sep 2022 20:37:52 -0700 Subject: [PATCH 5/7] Backup files as well as messages --- README.md | 1 + app_constants.py | 3 +++ backup.py | 52 +++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 85695b6..0cb7bfb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # slack-backup-python Exporting slack channels, conversation using Web API +```pip3 install requests``` ```pip3 install slack-sdk``` ```python3 backup.py --token 'xoxp-8910951619-266447157124-433342496065-687a1ceaea9046688ff28bde02bada5c' --outDir './out'``` diff --git a/app_constants.py b/app_constants.py index 59d7869..966d356 100644 --- a/app_constants.py +++ b/app_constants.py @@ -10,3 +10,6 @@ USER_LIST_FILE = '/users.json' ONE_TO_ONE_CONVERSATION_DIRECTORY = '/one-to-one' ONE_TO_ONE_CONVERSATION_HISTORY_FILE = ONE_TO_ONE_CONVERSATION_DIRECTORY+'/{0}-{1}.json' +FILE_LIST_FILE = '/files.json' +FILES_DIRECTORY = '/files' +FILES_FILENAME = FILES_DIRECTORY + '/{id} - {author} ({user}) at {date}: {name}' diff --git a/backup.py b/backup.py index 6db568c..0219359 100644 --- a/backup.py +++ b/backup.py @@ -1,6 +1,8 @@ import os import argparse +import datetime import json +import requests import app_constants as APP_CONSTANTS from slack_sdk import WebClient from slack_sdk.errors import SlackApiError @@ -41,13 +43,14 @@ def readRequestJsonFile(): jsonObj = json.load(file) return jsonObj - -def writeJSONFile(jsonObj, filePath): - outputPath = getOutputPath(filePath) +def makedirPath(outputPath): dirPath = os.path.dirname(outputPath) if not os.path.exists(dirPath): os.makedirs(dirPath) +def writeJSONFile(jsonObj, filePath): + outputPath = getOutputPath(filePath) + makedirPath(outputPath) with open(outputPath, 'w+') as file: json.dump(jsonObj, file, indent=True) @@ -55,6 +58,11 @@ def getUsers(): response = client.users_list() return response['members'] +def lookupUser(users, userID): + for u in users: + if u['id'] == userID: + return u + def getChannels(): response = client.conversations_list(types='public_channel,private_channel') return response['channels'] @@ -79,6 +87,42 @@ def getConversationHistory(channelId): params['latest'] = msgs[-1]['ts'] return msgs +def getFileList(): + params = { 'count': 100, 'show_files_hidden_by_limit': True, 'page': 1} + files = [] + while True: + response = client.files_list(**params) + if not response['ok']: + break + + files.extend(response['files']) + + if response['paging']['pages'] <= params['page']: + break + params['page'] += 1 + + return files + +def downloadFiles(users): + files = getFileList() + writeJSONFile(files, APP_CONSTANTS.FILE_LIST_FILE) + + for file in files: + url = file['url_private_download'] + r = requests.get(url, headers={'Authorization': 'Bearer ' + token}, stream=True) + r.raise_for_status() + + file['date'] = datetime.datetime.fromtimestamp(file['timestamp']).strftime('%Y-%m-%d %H:%M:%S') + file['author'] = lookupUser(users, file['user'])['name'] + filename = APP_CONSTANTS.FILES_FILENAME.format(**file) + + print('Downloading to ' + filename) + + outputPath = getOutputPath(filename) + makedirPath(outputPath) + with open(outputPath, 'wb') as f: + for chunk in r.iter_content(chunk_size=32*1024): + f.write(chunk) def run(): channels = getChannels() @@ -128,5 +172,7 @@ def run(): APP_CONSTANTS.ONE_TO_ONE_CONVERSATION_HISTORY_FILE, userName, userId) writeJSONFile(conversationHistory, conversationHistoryFilename) + downloadFiles(users) + if __name__ == '__main__': run() From 546db1cfb6ea8007d3a970c831a83328d1f05060 Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Fri, 2 Sep 2022 20:48:31 -0700 Subject: [PATCH 6/7] Fix collection of threads --- backup.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/backup.py b/backup.py index 0219359..7f0435e 100644 --- a/backup.py +++ b/backup.py @@ -85,6 +85,24 @@ def getConversationHistory(channelId): break params['latest'] = msgs[-1]['ts'] + + for m in msgs: + if not ('reply_count' in m and m['reply_count'] > 0): + continue + + m['replies'] = [] + params = { 'channel': channelId, 'ts': m['ts'] } + while True: + response = client.conversations_replies(**params) + if not response['ok']: + break + + m['replies'].extend(response['messages']) + + if not response['has_more']: + break + params['cursor'] = response['response_metadata']['next_cursor'] + return msgs def getFileList(): From 7f0cad8124dbe05c58ea857b4ef1c085c83b010a Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Sun, 25 Dec 2022 18:04:04 -0800 Subject: [PATCH 7/7] Handle skipped files --- backup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backup.py b/backup.py index 7f0435e..94684cc 100644 --- a/backup.py +++ b/backup.py @@ -125,7 +125,12 @@ def downloadFiles(users): files = getFileList() writeJSONFile(files, APP_CONSTANTS.FILE_LIST_FILE) + skipped_files=0 for file in files: + if file['mode'] == 'hidden_by_limit': + skipped_files+=1 + continue + url = file['url_private_download'] r = requests.get(url, headers={'Authorization': 'Bearer ' + token}, stream=True) r.raise_for_status() @@ -141,6 +146,7 @@ def downloadFiles(users): with open(outputPath, 'wb') as f: for chunk in r.iter_content(chunk_size=32*1024): f.write(chunk) + print(f"{skipped_files} files hidden by limit") def run(): channels = getChannels()