1
1
#! /usr/bin/env bash
2
2
3
- # This script downloads the .py and .sh code from the open source repo
4
- # Then it converts the code to markdown files
5
-
6
- # Requires two ABSOLUTE paths to the final destination and source directories
7
- # ex: ./make_mdx.sh /Users/potter/dest /Users/potter/src
8
-
9
- # make temp directory
10
- WORK_DIR=$( mktemp -d)
11
-
12
- PY_DEST_REPO=" docs/source/ingest/destination_connectors/code/python/"
13
- SH_DEST_REPO=" docs/source/ingest/destination_connectors/code/bash/"
14
- DEST_TARGET_DIR=$1 # first argument to script
15
- PY_SRC_REPO=" docs/source/ingest/source_connectors/code/python/"
16
- SH_SRC_REPO=" docs/source/ingest/source_connectors/code/bash/"
17
- SRC_TARGET_DIR=$2 # second argument to script
18
-
19
- # Clone the correct directories in the open source repo
20
- cd " $WORK_DIR "
21
- git init
22
- git remote add -f origin https://github.com/Unstructured-IO/unstructured
23
- git config core.sparseCheckout true
24
- echo " $PY_DEST_REPO " >> .git/info/sparse-checkout
25
- echo " $SH_DEST_REPO " >> .git/info/sparse-checkout
26
- echo " $PY_SRC_REPO " >> .git/info/sparse-checkout
27
- echo " $SH_SRC_REPO " >> .git/info/sparse-checkout
28
- git pull origin main
29
-
30
- cp -R " $WORK_DIR /$PY_DEST_REPO /." " $DEST_TARGET_DIR /"
31
- cp -R " $WORK_DIR /$SH_DEST_REPO /." " $DEST_TARGET_DIR /"
32
- cp -R " $WORK_DIR /$PY_SRC_REPO /." " $SRC_TARGET_DIR /"
33
- cp -R " $WORK_DIR /$SH_SRC_REPO /." " $SRC_TARGET_DIR /"
34
-
35
- function to_mdx() {
36
- for f in * .py
37
- do sed -i ' 1i```python\' $f
38
- sed -i ' $ a ```' $f
39
- mv $f $f .mdx
40
- done
3
+ # Expects paths for the destination and source directories.
4
+ # Example usage: ./make_mdx.sh /Users/potter/dest ./src
41
5
42
- for f in * .sh
43
- do sed -i ' 1i```bash\' $f
44
- sed -i ' $ a ```' $f
45
- mv $f $f .mdx
46
- done
47
- }
6
# Validate the command line: exactly two directory paths are required.
# Diagnostics go to stderr so they never mix with regular output.
if [[ $# -ne 2 ]]; then
  echo "Error: Two paths (destination and source) are required." >&2
  echo "Usage: $0 <destination_directory> <source_directory>" >&2
  exit 1
fi

# Create the directories if they do not exist. This happens before the paths
# are resolved so that realpath always sees an existing directory.
mkdir -p -- "$1" "$2" || exit 1

# Resolve both targets to absolute paths (the script changes directory later,
# so relative paths would otherwise stop pointing at the right place).
# Abort on failure: an empty value here would become "/" once the trailing
# slash is appended, and later steps would then operate on the filesystem root.
DEST_TARGET_DIR=$(realpath -- "$1") || exit 1
SRC_TARGET_DIR=$(realpath -- "$2") || exit 1

# Ensure directories end with exactly one trailing slash.
DEST_TARGET_DIR="${DEST_TARGET_DIR%/}/"
SRC_TARGET_DIR="${SRC_TARGET_DIR%/}/"
48
27
49
- # Convert the destination_connectors to markdown
50
- cd " $DEST_TARGET_DIR "
51
- to_mdx
28
+ echo " Working with DEST_TARGET_DIR=${DEST_TARGET_DIR} "
29
+ echo " Working with SRC_TARGET_DIR=${SRC_TARGET_DIR} "
52
30
53
- # Convert the source_connectors to markdown
54
- cd " $SRC_TARGET_DIR "
55
- to_mdx
31
# Create a temporary directory for work. Abort if that fails so the clone and
# copy steps below never run against an empty path.
WORK_DIR=$(mktemp -d) || exit 1
# Remove the temporary checkout on every exit path, including the early
# exits below.
trap 'rm -rf -- "${WORK_DIR}"' EXIT

# Repository and path configurations
REPO_URL="https://github.com/Unstructured-IO/unstructured"
DEST_PYTHON_PATH="docs/source/ingest/destination_connectors/code/python/"
DEST_SHELL_PATH="docs/source/ingest/destination_connectors/code/bash/"
SRC_PYTHON_PATH="docs/source/ingest/source_connectors/code/python/"
SRC_SHELL_PATH="docs/source/ingest/source_connectors/code/bash/"

# Clone only the necessary directories using sparse-checkout. Each step is
# checked: continuing after a failed clone would copy nothing and still
# report success at the end of the script.
git clone --depth 1 --filter=blob:none --sparse "${REPO_URL}" "${WORK_DIR}" || exit 1
cd "${WORK_DIR}" || exit 1
git sparse-checkout set \
  "${DEST_PYTHON_PATH}" "${DEST_SHELL_PATH}" \
  "${SRC_PYTHON_PATH}" "${SRC_SHELL_PATH}" || exit 1

# Copy files to destination and source directories. The configured paths end
# in "/", so the trailing glob expands to the contents of each directory.
cp -R "${WORK_DIR}/${DEST_PYTHON_PATH}"* "${DEST_TARGET_DIR}" || exit 1
cp -R "${WORK_DIR}/${DEST_SHELL_PATH}"* "${DEST_TARGET_DIR}" || exit 1
cp -R "${WORK_DIR}/${SRC_PYTHON_PATH}"* "${SRC_TARGET_DIR}" || exit 1
cp -R "${WORK_DIR}/${SRC_SHELL_PATH}"* "${SRC_TARGET_DIR}" || exit 1
51
+
52
+ # Function to convert code files to markdown
53
#######################################
# Wrap every *.py and *.sh file of a directory in a Markdown code fence and
# save the result as <name>.mdx, removing the plain source file afterwards.
# The fence is tagged with the file extension (```py / ```sh).
# Arguments: $1 - directory containing the code files
# Outputs:   progress messages on stdout
# Note:      changes the shell's working directory to $1 and exits the
#            script if the directory cannot be entered or a file cannot be
#            converted.
#######################################
function convert_to_mdx() {
  local CODE_DIR="$1"
  local f extension
  local converted=0
  cd "${CODE_DIR}" || exit 1
  echo "Converting files in ${CODE_DIR}"
  for f in *.py *.sh; do
    # An unmatched glob stays as the literal pattern; skip it (and anything
    # that is not a regular file).
    [ -f "$f" ] || continue
    extension="${f##*.}"
    # -v assigns the variable before BEGIN runs. Passing extension=... as an
    # operand after the program would only take effect after BEGIN, which
    # leaves the opening fence without its language tag.
    awk -v extension="$extension" 'BEGIN {print "```" extension}
      {print}
      END {print "```"}' "$f" > "${f}.mdx" || exit 1
    # Keep only the generated Markdown file.
    rm -f -- "$f"
    converted=$((converted + 1))
  done
  # Report an empty directory once, not once per unmatched pattern.
  if [ "$converted" -eq 0 ]; then
    echo "No files to convert in ${CODE_DIR}"
  fi
}
56
69
57
- rm -rf " $WORK_DIR "
70
+ convert_to_mdx " ${DEST_TARGET_DIR} "
71
+ convert_to_mdx " ${SRC_TARGET_DIR} "
58
72
59
- echo " Markdown files created in $DEST_TARGET_DIR and $SRC_TARGET_DIR "
73
+ # Clean up the work directory
74
+ rm -rf " ${WORK_DIR} "
75
+ echo " Markdown files created in ${DEST_TARGET_DIR} and ${SRC_TARGET_DIR} "
0 commit comments