From 7674e374cbd639d284512e0bb5aa0618a852bdf9 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Fri, 21 Apr 2023 01:55:25 +0100
Subject: [PATCH 01/10] Drop unexpected columns when writing PDB dataframes
 #124

---
 biopandas/pdb/pandas_pdb.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 9e713c7..c25773d 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -610,6 +610,9 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
 
         dfs = {r: self.df[r].copy() for r in records if not self.df[r].empty}
 
+        # Drop unexpected columns
+        dfs = {k: v[[pdb_df_columns]] for k, v in dfs.items() if k in {"ATOM", "HETATM"}}
+
         for r in dfs:
             for col in pdb_records[r]:
                 dfs[r][col["id"]] = dfs[r][col["id"]].apply(col["strf"])

From 0757e3955206425169ca44600effdee4b4d0e5d2 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Mon, 24 Jul 2023 14:48:43 -1000
Subject: [PATCH 02/10] convert column selector to a list

---
 biopandas/pdb/pandas_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index c25773d..7fe1bb7 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -611,7 +611,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
         dfs = {r: self.df[r].copy() for r in records if not self.df[r].empty}
 
         # Drop unexpected columns
-        dfs = {k: v[[pdb_df_columns]] for k, v in dfs.items() if k in {"ATOM", "HETATM"}}
+        dfs = {k: v[list(pdb_df_columns)] for k, v in dfs.items() if k in {"ATOM", "HETATM"}}
 
         for r in dfs:
             for col in pdb_records[r]:

From 9d0de4f73ab28602f825732fe72fae50ac2ccffe Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Mon, 31 Jul 2023 13:58:35 -0400
Subject: [PATCH 03/10] make column subsetting more robust

---
 biopandas/pdb/pandas_pdb.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 7fe1bb7..5fd4ffe 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -611,7 +611,10 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
         dfs = {r: self.df[r].copy() for r in records if not self.df[r].empty}
 
         # Drop unexpected columns
-        dfs = {k: v[list(pdb_df_columns)] for k, v in dfs.items() if k in {"ATOM", "HETATM"}}
+        for k, v in dfs.items():
+            if k in {"ATOM", "HETATM"}}:
+                overlap_columns = set(pdb_df_columns).intersection(set(df.columns))
+                dfs[k] = v[list(overlap_columns)]
 
         for r in dfs:
             for col in pdb_records[r]:

From 0c2cc06cdf19e1a03b37c92a911354a4d0b903de Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Mon, 31 Jul 2023 14:05:34 -0400
Subject: [PATCH 04/10] Fix syntax error

---
 biopandas/pdb/pandas_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 5fd4ffe..b9b535c 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -612,7 +612,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
 
         # Drop unexpected columns
         for k, v in dfs.items():
-            if k in {"ATOM", "HETATM"}}:
+            if k in {"ATOM", "HETATM"}:
                 overlap_columns = set(pdb_df_columns).intersection(set(df.columns))
                 dfs[k] = v[list(overlap_columns)]
 

From d32838b4921cfd1409e593d4b3757bdbf3fdb29c Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Mon, 31 Jul 2023 14:15:10 -0400
Subject: [PATCH 05/10] Fix undefined name in column overlap

---
 biopandas/pdb/pandas_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index b9b535c..8998fd6 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -613,7 +613,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
         # Drop unexpected columns
         for k, v in dfs.items():
             if k in {"ATOM", "HETATM"}:
-                overlap_columns = set(pdb_df_columns).intersection(set(df.columns))
+                overlap_columns = set(pdb_df_columns).intersection(set(v.columns))
                 dfs[k] = v[list(overlap_columns)]
 
         for r in dfs:

From 564cf0831936f5d91d8dee8a2f0954f174981926 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Mon, 31 Jul 2023 14:30:11 -0400
Subject: [PATCH 06/10] sort by atom number

---
 biopandas/pdb/pandas_pdb.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 8998fd6..74e9186 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -638,6 +638,11 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
                 else:
                     dfs[r]["OUT"] = dfs[r]["OUT"] + dfs[r][c]
 
+        if "line_idx" in dfs.columns:
+                sort_column = "line_idx"
+            else:
+                sort_column = "atom_number"
+
         if pd_version < LooseVersion("0.17.0"):
             warn(
                 "You are using an old pandas version (< 0.17)"
@@ -646,7 +651,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
                 " installation to a more recent version.",
                 DeprecationWarning,
             )
-            dfs.sort(columns="line_idx", inplace=True)
+            dfs.sort(columns=sort_column, inplace=True)
 
         elif pd_version < LooseVersion("0.23.0"):
             df = pd.concat(dfs)
@@ -654,7 +659,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
         else:
             df = pd.concat(dfs, sort=False)
 
-        df.sort_values(by="line_idx", inplace=True)
+        df.sort_values(by=sort_column, inplace=True)
 
         with openf(path, w_mode) as f:
 

From 5252d3c310e85874a658ee05ad67d1ed22fdbd3d Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Tue, 1 Aug 2023 08:09:42 -0400
Subject: [PATCH 07/10] syntax error, drop legacy pandas support

---
 biopandas/pdb/pandas_pdb.py | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 74e9186..69e2fce 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -638,27 +638,17 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
                 else:
                     dfs[r]["OUT"] = dfs[r]["OUT"] + dfs[r][c]
 
-        if "line_idx" in dfs.columns:
-                sort_column = "line_idx"
-            else:
-                sort_column = "atom_number"
-
-        if pd_version < LooseVersion("0.17.0"):
-            warn(
-                "You are using an old pandas version (< 0.17)"
-                " that relies on the old sorting syntax."
-                " Please consider updating your pandas"
-                " installation to a more recent version.",
-                DeprecationWarning,
-            )
-            dfs.sort(columns=sort_column, inplace=True)
-
-        elif pd_version < LooseVersion("0.23.0"):
+        
+        if pd_version < LooseVersion("0.23.0"):
             df = pd.concat(dfs)
-
         else:
             df = pd.concat(dfs, sort=False)
 
+        if "line_idx" in dfs.columns:
+            sort_column = "line_idx"
+        else:
+            sort_column = "atom_number"
+
         df.sort_values(by=sort_column, inplace=True)
 
         with openf(path, w_mode) as f:

From f2d0929bfa4c1884a87a8e9d0618f2ed19d7eb4a Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Tue, 1 Aug 2023 08:16:30 -0400
Subject: [PATCH 08/10] fix: typo

---
 biopandas/pdb/pandas_pdb.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 69e2fce..175676f 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -638,13 +638,12 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
                 else:
                     dfs[r]["OUT"] = dfs[r]["OUT"] + dfs[r][c]
 
-        
         if pd_version < LooseVersion("0.23.0"):
             df = pd.concat(dfs)
         else:
             df = pd.concat(dfs, sort=False)
 
-        if "line_idx" in dfs.columns:
+        if "line_idx" in df.columns:
             sort_column = "line_idx"
         else:
             sort_column = "atom_number"

From affb5b6b353078931dda31f8f8e9d82781f59b73 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Tue, 1 Aug 2023 13:44:03 -0400
Subject: [PATCH 09/10] simplify writing dfs with added columns

---
 biopandas/pdb/pandas_pdb.py | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 175676f..d4129ef 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -610,12 +610,6 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
 
         dfs = {r: self.df[r].copy() for r in records if not self.df[r].empty}
 
-        # Drop unexpected columns
-        for k, v in dfs.items():
-            if k in {"ATOM", "HETATM"}:
-                overlap_columns = set(pdb_df_columns).intersection(set(v.columns))
-                dfs[k] = v[list(overlap_columns)]
-
         for r in dfs:
             for col in pdb_records[r]:
                 dfs[r][col["id"]] = dfs[r][col["id"]].apply(col["strf"])
@@ -628,7 +622,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
                     for idx in range(dfs[r][c].values.shape[0]):
                         if len(dfs[r][c].values[idx]) > 8:
                             dfs[r][c].values[idx] = str(dfs[r][c].values[idx]).strip()
-                if c in {"line_idx", "OUT"}:
+                if c in {"line_idx", "OUT", "model_id"}:
                     pass
                 elif r in {"ATOM", "HETATM"} and c not in pdb_df_columns:
                     warn(
@@ -643,12 +637,7 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
         else:
             df = pd.concat(dfs, sort=False)
 
-        if "line_idx" in df.columns:
-            sort_column = "line_idx"
-        else:
-            sort_column = "atom_number"
-
-        df.sort_values(by=sort_column, inplace=True)
+        df.sort_values(by="line_idx", inplace=True)
 
         with openf(path, w_mode) as f:
 

From 86cf6dcd050b8fdf1cafc1173d8b791e7a11af18 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Tue, 1 Aug 2023 13:52:35 -0400
Subject: [PATCH 10/10] add test

---
 biopandas/pdb/tests/test_write_pdb.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/biopandas/pdb/tests/test_write_pdb.py b/biopandas/pdb/tests/test_write_pdb.py
index a726336..f8b258f 100644
--- a/biopandas/pdb/tests/test_write_pdb.py
+++ b/biopandas/pdb/tests/test_write_pdb.py
@@ -71,3 +71,14 @@ def test_anisou():
         f1 = f.read()
     os.remove(OUTFILE)
     assert f1 == four_eiy
+
+def test_write_with_model_id():
+    """Check that writing after label_models() reproduces the input PDB file."""
+    ppdb = PandasPdb()
+    ppdb.read_pdb(TESTDATA_FILENAME)
+    ppdb.label_models()  # presumably adds the model_id column; confirm
+    ppdb.to_pdb(path=OUTFILE, records=None)
+    with open(TESTDATA_FILENAME, "r") as f_in, open(OUTFILE, "r") as f_out:
+        f1, f2 = f_in.read(), f_out.read()
+    os.remove(OUTFILE)  # clean up first so a failed assert leaves no stray file
+    assert f1 == f2  # NOTE(review): assumes the fixture round-trips unchanged; confirm
\ No newline at end of file