-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add circle annotations #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
62f8f36
18afbb6
cf9f4bb
e8ac6ec
0ebe540
2463c7f
a555717
c490f64
2b0d315
dd80d0b
5c882bf
720c77f
56eb1e6
18649c0
f29db36
745687b
ba4da0e
db089ac
e3315e3
8f07ad2
cdcefd3
24d860e
930090d
eee26e3
b8ec1f9
bf658b9
a20bb6a
d01eb49
88bdb58
dc6b69d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- nodefaults | ||
dependencies: | ||
- bioconductor-annotatr =1.28 | ||
- r-tidyverse = 2.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- nodefaults | ||
dependencies: | ||
- bioconductor-biomart =2.58 | ||
- r-tidyverse = 2.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- nodefaults | ||
dependencies: | ||
- bioconductor-rtracklayer =1.62 | ||
- r-tidyverse = 2.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
log <- file(snakemake@log[[1]], open="wt") | ||
sink(log) | ||
sink(log, type="message") | ||
|
||
library("tidyverse") | ||
rlang::global_entrace() | ||
|
||
library("annotatr") | ||
library("GenomicRanges") | ||
|
||
circles <- read_tsv( | ||
snakemake@input[["tsv"]] | ||
) | ||
|
||
genome_build <- snakemake@params[["build"]] | ||
|
||
circles_gr <- GRanges( | ||
seqnames = pull(circles, region), | ||
ecDNA_status = "ecDNA" | ||
) | ||
|
||
genome(circles_gr) <- genome_build | ||
|
||
Comment on lines
+17
to
+23
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Parse the -regions = pull(circles, region),
-ecDNA_status = "ecDNA"
+seqnames = sub(":.*", "", circles$region),
+ranges = IRanges(
+ start = as.integer(sub(".*:(\\d+)-.*", "\\1", circles$region)),
+ end = as.integer(sub(".*-(\\d+)$", "\\1", circles$region))
+),
+ecDNA_status = "ecDNA"
🤖 Prompt for AI Agents
|
||
overlapping_annotations <- rtracklayer::import( | ||
snakemake@input[["all_annotations"]], | ||
which = circles_gr, #import only intervals overlapping circles | ||
genome = genome_build | ||
) | ||
|
||
annotated_circles <- annotate_regions( | ||
regions = circles_gr, | ||
annotations = overlapping_annotations, | ||
ignore.strand = TRUE, | ||
quiet = FALSE | ||
) |> | ||
as_tibble() |> | ||
dplyr::select( | ||
-c( | ||
strand, | ||
width, | ||
ecDNA_status, | ||
annot.width, | ||
annot.source, | ||
annot.score, | ||
annot.phase | ||
) | ||
) |> | ||
arrange( | ||
annot.seqnames, | ||
annot.start, | ||
annot.end | ||
) |> | ||
mutate( | ||
circle_region = str_c( | ||
seqnames, | ||
":", | ||
start, | ||
"-", | ||
end | ||
), | ||
region = str_c( | ||
annot.seqnames, | ||
":", | ||
annot.start, | ||
"-", | ||
annot.end | ||
) | ||
) |> | ||
dplyr::select( | ||
-c( | ||
seqnames, | ||
start, | ||
end, | ||
annot.seqnames, | ||
annot.start, | ||
annot.end | ||
) | ||
) |> | ||
dplyr::rename( | ||
exon_rank = annot.rank | ||
) |> | ||
dplyr::rename_with( | ||
~ str_replace(.x, fixed("annot."), "") | ||
) |> | ||
dplyr::select( | ||
region, | ||
type, | ||
id, | ||
name, | ||
parent_type, | ||
parent_id, | ||
exon_rank, | ||
circle_region, | ||
) |> | ||
group_by( | ||
circle_region | ||
) |> | ||
group_walk( | ||
~ write_tsv( | ||
.x, | ||
file = file.path( | ||
str_c( | ||
str_replace_all(.y$circle_region, "[:-]", "_"), | ||
".tsv" | ||
) | ||
) | ||
) | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Files written outside the declared output directory The call file = file.path(str_c(str_replace_all(.y$circle_region, "[:-]", "_"), ".tsv")) creates files in Add: outdir <- snakemake@params[["outdir"]]
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
...
file = file.path(outdir,
str_c(str_replace_all(.y$circle_region, "[:-]", "_"), ".tsv")) 🤖 Prompt for AI Agents
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -33,7 +33,7 @@ | |||||||||||||||||||||
] | ||||||||||||||||||||||
|
||||||||||||||||||||||
# turn int cols into int | ||||||||||||||||||||||
circles.loc[:, int_cols] = circles.loc[:, int_cols].round(0).applymap(lambda v: int(v) if not pd.isna(v) else pd.NA) | ||||||||||||||||||||||
circles[int_cols] = circles[int_cols].round(0).map(lambda v: int(v) if not pd.isna(v) else pd.NA) | ||||||||||||||||||||||
|
||||||||||||||||||||||
Comment on lines
35
to
37
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
-circles[int_cols] = circles[int_cols].round(0).map(lambda v: int(v) if not pd.isna(v) else pd.NA)
+# convert each value to integer while keeping NA
+circles[int_cols] = (
+ circles[int_cols]
+ .round(0)
+ .applymap(lambda v: pd.NA if pd.isna(v) else int(v))
+) 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents
|
||||||||||||||||||||||
# filter out low-quality circles, according to: | ||||||||||||||||||||||
# https://github.com/iprada/Circle-Map/wiki/Circle-Map-Realign-output-files | ||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Output directory declared, but the R script never writes to it
annotate_cleaned_circles.R
creates files in the current working directory viafile.path(...)
, yet the rule’s declared output isresults/circle-map/{sample}.circles.cleaned.annotated/
.Snakemake will mark the rule as incomplete because the declared directory is still empty.
Pass the directory path to the script and create it there, e.g.:
and in the R script use
snakemake@params[["outdir"]]
+dir.create(outdir, recursive=TRUE)
.📝 Committable suggestion
🤖 Prompt for AI Agents