Skip to content

feat: switch to compressed mapping #1335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,504 changes: 1,139 additions & 365 deletions examples/conda_mapping/pixi.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions examples/conda_mapping/pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ python = "~=3.11.0"
scipy = "~=1.11.4"
boltons = "*"
jupyter-ros = { version = "*", channel = "robostack" }
jupyter-amphion = {version = "*", channel = "robostack"}

[pypi-dependencies]
black = { version = "~=23.10", extras = ["jupyter"] }
Expand Down
3 changes: 2 additions & 1 deletion examples/conda_mapping/robostack_mapping.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"jupyter-ros": "my-name-from-mapping"
"jupyter-ros": "my-name-from-mapping",
"jupyter-amphion": null
}
15 changes: 13 additions & 2 deletions examples/conda_mapping/test_conda_mapping.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import yaml


PACKAGE_NAME_TO_TEST = {"boltons": "my-boltons-name", "jupyter-ros": "my-name-from-mapping"}
PACKAGE_NAME_TO_TEST = {
"boltons": "my-boltons-name?source=project-defined-mapping",
"jupyter-ros": "my-name-from-mapping?source=project-defined-mapping"
}

PACKAGE_NAME_SHOULD_BE_NULL = ("jupyter-amphion",)


if __name__ == "__main__":
Expand All @@ -16,7 +21,9 @@
package for package in lock["packages"] if package["name"] in PACKAGE_NAME_TO_TEST
]

assert len(expected_packages) == 2
expected_null_packages = [
package for package in lock["packages"] if package["name"] in PACKAGE_NAME_SHOULD_BE_NULL
]

for package in expected_packages:
package_name = package["name"]
Expand All @@ -29,3 +36,7 @@
expected_purl = f"pkg:pypi/{PACKAGE_NAME_TO_TEST[package_name]}"

assert purls[0] == expected_purl


for package in expected_null_packages:
assert "purls" not in package
958 changes: 501 additions & 457 deletions examples/pypi/pixi.lock

Large diffs are not rendered by default.

23 changes: 1 addition & 22 deletions src/lock_file/package_identifier.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{project::manifest::python::PyPiPackageName, pypi_mapping};
use crate::project::manifest::python::PyPiPackageName;
use pep508_rs::{Requirement, VersionOrUrl};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use std::{collections::HashSet, str::FromStr};
Expand Down Expand Up @@ -32,31 +32,10 @@ impl PypiPackageIdentifier {
result: &mut Vec<Self>,
) -> Result<(), ConversionError> {
// Check the PURLs for a python package.
let mut has_pypi_purl = false;
for purl in record.package_record.purls.iter() {
if let Some(entry) = Self::try_from_purl(purl, &record.package_record.version.as_str())?
{
result.push(entry);
has_pypi_purl = true;
}
}

// If there is no pypi purl, but the package is a conda-forge package, we just assume that
// the name of the package is equivalent to the name of the python package.
if !has_pypi_purl && pypi_mapping::is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
let name = PackageName::from_str(record.package_record.name.as_source()).ok();
let version =
pep440_rs::Version::from_str(&record.package_record.version.as_str()).ok();
if let (Some(name), Some(version)) = (name, version) {
result.push(PypiPackageIdentifier {
name: PyPiPackageName::from_normalized(name),
version,
url: record.url.clone(),
// TODO: We can't really tell which python extras are enabled in a conda package.
extras: Default::default(),
})
}
}

Expand Down
61 changes: 38 additions & 23 deletions src/pypi_mapping/custom_pypi_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@ use async_once_cell::OnceCell;
use crate::pypi_mapping::MappingLocation;

use super::{
prefix_pypi_name_mapping::{self},
build_pypi_purl_from_package_record, is_conda_forge_record, prefix_pypi_name_mapping,
MappingMap, Reporter,
};

pub async fn fetch_mapping_from_url(
pub async fn fetch_mapping_from_url<T>(
client: &ClientWithMiddleware,
url: &Url,
) -> miette::Result<HashMap<String, String>> {
) -> miette::Result<T>
where
T: serde::de::DeserializeOwned,
{
let response = client
.get(url.clone())
.send()
Expand All @@ -34,8 +37,7 @@ pub async fn fetch_mapping_from_url(
));
}

let mapping_by_name: HashMap<String, String> =
response.json().await.into_diagnostic().context(format!(
let mapping_by_name: T = response.json().await.into_diagnostic().context(format!(
"failed to parse pypi name mapping located at {}. Please make sure that it's a valid json",
url
))?;
Expand All @@ -46,11 +48,11 @@ pub async fn fetch_mapping_from_url(
pub async fn fetch_custom_mapping(
client: &ClientWithMiddleware,
mapping_url: &MappingMap,
) -> miette::Result<&'static HashMap<String, HashMap<String, String>>> {
static MAPPING: OnceCell<HashMap<String, HashMap<String, String>>> = OnceCell::new();
) -> miette::Result<&'static HashMap<String, HashMap<String, Option<String>>>> {
static MAPPING: OnceCell<HashMap<String, HashMap<String, Option<String>>>> = OnceCell::new();
MAPPING
.get_or_try_init(async {
let mut mapping_url_to_name: HashMap<String, HashMap<String, String>> =
let mut mapping_url_to_name: HashMap<String, HashMap<String, Option<String>>> =
Default::default();

for (name, url) in mapping_url.iter() {
Expand Down Expand Up @@ -83,10 +85,9 @@ pub async fn fetch_custom_mapping(
let contents = std::fs::read_to_string(path)
.into_diagnostic()
.context(format!("mapping on {path:?} could not be loaded"))?;
let data: HashMap<String, String> = serde_json::from_str(&contents)
.unwrap_or_else(|_| {
panic!("Failed to parse JSON mapping located at {path:?}")
});
let data: HashMap<String, Option<String>> = serde_json::from_str(&contents)
.into_diagnostic()
.context(format!("Failed to parse JSON mapping located at {path:?}"))?;

mapping_url_to_name.insert(name.to_string(), data);
}
Expand Down Expand Up @@ -149,7 +150,7 @@ pub async fn amend_pypi_purls(
/// a conda-forge package.
fn amend_pypi_purls_for_record(
record: &mut RepoDataRecord,
custom_mapping: &'static HashMap<String, HashMap<String, String>>,
custom_mapping: &'static HashMap<String, HashMap<String, Option<String>>>,
) -> miette::Result<()> {
// If the package already has a pypi name we can stop here.
if record
Expand All @@ -161,27 +162,41 @@ fn amend_pypi_purls_for_record(
return Ok(());
}

let mut not_a_pypi = false;

// If this package is a conda-forge package or user specified a custom channel mapping
// we can try to guess the pypi name from the conda name
if custom_mapping.contains_key(&record.channel) {
if let Some(mapped_channel) = custom_mapping.get(&record.channel) {
if let Some(mapped_name) =
mapped_channel.get(record.package_record.name.as_normalized())
{
record.package_record.purls.push(
PackageUrl::new(String::from("pypi"), mapped_name)
.expect("valid pypi package url"),
);
if let Some(mapped_channel) = custom_mapping.get(&record.channel) {
if let Some(mapped_name) = mapped_channel.get(record.package_record.name.as_normalized()) {
if let Some(name) = mapped_name {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can use an if-let chain for this: rust-lang/rust#88642. Not sure, though; I have not used it yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The compiler says that `let` expressions in this position are unstable:
rust-lang/rust#53667

let purl = PackageUrl::builder(String::from("pypi"), name.to_string())
.with_qualifier("source", "project-defined-mapping")
.expect("valid qualifier");

record
.package_record
.purls
.push(purl.build().expect("valid pypi package url"));
} else {
not_a_pypi = true;
}
}
}

if !not_a_pypi && record.package_record.purls.is_empty() && is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
if let Some(purl) = build_pypi_purl_from_package_record(&record.package_record) {
record.package_record.purls.push(purl);
}
}

Ok(())
}

pub fn _amend_only_custom_pypi_purls(
conda_packages: &mut [RepoDataRecord],
custom_mapping: &'static HashMap<String, HashMap<String, String>>,
custom_mapping: &'static HashMap<String, HashMap<String, Option<String>>>,
) -> miette::Result<()> {
for record in conda_packages.iter_mut() {
amend_pypi_purls_for_record(record, custom_mapping)?;
Expand Down
29 changes: 25 additions & 4 deletions src/pypi_mapping/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use std::{collections::HashMap, path::PathBuf, str::FromStr, sync::Arc};

use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
use rattler_conda_types::RepoDataRecord;
use rattler_conda_types::{PackageRecord, PackageUrl, RepoDataRecord};
use reqwest_middleware::ClientBuilder;
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use url::Url;

use crate::config::get_cache_dir;

mod custom_pypi_mapping;
pub mod custom_pypi_mapping;
pub mod prefix_pypi_name_mapping;

pub trait Reporter: Send + Sync {
Expand All @@ -19,9 +19,9 @@ pub trait Reporter: Send + Sync {

pub type ChannelName = String;

type MappingMap = HashMap<ChannelName, MappingLocation>;
pub type MappingMap = HashMap<ChannelName, MappingLocation>;

#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum MappingLocation {
Path(PathBuf),
Url(Url),
Expand All @@ -32,6 +32,15 @@ pub enum MappingSource {
Prefix,
}

impl MappingSource {
    /// Returns a clone of the channel-to-location mapping when this source is
    /// the `Custom` variant, and `None` for every other variant.
    pub fn custom(&self) -> Option<MappingMap> {
        if let MappingSource::Custom { mapping } = self {
            Some(mapping.clone())
        } else {
            None
        }
    }
}

pub async fn amend_pypi_purls(
client: reqwest::Client,
mapping_source: &MappingSource,
Expand Down Expand Up @@ -78,3 +87,15 @@ pub fn is_conda_forge_record(record: &RepoDataRecord) -> bool {
/// Returns `true` when the path component of `url` begins with `/conda-forge`.
pub fn is_conda_forge_url(url: &Url) -> bool {
    let path = url.path();
    path.starts_with("/conda-forge")
}

/// Builds a package URL of type `pypi` whose name is derived from the conda
/// package name of `package_record`.
///
/// Returns `None` when the conda name cannot be parsed as a
/// `pep508_rs::PackageName`, or when the record's version cannot be parsed as
/// a `pep440_rs::Version`. The parsed version is only used as a validity
/// check; it is not embedded in the resulting purl.
///
/// # Panics
///
/// Panics if the purl builder rejects the parsed name.
pub fn build_pypi_purl_from_package_record(package_record: &PackageRecord) -> Option<PackageUrl> {
    let pypi_name = pep508_rs::PackageName::from_str(package_record.name.as_source()).ok()?;

    // The version must be a valid PEP 440 version, otherwise we do not treat
    // the record as a Python package at all.
    pep440_rs::Version::from_str(&package_record.version.as_str()).ok()?;

    let purl = PackageUrl::builder(String::from("pypi"), pypi_name.to_string())
        .build()
        .expect("valid pypi package url");

    Some(purl)
}
50 changes: 38 additions & 12 deletions src/pypi_mapping/prefix_pypi_name_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ use std::sync::Arc;
use tokio::sync::Semaphore;
use url::Url;

use super::{custom_pypi_mapping, Reporter};
use super::{
build_pypi_purl_from_package_record, custom_pypi_mapping, is_conda_forge_record, Reporter,
};

const STORAGE_URL: &str = "https://conda-mapping.prefix.dev";
const HASH_DIR: &str = "hash-v0";
const COMPRESSED_MAPPING: &str =
"https://raw.githubusercontent.com/prefix-dev/parselmouth/main/files/mapping_as_grayskull.json";
"https://raw.githubusercontent.com/prefix-dev/parselmouth/main/files/compressed_mapping.json";

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Package {
Expand Down Expand Up @@ -143,7 +145,7 @@ pub async fn conda_pypi_name_mapping(
/// Downloads and caches prefix.dev conda-pypi mapping.
pub async fn conda_pypi_name_compressed_mapping(
client: &ClientWithMiddleware,
) -> miette::Result<HashMap<String, String>> {
) -> miette::Result<HashMap<String, Option<String>>> {
let compressed_mapping_url =
Url::parse(COMPRESSED_MAPPING).expect("COMPRESSED_MAPPING static variable should be valid");

Expand All @@ -158,6 +160,7 @@ pub async fn amend_pypi_purls(
) -> miette::Result<()> {
let conda_mapping = conda_pypi_name_mapping(client, conda_packages, reporter).await?;
let compressed_mapping = conda_pypi_name_compressed_mapping(client).await?;

for record in conda_packages.iter_mut() {
amend_pypi_purls_for_record(record, &conda_mapping, &compressed_mapping)?;
}
Expand All @@ -172,7 +175,7 @@ pub async fn amend_pypi_purls(
pub fn amend_pypi_purls_for_record(
record: &mut RepoDataRecord,
conda_forge_mapping: &HashMap<Sha256Hash, Package>,
compressed_mapping: &HashMap<String, String>,
compressed_mapping: &HashMap<String, Option<String>>,
) -> miette::Result<()> {
// If the package already has a pypi name we can stop here.
if record
Expand All @@ -184,25 +187,48 @@ pub fn amend_pypi_purls_for_record(
return Ok(());
}

let mut no_a_pypi = false;

if let Some(sha256) = record.package_record.sha256 {
if let Some(mapped_name) = conda_forge_mapping.get(&sha256) {
if let Some(pypi_names) = &mapped_name.versions {
for pypi_name in pypi_names.keys() {
let purl = PackageUrl::builder(String::from("pypi"), pypi_name);
if let Some(pypi_names) = &mapped_name.pypi_normalized_names {
for pypi_name in pypi_names {
let purl = PackageUrl::builder(String::from("pypi"), pypi_name)
.with_qualifier("source", "conda-forge-mapping")
.expect("valid qualifier");
let built_purl = purl.build().expect("valid pypi package url");
record.package_record.purls.push(built_purl);
}
} else {
// it's not a pypi name
no_a_pypi = true;
}
} else if let Some(mapped_name) =
} else if let Some(possible_mapped_name) =
compressed_mapping.get(record.package_record.name.as_normalized())
{
// maybe the packages is not yet updated
// so fallback to the one from compressed mapping
let purl = PackageUrl::builder(String::from("pypi"), mapped_name);
let built_purl = purl.build().expect("valid pypi package url");
record.package_record.purls.push(built_purl);
if let Some(mapped_name) = possible_mapped_name {
let purl = PackageUrl::builder(String::from("pypi"), mapped_name)
.with_qualifier("source", "conda-forge-mapping")
.expect("valid qualifier");
let built_purl = purl.build().expect("valid pypi package url");
record.package_record.purls.push(built_purl);
} else {
// it's not a pypi name
no_a_pypi = true;
}
}
}

// package is not in our mapping yet
// so we assume that it is the same as the one from conda-forge
if !no_a_pypi && record.package_record.purls.is_empty() && is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
if let Some(purl) = build_pypi_purl_from_package_record(&record.package_record) {
record.package_record.purls.push(purl);
}
// nothing was matched so we don't add purls for it
}

Ok(())
Expand Down
Loading