Browse Source

`locate()` for `linked::Db` without cache for now

pull/68/head
Sebastian Thiel 1 month ago
parent
commit
014bc3c74a
No known key found for this signature in database GPG Key ID: 9CB5EE7895E8268B
  1. 33
      git-odb/src/compound/locate.rs
  2. 24
      git-odb/src/linked/locate.rs
  3. 3
      git-odb/src/linked/mod.rs
  4. 38
      git-odb/tests/linked/mod.rs
  5. 6
      performance-tasks.md

33
git-odb/src/compound/locate.rs

@ -10,6 +10,11 @@ pub enum Error {
Pack(#[from] pack::data::decode::Error),
}
/// Result of a successful `compound::Db::internal_locate` lookup: either the loose object
/// itself, or the coordinates of the object inside one of the database's packs.
pub(crate) enum LooseOrPack {
/// The object was found in the loose object store and is carried along, boxed.
Loose(Box<loose::Object>),
/// The object was found in a pack. The first field is the position of the pack within
/// the database's `packs`, the second is the object index reported by that pack's lookup.
Packed(usize, u32),
}
impl compound::Db {
/// Find an object as identified by [`ObjectId`][git_hash::ObjectId] and store its data in full in the provided `buffer`.
/// This will search the object in all contained object databases.
@ -32,4 +37,32 @@ impl compound::Db {
}
Ok(None)
}
/// Crate-internal lookup that reports *where* `id` lives instead of handing out its data:
/// either as a `(pack index, object index)` pair or as the loose object itself.
///
/// Packs are searched first, in order; the loose store is only queried when no pack knows
/// the object. `Ok(None)` means neither of them contains it.
///
/// This exists to avoid double-lookups in `linked::Db::locate`.
/// (The polonius borrow-checker would support this via the locate
/// function, so this can be [simplified](https://github.com/Byron/gitoxide/blob/0c5f4043da4615820cb180804a81c2d4fe75fe5e/git-odb/src/compound/locate.rs#L47)
/// once polonius is stable.)
pub(crate) fn internal_locate(&self, id: impl AsRef<git_hash::oid>) -> Result<Option<LooseOrPack>, Error> {
    let oid = id.as_ref();

    // The first pack that knows the object wins.
    let in_pack = self.packs.iter().enumerate().find_map(|(pack_index, pack)| {
        pack.internal_locate_index(oid)
            .map(|object_index| LooseOrPack::Packed(pack_index, object_index))
    });

    match in_pack {
        Some(location) => Ok(Some(location)),
        // Not packed: fall back to the loose store, propagating its read errors.
        None => Ok(self
            .loose
            .locate(oid)?
            .map(|object| LooseOrPack::Loose(Box::new(object)))),
    }
}
/// Fetch the object at `object_index` from the pack at position `pack_index`, forwarding
/// `buffer` and `pack_cache` to that pack's own by-index accessor.
///
/// Both indices are presumably the ones carried by `LooseOrPack::Packed` as produced by
/// `internal_locate` (that is how `linked::Db::locate` uses this function).
///
/// # Panics
///
/// Indexing `self.packs` panics if `pack_index` is out of bounds.
pub(crate) fn internal_get_packed_object_by_index<'a>(
    &self,
    pack_index: usize,
    object_index: u32,
    buffer: &'a mut Vec<u8>,
    pack_cache: &mut impl pack::cache::DecodeEntry,
) -> Result<crate::borrowed::Object<'a>, pack::data::decode::Error> {
    let pack = &self.packs[pack_index];
    pack.internal_get_object_by_index(object_index, buffer, pack_cache)
}
}

24
git-odb/src/linked/locate.rs

@ -0,0 +1,24 @@
use crate::{compound, linked, pack};
impl linked::Db {
    /// Find the object identified by `id` in any of the linked object databases.
    ///
    /// The databases in `self.dbs` are asked in order and the first hit is returned.
    /// Loose objects are returned as they were found; packed objects are obtained through
    /// `buffer`, which the returned object may borrow from. No pack cache is used for now.
    ///
    /// Returns `Ok(None)` if no database contains the object.
    ///
    /// # Errors
    ///
    /// The first lookup or pack access error aborts the search and is returned.
    pub fn locate<'a>(
        &self,
        id: impl AsRef<git_hash::oid>,
        buffer: &'a mut Vec<u8>,
    ) -> Result<Option<compound::Object<'a>>, compound::locate::Error> {
        let id = id.as_ref();
        for db in self.dbs.iter() {
            // Only find out *where* the object is first; `buffer` must not be borrowed
            // before we know that this iteration is going to return.
            let location = match db.internal_locate(id)? {
                Some(location) => location,
                None => continue,
            };

            return match location {
                compound::locate::LooseOrPack::Loose(object) => Ok(Some(compound::Object::Loose(object))),
                compound::locate::LooseOrPack::Packed(pack_index, object_index) => {
                    let object = db.internal_get_packed_object_by_index(
                        pack_index,
                        object_index,
                        buffer,
                        &mut pack::cache::Noop,
                    )?;
                    Ok(Some(compound::Object::Borrowed(object)))
                }
            };
        }
        Ok(None)
    }
}

3
git-odb/src/linked/mod.rs

@ -8,3 +8,6 @@ pub struct Db {
///
pub mod init;
///
pub mod locate;

38
git-odb/tests/linked/mod.rs

@ -1,5 +1,34 @@
use crate::fixture_path;
use git_odb::linked::Db;
/// Open the linked object database rooted at the `objects` fixture directory.
fn db() -> Db {
    let objects_dir = fixture_path("objects");
    Db::at(objects_dir).expect("valid object path")
}
mod locate {
    use crate::{hex_to_id, linked::db};
    use git_odb::linked::Db;

    /// Assert that the object named by `hex_id` can be found in `db` without a read error.
    fn can_locate(db: &Db, hex_id: &str) {
        let mut buf = Vec::new();
        let object = db.locate(hex_to_id(hex_id), &mut buf).expect("no read error");
        assert!(object.is_some());
    }

    #[test]
    fn loose_object() {
        can_locate(&db(), "37d4e6c5c48ba0d245164c4e10d5f41140cab980");
    }

    #[test]
    fn pack_object() {
        // One object per pack of the fixture; a fresh database is opened for each lookup.
        let ids_by_pack = [
            "501b297447a8255d3533c6858bb692575cdefaa0", // pack 11fd
            "4dac9989f96bc5b5b1263b582c08f0c5f0b58542", // pack a2bf
            "dd25c539efbb0ab018caa4cda2d133285634e9b5", // pack c043
        ];
        for hex_id in ids_by_pack.iter() {
            can_locate(&db(), hex_id);
        }
    }
}
mod init {
use crate::alternate::alternate;
use crate::{alternate::alternate, linked::db};
use git_odb::linked;
use std::convert::TryFrom;
@ -22,4 +51,11 @@ mod init {
assert_eq!(db.dbs[0].loose.path, tmp.path());
Ok(())
}
/// The fixture consists of a single object database which contains three packs.
#[test]
fn has_packs() {
    let odb = db();
    assert_eq!(odb.dbs.len(), 1);
    assert_eq!(odb.dbs[0].packs.len(), 3)
}
}

6
performance-tasks.md

@ -1,5 +1,11 @@
## Potential for improving performance
### NLL/Borrowcheck limitations in git-odb::(compound|linked)::Db cause additional code complexity
* Once polonius is available with production-ready performance, we should simplify the `locate(…)` code in `(compound|linked)::Db()` respectively.
Currently these first have to obtain an index and, once it is found, access the data in a separate step, because the borrow checker otherwise fails to
understand our buffer usage within a loop correctly. Performance itself is probably not noticeably affected.
### Pack Decoding
* [ ] On **ARM64 on MacOS** the SHA1 implementation of the [`sha-1` crate](https://github.com/RustCrypto/hashes) is capped at about 550MB/s, half the speed of what I saw on Intel and about 50% slower than what's implemented in `libcorecrypto.dylib`. Get that fast and the decoding stage will be able

Loading…
Cancel
Save