@@ -53,6 +53,26 @@ async function fetchFilesList(url: DirectoryUrl, options?: {requestInit?: Reques
5353export function getHuggingFaceSource ( sourceId : string , options ?: { requestInit ?: RequestInit , accessToken ?: string } ) : FileSource | DirSource | undefined {
5454 try {
5555 const url = parseHuggingFaceUrl ( sourceId )
/**
 * List the refs of the current repository as alternative versions of this source.
 *
 * Every ref returned by `fetchRefsList` is turned into an entry whose `sourceId`
 * points at the same `url.path` under that ref, using a `blob` URL when the
 * current source is a file and a `tree` URL otherwise.
 *
 * @returns an object with a fixed `label` and the `versions` list; each version
 *   carries a display `label` and the `sourceId` to navigate to
 */
async function fetchVersions() {
  const branchRefPrefix = 'refs/heads/'
  // Invariant for every version: depends only on the kind of the current source.
  const urlSegment = url.kind === 'file' ? 'blob' : 'tree'
  const refsList = await fetchRefsList(url.repo, options)
  return {
    label: 'Branches',
    versions: refsList.map(({ refType, name, ref }) => {
      // Branches are shown by bare name; other ref types get a bracketed tag.
      const label = refType === 'branches' ? name :
        refType === 'converts' ? `[convert] ${name}` :
          refType === 'tags' ? `[tag] ${name}` :
            `[pr] ${name}`
      // Remove refs/heads/ from the ref name of branches, e.g. refs/heads/main -> main.
      // Only a leading prefix is removed, so a ref that merely contains
      // "refs/heads/" somewhere in the middle is left intact.
      const fixedRef = refType === 'branches' && ref.startsWith(branchRefPrefix)
        ? ref.slice(branchRefPrefix.length)
        : ref
      const branchSourceId = `${url.origin}/datasets/${url.repo}/${urlSegment}/${fixedRef}${url.path}`
      return {
        label,
        sourceId: branchSourceId,
      }
    }),
  }
}
5676 if ( url . kind === 'file' ) {
5777 return {
5878 kind : 'file' ,
@@ -61,6 +81,7 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
6181 fileName : getFileName ( url . path ) ,
6282 resolveUrl : url . resolveUrl ,
6383 requestInit : options ?. requestInit ,
84+ fetchVersions,
6485 }
6586 } else {
6687 return {
@@ -69,6 +90,7 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
6990 sourceParts : getSourceParts ( url ) ,
7091 prefix : getPrefix ( url ) ,
7192 listFiles : ( ) => fetchFilesList ( url , options ) ,
93+ fetchVersions,
7294 }
7395 }
7496 } catch ( e ) {
@@ -169,3 +191,65 @@ export function parseHuggingFaceUrl(url: string): HFUrl {
169191
170192 throw new Error ( 'Unsupported Hugging Face URL' )
171193}
194+
/**
 * One ref entry as found in the JSON returned by the refs endpoint.
 */
interface RefResponse {
  name: string;
  /** Full ref name, e.g. refs/heads/main for a branch. */
  ref: string;
  targetCommit: string; // NOTE(review): presumably the commit the ref points to — confirm against the API
}

/**
 * Keys under which the refs endpoint groups its entries.
 * The order of this list is the order of the groups in the result of `fetchRefsList`.
 */
export const refTypes = [
  'branches',
  'tags',
  'converts',
  'pullRequests',
] as const
type RefType = (typeof refTypes)[number];
/** Shape of the endpoint payload: every group is optional. */
type RefsResponse = Partial<Record<RefType, RefResponse[]>>;

/**
 * A ref entry annotated with the group it was listed under.
 */
export interface RefMetadata extends RefResponse {
  refType: RefType; // TODO(SL): use it to style the refs differently?
}

/**
 * List refs in a HF dataset repo
 *
 * Example API URL: https://huggingface.co/api/datasets/codeparrot/github-code/refs
 *
 * Groups that are missing from the response, or that are not arrays, are skipped.
 *
 * @param repo (namespace/repo)
 * @param [options]
 * @param [options.requestInit] - request init object to pass to fetch
 * @param [options.accessToken] - access token to use for authentication
 *
 * @returns the list of branches, tags, converts, and pull requests (in that order),
 *   each tagged with its `refType`
 * @throws {TypeError} if an access token is given that does not start with `hf_`
 * @throws {Error} if the HTTP response is not ok, or if its body is not a JSON object
 */
export async function fetchRefsList(
  repo: string,
  options?: { requestInit?: RequestInit, accessToken?: string },
): Promise<RefMetadata[]> {
  if (options?.accessToken && !options.accessToken.startsWith('hf_')) {
    throw new TypeError('Your access token must start with \'hf_\'')
  }
  // Start from the caller's headers so that custom ones are preserved.
  const headers = new Headers(options?.requestInit?.headers)
  headers.set('accept', 'application/json')
  if (options?.accessToken) {
    headers.set('Authorization', `Bearer ${options.accessToken}`)
  }
  const response = await fetch(`https://huggingface.co/api/datasets/${repo}/refs`, { ...options?.requestInit, headers })
  if (!response.ok) {
    throw new Error(`HTTP error ${response.status.toString()}`)
  }
  // The body is untrusted input: check its shape before indexing into it.
  const payload: unknown = await response.json()
  if (typeof payload !== 'object' || payload === null) {
    throw new Error('Unexpected refs response: expected a JSON object')
  }
  const refsByType = payload as RefsResponse
  return refTypes.flatMap((refType) => {
    const refs = refsByType[refType]
    if (!Array.isArray(refs)) {
      return []
    }
    // refType is written last so that the group key always wins over
    // any field of the same name in the payload.
    return refs.map(ref => ({ ...ref, refType }))
  })
}
0 commit comments