Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Learn Renku
Teaching on Renku
Advanced teaching automation
Commits
aad6e6a1
Commit
aad6e6a1
authored
Mar 11, 2022
by
Cyril Matthey-Doret
Browse files
add fork collection scripts
parent
db3fcdae
Pipeline
#327043
passed with stage
in 14 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
notebooks/.gitkeep
deleted
100644 → 0
View file @
db3fcdae
scripts/clone_forks_from_json.sh
0 → 100644
View file @
aad6e6a1
#!/usr/bin/env bash
# Reads the JSON output of collect_forks.py (either from a file or from stdin)
# and clones all target forks into the provided directory at the target commit.
# usage ./clone_forks_from_json.sh OUT_DIR forks.json
# ./collect_forks.py --token token.asc URL | ./clone_forks_from_json.sh OUT_DIR
# Help message
function usage () {
    # Print the help message on stdout, then terminate the script with status 0.
    # "$0" is quoted so that a script path containing spaces is not word-split
    # before being handed to basename.
    cat <<EOF
Usage: $(basename "$0") [outdir] [in_file.json]
       ./collect_forks.py --token token.asc URL | $(basename "$0") [outdir]

Reads json output from collect_forks.py and clone all target forks into provided directory
at the target commit. Repositories are cloned into outdir/namespace.

Arguments:
  outdir: Directory where all forks will be cloned [default: .]
  in_file.json: JSON output of collect_forks.py containing fork metadata [default: stdin]
EOF
    exit 0
}
# Parsing CL arguments
# Positional arguments, both optional: output directory and JSON input file.
OUT_DIR="${1:-.}"
JSON="${2:-/dev/stdin}"

# Too many arguments or an explicit help flag: show the help message and exit.
if [[ $# -gt 2 ]] || [[ "${1:-}" == '-h' ]] || [[ "${1:-}" == '--help' ]]; then
    usage
fi

mkdir -p "${OUT_DIR}"

# Emit one tab-separated record (url, group, commit) per fork and hand the
# fields to git as separate, quoted arguments. Values coming from the JSON are
# never interpreted by a shell, so spaces or shell metacharacters in a field
# cannot break (or hijack) the command line.
jq -r '.[] | [.url, .group, (.commit // "")] | @tsv' < "${JSON}" |
    while IFS=$'\t' read -r url group commit; do
        dest="${OUT_DIR}/${group}"
        # stdin is redirected so git cannot consume the remaining records.
        # A failed clone skips the checkout for that fork, as before.
        git clone "${url}" "${dest}" < /dev/null || continue
        if [[ -n "${commit}" ]]; then
            git -C "${dest}" checkout "${commit}" < /dev/null
        else
            # collect_forks.py emits a null commit when nothing precedes the deadline.
            echo "No commit recorded for ${group}; leaving the default checkout." >&2
        fi
    done
scripts/collect_forks.py
0 → 100644
View file @
aad6e6a1
#!/usr/bin/env python3
# Collect all group-owned forks of a given project using Gitlab API
# Forks metadata is sent to stdout and contains the following fields:
# - id (gitlab identifier)
# - url
# - group
# - members (username, full name and email of each member)
# - commit (the last commit before specified deadline)
# - autostart url (the URL to start a renku session at the last pre-deadline commit)
import
re
from
typing
import
Tuple
,
List
,
Dict
,
Optional
import
json
import
requests
import
click
from
datetime
import
datetime
import
pytz
# Replace URL
def validate_iso_date(date: str) -> str:
    """Check that the input string is in ISO-8601 format.

    Parameters
    ----------
    date
        Candidate date (or date-time) string, e.g. ``2022-03-29T13:10:29``.

    Returns
    -------
    str
        The input string, unchanged, once it has been parsed successfully.

    Raises
    ------
    ValueError
        If ``datetime.fromisoformat`` cannot parse the string.

    Examples
    --------
    >>> validate_iso_date('2022-03-29T13:10:29')
    '2022-03-29T13:10:29'
    """
    try:
        # Only the parse attempt matters; the parsed value itself is not used.
        datetime.fromisoformat(date)
    except ValueError as err:
        # Keep the original parsing error attached as the cause.
        raise ValueError("Deadline must be in ISO-8601 format.") from err
    return date
def parse_repo_url(url: str) -> Tuple[str, str, str]:
    """Decompose a full repo URL into 3 parts:

    - the organization base URL (optionally ending in /gitlab or /projects)
    - the namespace (i.e. groups and subgroups)
    - the name of the repository

    A trailing slash on the URL is ignored.

    Raises
    ------
    ValueError
        If the URL does not look like ``https://host/namespace/repo``.

    Examples
    --------
    >>> parse_repo_url('https://org.com/group/subgroup/repo')
    ('https://org.com', 'group/subgroup', 'repo')
    >>> parse_repo_url('https://org.com/gitlab/group/repo')
    ('https://org.com/gitlab', 'group', 'repo')
    >>> parse_repo_url('https://org.com/group/repo/')
    ('https://org.com', 'group', 'repo')
    """
    pattern = re.compile(
        (
            "(?P<base>https://[^/]*(/gitlab|/projects)?)/"
            "(?P<namespace>([^/]*/)*)"
            "(?P<repo>[^/]*)$"
        ),
        re.IGNORECASE,
    )
    # Without this, a trailing slash would push the repository name into the
    # namespace and leave the repo part empty.
    matched = pattern.match(url.rstrip("/"))
    if matched is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError(f"Could not parse repository URL: {url!r}")
    # The namespace group captures its trailing '/', which is not part of it.
    namespace = matched["namespace"].strip("/")
    return matched["base"], namespace, matched["repo"]
def get_project_id(project_url: str, header: Optional[Dict[str, str]] = None) -> int:
    """Given a project's URL, return its gitlab ID.

    The projects endpoint is searched by repository name, one page at a time,
    until an entry whose ``path_with_namespace`` equals the namespace and
    repository parsed from ``project_url`` is found.

    Parameters
    ----------
    project_url
        Full URL of the project, as accepted by ``parse_repo_url``.
    header
        HTTP headers sent with each request (e.g. the API token).

    Raises
    ------
    ValueError
        If no project matches the URL, or if more than one does on a page.
    requests.HTTPError
        If the API answers with an error status.
    """
    base, namespace, repo = parse_repo_url(project_url)
    full_path = f"{namespace}/{repo}"
    page = 1
    while True:
        # `params` lets requests URL-encode the search term.
        resp = requests.get(
            f"{base}/api/v4/projects",
            params={"search": repo, "per_page": 100, "page": page},
            headers=header,
        )
        resp.raise_for_status()
        candidates = resp.json()
        if not candidates:
            # An empty page means the results are exhausted: stop instead of
            # requesting further pages forever.
            raise ValueError(f"No project matched input url: {project_url}")
        matches = [p for p in candidates if p["path_with_namespace"] == full_path]
        if len(matches) > 1:
            raise ValueError("More than one project matched input url")
        if matches:
            return matches[0]["id"]
        page += 1
def collect_forks(project_url: str, header: Optional[Dict[str, str]] = None) -> List[Dict]:
    """Retrieve the metadata from all forks of input project.

    Parameters
    ----------
    project_url
        Full URL of the upstream project.
    header
        HTTP headers sent with each request (e.g. the API token).

    Returns
    -------
    List[Dict]
        The fork entries returned by the forks endpoint, all pages concatenated.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    base, _, _ = parse_repo_url(project_url)
    upstream_id = get_project_id(project_url, header)

    forks: List[Dict] = []
    page = 1
    while True:
        resp = requests.get(
            f"{base}/api/v4/projects/{upstream_id}/forks",
            params={"per_page": 100, "page": page},
            headers=header,
        )
        # An error payload is a non-empty dict; without this check it would be
        # mistaken for a page of forks and the loop would never terminate.
        resp.raise_for_status()
        new_forks = resp.json()
        if not new_forks:
            return forks
        forks.extend(new_forks)
        page += 1
def filter_group_forks(forks: List[Dict]) -> List[Dict]:
    """Return the forks whose namespace kind is 'group', in their input order.

    Examples
    --------
    >>> d1 = {'id': 1, 'namespace': {'kind': 'user'}}
    >>> d2 = {'id': 2, 'namespace': {'kind': 'group'}}
    >>> filter_group_forks([d1, d2]) == [d2]
    True
    """
    group_owned = []
    for fork in forks:
        owner_kind = fork["namespace"]["kind"]
        if owner_kind == "group":
            group_owned.append(fork)
    return group_owned
def get_last_commit_hash(
    project_id: int,
    base_url: str,
    header: Dict[str, str],
    deadline: Optional[str] = None,
) -> Optional[str]:
    """Get the hash of the last commit before a deadline.

    Parameters
    ----------
    project_id
        Gitlab identifier of the project.
    base_url
        Base URL of the Gitlab instance.
    header
        HTTP headers sent with each request (e.g. the API token).
    deadline
        ISO-8601 date time. A value without UTC offset is assumed to be in
        UTC. When None, the first commit listed by the API is returned.

    Returns
    -------
    Optional[str]
        The commit hash, or None when no commit was authored on or before
        the deadline.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # There is an 'until' option in the commits API, but it seems bugged...
    loc_deadline = None
    if deadline is not None:
        loc_deadline = datetime.fromisoformat(deadline)
        if loc_deadline.tzinfo is None:
            # Assumes we're in UTC. localize() rejects datetimes that already
            # carry a timezone, so it is only applied to naive values.
            loc_deadline = pytz.UTC.localize(loc_deadline)

    page = 1
    while True:
        resp = requests.get(
            f"{base_url}/api/v4/projects/{project_id}/repository/commits",
            params={"per_page": 100, "page": page},
            headers=header,
        )
        resp.raise_for_status()
        commits = resp.json()
        if not commits:
            # History exhausted without finding a suitable commit.
            return None
        # Commits are sorted in reverse chronological order by default
        for commit in commits:
            if loc_deadline is None:
                return commit["id"]
            commit_date = datetime.fromisoformat(commit["authored_date"])
            if commit_date <= loc_deadline:
                return commit["id"]
        # The commit may be older than this page: keep paging.
        page += 1
def format_fork_metadata(
    fork: Dict, header: Dict[str, str], deadline: Optional[str] = None
) -> Dict:
    """Format and add fields to a fork's metadata.

    The resulting metadata will have the following fields:
    id, url, group, members, commit, autostart_url

    Parameters
    ----------
    fork
        One fork entry as returned by the forks endpoint. The keys read here
        are ``id``, ``http_url_to_repo``, ``namespace.full_path``,
        ``_links.members`` and, when present, ``default_branch``.
    header
        HTTP headers sent with each request (e.g. the API token).
    deadline
        Optional ISO-8601 date time forwarded to ``get_last_commit_hash``.

    Raises
    ------
    requests.HTTPError
        If the members request answers with an error status.
    """
    meta = {
        "id": fork["id"],
        "url": fork["http_url_to_repo"],
        "group": fork["namespace"]["full_path"],
    }

    # Additional API query to retrieve members metadata
    members_resp = requests.get(fork["_links"]["members"] + "/all", headers=header)
    # Fail here rather than iterating over the keys of an error payload.
    members_resp.raise_for_status()

    # Only retain relevant user fields
    member_keys = ("username", "name", "email")
    meta["members"] = [
        {key: member.get(key) for key in member_keys}
        for member in members_resp.json()
    ]

    base, namespace, repo = parse_repo_url(meta["url"])
    # Get the last commit before deadline
    meta["commit"] = get_last_commit_hash(meta["id"], base, header, deadline)

    # Build a renku autostart url using the commit hash
    renku_base = re.sub(r"/gitlab.*$", "", base)
    project_name = repo.removesuffix(".git")
    # The commit is taken from the fork's default branch listing, so point the
    # session at that branch; fall back to the previous hard-coded value when
    # the fork entry does not carry one.
    branch = fork.get("default_branch") or "master"
    meta["autostart_url"] = (
        f"{renku_base}"
        f"/projects/{namespace}/{project_name}/sessions/"
        f"new?autostart=1&commit={meta['commit']}&branch={branch}"
    )
    return meta
@click.command()
@click.option(
    "--deadline",
    type=str,
    help="ISO-8601 formatted date. Example: 2022-03-29T13:10:29",
)
@click.argument("repo_url", type=str)
@click.option(
    "--token",
    type=click.Path(exists=True),
    help="Armored ASCII file containing the Gitlab API token. If not provided, you will be prompted for the token.",
)
def main(repo_url, token, deadline=None):
    """Print, as JSON on stdout, the metadata of all group-owned forks of REPO_URL."""
    if token is None:
        # stdout carries the JSON result (possibly piped into another tool),
        # so the prompt is written to stderr.
        token = click.prompt(
            "Please enter your Gitlab API token", hide_input=True, err=True
        )
    else:
        # Close the token file as soon as it has been read.
        with open(token) as token_file:
            token = token_file.read().strip()

    # Check for valid deadline format
    if deadline is not None:
        validate_iso_date(deadline)

    # Get metadata of all forks from input project
    header = {"PRIVATE-TOKEN": token}
    forks = collect_forks(repo_url, header)
    # Only keep those which belong to a group
    forks = filter_group_forks(forks)
    # Reformat metadata for convenience
    meta = [format_fork_metadata(fork, header, deadline) for fork in forks]
    print(json.dumps(meta))


if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment