Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Eric Simon
fso_metadata_fork
Commits
dc4bb4fc
Commit
dc4bb4fc
authored
Oct 20, 2021
by
Pauline Maury Laribière
Browse files
update while doing the R library
parent
3d571861
Changes
8
Hide whitespace changes
Inline
Side-by-side
fso_metadata/README.md
View file @
dc4bb4fc
# Metadata Auto
# Metadata Auto
Python Library
## Introduction
...
...
fso_metadata/api_call.py
View file @
dc4bb4fc
...
...
@@ -3,7 +3,7 @@ from typing import Type, Union
import
pandas
as
pd
from
api_class
es
import
Api
from
api_class
import
Api
from
constants
import
DCAT_URL
from
utils
import
stringify_filters
...
...
@@ -40,7 +40,7 @@ def get_codelist(
version_format
=
version_format
,
parameters
=
f
'annotations=
{
str
(
annotations
).
lower
()
}
'
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_content_configuration
()
->
dict
:
...
...
@@ -50,7 +50,7 @@ def get_content_configuration() -> dict:
- response (dict): the configured content's display information
'''
api
=
Api
(
api_type
=
'content_configuration'
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_identifier_content
(
identifier
)
->
dict
:
...
...
@@ -62,7 +62,7 @@ def get_identifier_content(identifier) -> dict:
- response (dict): the nomenclature's information
'''
api
=
Api
(
api_type
=
'content_configuration_identifier'
,
_id
=
identifier
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_dataset_description
(
identifier
:
str
,
language
:
str
=
'fr'
)
->
dict
:
...
...
@@ -80,7 +80,7 @@ def get_dataset_description(identifier: str, language: str = 'fr') -> dict:
_id
=
identifier
,
language
=
language
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_dataset_information
(
identifier
:
str
,
language
:
str
=
'fr'
)
->
dict
:
...
...
@@ -98,7 +98,7 @@ def get_dataset_information(identifier: str, language: str = 'fr') -> dict:
_id
=
identifier
,
language
=
language
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_data_structure
(
identifier
:
str
,
language
:
str
=
'fr'
)
->
dict
:
...
...
@@ -111,8 +111,8 @@ def get_data_structure(identifier: str, language: str = 'fr') -> dict:
Returns:
- response: data structure dictionary
'''
api
=
Api
(
api_type
=
'data_structure'
,
_id
=
identifier
,
language
=
language
)
return
api
.
api_call
()
api
=
Api
(
api_type
=
'
dcat_
data_structure'
,
_id
=
identifier
,
language
=
language
)
return
api
.
get_response
()
def
get_nomenclature_path_nodes
(
...
...
@@ -138,7 +138,7 @@ def get_nomenclature_path_nodes(
language
=
language
,
parameters
=
filters
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_nomenclature_one_level
(
...
...
@@ -173,7 +173,7 @@ def get_nomenclature_one_level(
parameters
=
parameters
,
export_format
=
'CSV'
)
return
api
.
api_call
()
return
api
.
get_response
()
def
get_nomenclature_multiple_levels
(
...
...
@@ -212,7 +212,7 @@ def get_nomenclature_multiple_levels(
parameters
=
parameters
,
export_format
=
'CSV'
)
df
=
api
.
api_call
()
df
=
api
.
get_response
()
# Post-processing:
# fill sub groups rows with parent group's values (instead of NaN)
...
...
@@ -260,7 +260,7 @@ def query_nomenclature(
parameters
=
parameters
)
return
api
.
api_call
()
return
api
.
get_response
()
###############################################################
...
...
@@ -274,7 +274,7 @@ def dcat_list_all_agents():
- response (dict): all agents
'''
api
=
Api
(
api_type
=
'agents_list'
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_get_agent_from_id
(
agent_id
):
...
...
@@ -286,7 +286,7 @@ def dcat_get_agent_from_id(agent_id):
- response (dict): agent with this id
'''
api
=
Api
(
api_type
=
'agent_id'
,
_id
=
agent_id
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_list_all_datasets
():
...
...
@@ -296,7 +296,7 @@ def dcat_list_all_datasets():
- response (dict): list of all datasets
'''
api
=
Api
(
api_type
=
'dataset_list'
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_get_distributions_from_dataset_id
(
dataset_id
:
str
):
...
...
@@ -312,7 +312,7 @@ def dcat_get_distributions_from_dataset_id(dataset_id: str):
_id
=
dataset_id
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_get_dataset_from_id
(
dataset_id
:
str
):
...
...
@@ -324,7 +324,7 @@ def dcat_get_dataset_from_id(dataset_id: str):
- response (dict): the dataset
'''
api
=
Api
(
api_type
=
'dataset_id'
,
_id
=
dataset_id
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_get_dataset_from_identifier
(
identifier
:
str
):
...
...
@@ -336,7 +336,7 @@ def dcat_get_dataset_from_identifier(identifier: str):
- response (dict): the dataset
'''
api
=
Api
(
api_type
=
'dataset_identifier'
,
_id
=
identifier
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_get_distributions_from_dataset_identifier
(
identifier
:
str
):
...
...
@@ -353,7 +353,7 @@ def dcat_get_distributions_from_dataset_identifier(identifier: str):
_id
=
identifier
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_list_all_distributions
():
...
...
@@ -363,7 +363,7 @@ def dcat_list_all_distributions():
- response (dict): all distributions
'''
api
=
Api
(
api_type
=
'distributions_list'
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
def
dcat_get_distribution_from_id
(
distribution_id
:
str
):
...
...
@@ -376,7 +376,7 @@ def dcat_get_distribution_from_id(distribution_id: str):
'''
api
=
Api
(
api_type
=
'distribution_id'
,
_id
=
'
distribution_id
'
,
_id
=
distribution_id
,
root_url
=
DCAT_URL
)
return
api
.
api_call
()
return
api
.
get_response
()
fso_metadata/api_class
es
.py
→
fso_metadata/api_class.py
View file @
dc4bb4fc
...
...
@@ -2,8 +2,7 @@ from typing import Union
import
pandas
as
pd
from
format_call
import
OUTPUT_FUNCTION_MAPPING
from
constants
import
BASE_URL
from
constants
import
BASE_URL
,
REQUEST_FUNCTION_MAPPING
class
Api
():
...
...
@@ -42,13 +41,10 @@ class Api():
self
.
export_format
=
export_format
self
.
parameters
=
parameters
self
.
_id
=
_id
self
.
version
=
version_format
self
.
language
=
language
self
.
path
=
path
self
.
api_url
=
get_url
(
api_type
,
self
)
self
.
api_url
=
self
.
get_url
(
api_type
,
_id
,
version_format
,
language
,
path
)
def
api_call
(
self
)
->
Union
[
dict
,
pd
.
DataFrame
]:
def
get_response
(
self
)
->
Union
[
dict
,
pd
.
DataFrame
]:
'''
Depending on the expected output, call the api appropriately
Returns:
...
...
@@ -56,59 +52,65 @@ class Api():
- a pd.DataFrame if export_format was CSV or XLSX
- a dictionary if export_format was SDMX-ML or SDMX-JSON.
'''
request_function
=
OUTPU
T_FUNCTION_MAPPING
[
self
.
export_format
]
request_function
=
REQUES
T_FUNCTION_MAPPING
[
self
.
export_format
]
return
request_function
(
f
'
{
self
.
root_url
}
/api/
{
self
.
api_url
}
'
,
self
.
parameters
f
'
{
self
.
root_url
}
/api/
{
self
.
api_url
}
'
,
self
.
parameters
)
def
get_url
(
api_type
,
self
):
'''
Get the url call based on the api type and parameters
'''
url_mapping
=
{
# i14y
'codelist'
:
f
'CodeLists/
{
self
.
_id
}
/exports/
{
self
.
export_format
}
/
{
self
.
version
}
'
,
'content_configuration'
:
'ContentConfigurations'
,
'content_configuration_identifier'
:
f
'ContentConfigurations/
{
self
.
_id
}
'
,
'dcat_dataset_description'
:
f
'Datasets/
{
self
.
_id
}
/
{
self
.
language
}
/description'
,
'dcat_dataset_information'
:
f
'Datasets/
{
self
.
_id
}
/
{
self
.
language
}
/distributions'
,
'data_structure'
:
f
'DataStructures/
{
self
.
_id
}
/
{
self
.
language
}
'
,
'nomenclature_path_nodes'
:
f
'Nomenclatures/Childnodes/
{
self
.
_id
}
/
{
self
.
language
}
/
{
self
.
path
}
'
,
'nomenclature_one_level'
:
f
'Nomenclatures/
{
self
.
_id
}
/levelexport/CSV'
,
'nomenclature_multiple_levels'
:
f
'Nomenclatures/
{
self
.
_id
}
/multiplelevels/CSV'
,
'nomenclature_search'
:
f
'Nomenclatures/
{
self
.
_id
}
/search'
,
# dcat
'agents_list'
:
'Agent'
,
'agent_id'
:
f
'Agent/
{
self
.
_id
}
'
,
'dataset_list'
:
'Dataset'
,
'dataset_id_distributions'
:
f
'Dataset/
{
self
.
_id
}
/distributions'
,
'dataset_id'
:
f
'Dataset/
{
self
.
_id
}
'
,
'dataset_identifier'
:
f
'Datataset/identifier/
{
self
.
_id
}
'
,
'dataset_identifier_distributions'
:
f
'Datataset/identifier/
{
self
.
_id
}
/distributions'
,
'distributions_list'
:
f
'Distribution'
,
'distribution_id'
:
f
'Distribution/
{
self
.
_id
}
'
}
return
url_mapping
[
api_type
]
def
get_url
(
self
,
api_type
:
str
,
_id
:
str
,
version_format
:
float
,
language
:
str
,
path
:
str
):
'''
Get the url call based on the api type and parameters
'''
url_mapping
=
{
# i14y
'codelist'
:
f
'CodeLists/
{
_id
}
/exports/
{
self
.
export_format
}
/
{
version_format
}
'
,
'content_configuration'
:
'ContentConfigurations'
,
'content_configuration_identifier'
:
f
'ContentConfigurations/
{
_id
}
'
,
'dcat_dataset_description'
:
f
'Datasets/
{
_id
}
/
{
language
}
/description'
,
'dcat_dataset_information'
:
f
'Datasets/
{
_id
}
/
{
language
}
/distributions'
,
'dcat_data_structure'
:
f
'DataStructures/
{
_id
}
/
{
language
}
'
,
'nomenclature_path_nodes'
:
f
'Nomenclatures/Childnodes/
{
_id
}
/
{
language
}
/
{
path
}
'
,
'nomenclature_one_level'
:
f
'Nomenclatures/
{
_id
}
/levelexport/CSV'
,
'nomenclature_multiple_levels'
:
f
'Nomenclatures/
{
_id
}
/multiplelevels/CSV'
,
'nomenclature_search'
:
f
'Nomenclatures/
{
_id
}
/search'
,
# dcat
'agents_list'
:
'Agent'
,
'agent_id'
:
f
'Agent/
{
_id
}
'
,
'dataset_list'
:
'Dataset'
,
'dataset_id_distributions'
:
f
'Dataset/
{
_id
}
/distributions'
,
'dataset_id'
:
f
'Dataset/
{
_id
}
'
,
'dataset_identifier'
:
f
'Datataset/identifier/
{
_id
}
'
,
'dataset_identifier_distributions'
:
f
'Datataset/identifier/
{
_id
}
/distributions'
,
'distributions_list'
:
f
'Distribution'
,
'distribution_id'
:
f
'Distribution/
{
_id
}
'
}
return
url_mapping
[
api_type
]
fso_metadata/constants.py
View file @
dc4bb4fc
from
format_request
import
json_request
,
sdmx_request
,
csv_request
# Root URL constants
BASE_URL
=
'https://www.i14y.admin.ch'
DCAT_URL
=
'https://dcat.app.cfap02.atlantica.admin.ch'
# Map the appropriate function based on the output type
REQUEST_FUNCTION_MAPPING
=
{
'JSON'
:
json_request
,
'SDMX-JSON'
:
json_request
,
'SDMX-ML'
:
sdmx_request
,
'CSV'
:
csv_request
}
\ No newline at end of file
fso_metadata/examples.ipynb
View file @
dc4bb4fc
%% Cell type:markdown id:fcad4b17 tags:
# Example notebook
In this notebook, we show one example per possible API call.
%% Cell type:code id:a529fab5-af2f-4439-b98e-13d814b00a94 tags:
```
python
import
fso_metadata
```
%% Cell type:code id:486bc684-f4a1-4b26-80ff-c156c51fdb97 tags:
```
python
# from api_call import (
from
fso_metadata
import
(
dcat_get_agent_from_id
,
dcat_get_dataset_from_id
,
dcat_get_dataset_from_identifier
,
dcat_get_distributions_from_dataset_id
,
dcat_get_distributions_from_dataset_identifier
,
dcat_get_distribution_from_id
,
dcat_list_all_agents
,
dcat_list_all_datasets
,
dcat_list_all_distributions
,
get_codelist
,
get_content_configuration
,
get_data_structure
,
get_dataset_description
,
get_dataset_information
,
get_identifier_content
,
get_nomenclature_path_nodes
,
get_nomenclature_one_level
,
get_nomenclature_multiple_levels
,
query_nomenclature
)
```
%%%% Output: stream
/opt/conda/lib/python3.9/site-packages/pandasdmx/remote.py:11: RuntimeWarning: optional dependency requests_cache is not installed; cache options to Session() have no effect
warn(
%% Cell type:markdown id:94312182-0616-4938-8d82-d666611bf64d tags:
## Available everywhere with the interoperability platform (i14y)
%% Cell type:markdown id:bdd766a5-c013-449c-9fd4-7356835396af tags:
[
i14y Swagger UI
](
https://www.i14y.admin.ch/api/index.html
)
%% Cell type:markdown id:446b07a4 tags:
### Code List
%% Cell type:code id:317c3e55 tags:
```
python
# Get a codelist pd.Series based on an identifier
codelist
=
get_codelist
(
identifier
=
'CL_NOGA_SECTION'
,
export_format
=
"SDMX-ML"
,
version_format
=
2.1
,
annotations
=
True
)
codelist
.
head
(
5
)
```
%%%% Output: execute_result
CL_NOGA_SECTION
A AGRICULTURE, FORESTRY AND FISHING
B MINING AND QUARRYING
C MANUFACTURING
D ELECTRICITY, GAS, STEAM AND AIR-CONDITIONING S...
E WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND R...
Name: NOGA Section, dtype: object
%% Cell type:markdown id:b0029468 tags:
### Content Creation
%% Cell type:code id:5bf187c8 tags:
```
python
# Get the display information for the available configured content
content
=
get_content_configuration
()
content
```
%%%% Output: execute_result
[{'default': True,
'identifier': 'HCL_CH_ISCO_19_PROF',
'items': [],
'label': 'CH-ISCO-19',
'skipRoute': False}]
%% Cell type:code id:88581bea tags:
```
python
# Get a nomenclature information based on its identifier
identifier_content
=
get_identifier_content
(
identifier
=
'HCL_CH_ISCO_19_PROF'
)
identifier_content
```
%%%% Output: execute_result
{'agencyIdentifier': 'FSO',
'controllerName': 'Nomenclatures',
'descriptionIdentifier': 'HCL_CH_ISCO_19_PROF',
'exportFormats': ['CSV', 'XLSX'],
'exportLanguages': ['de', 'fr', 'it', 'en', 'rm'],
'exportLevels': ['1', '2', '3', '4', '5', '6'],
'exportTypes': {'Single': 'levelexport', 'Multi': 'multilevels'},
'filters': [{'identifier': 'AF_ACTIVE', 'values': ['0', '1']},
{'identifier': 'AFC_ISCO_REDUCED_LIST', 'values': ['1']},
{'identifier': 'AFC_ISCO_DUPLICATE', 'values': ['0']},
{'identifier': 'AF_LEARNED_OR_PRACTICED', 'values': ['1', '2']},
{'identifier': 'AF_AVAM', 'values': ['1']}],
'hasAnnotations': True,
'identifier': 'HCL_CH_ISCO_19_PROF',
'type': 'Nomenclature'}
%% Cell type:markdown id:42d2d24c-3e0b-40b6-a4b3-5e409345b449 tags:
### Datasets
%% Cell type:code id:b1d312b6-4b66-41aa-8d2e-4441e803ba07 tags:
```
python
# Get the dcat dataset description
dataset_description
=
get_dataset_description
(
identifier
=
'HCL_NOGA'
,
language
=
'de'
)
dataset_description
[
'contactPoint'
][
0
]
```
%%%% Output: execute_result
{'adrWork': {'cultureCode': 'de',
'text': "Unternehmensregisterdaten URD\nEspace de l'Europe 10\nCH-2010 Neuchâtel\nSchweiz"},
'child': 0,
'emailInternet': 'noga@bfs.admin.ch',
'fn': {'cultureCode': 'de', 'text': 'Bundesamt für Statistik'},
'note': {'cultureCode': 'de',
'text': 'Von Montag bis Freitag\n8.30–11.30 Uhr und 14.00–16.00 Uhr'},
'org': {'cultureCode': None, 'text': None},
'telWorkVoice': '+41 58 463 65 23'}
%% Cell type:code id:36e4a8c2-097f-4c1d-9dc3-b1c86f855ffa tags:
```
python
# Get the dcat dataset information
dataset_information
=
get_dataset_information
(
identifier
=
'HCL_CH_ISCO_19_PROF'
,
language
=
'fr'
)
dataset_information
[
0
][
'accessUrl'
]
```
%%%% Output: execute_result
[{'href': 'https://www.i14y.admin.ch/api/nomenclatures/HCL_CH_ISCO_19_PROF/levelexport/XLSX?level=6&annotations=true',
'label': 'https://www.i14y.admin.ch/api/nomenclatures/HCL_CH_ISCO_19_PROF/levelexport/XLSX?level=6&annotations=true'}]
%% Cell type:markdown id:c945eee9-8908-4012-b022-af419d5999b9 tags:
### Data Structures
%% Cell type:code id:56e92700-881f-48af-81d4-1ed622b87400 tags:
```
python
# Get the data structure
data_structure
=
get_data_structure
(
identifier
=
'HCL_CH_ISCO_19_PROF'
,
language
=
'it'
)
data_structure
```
%%%% Output: execute_result
{'type': 'https://httpstatuses.com/404',
'title': 'Not Found',
'status': 404,
'detail': 'DataStructure with type Nomenclature and identifiers HCL_CH_ISCO_19_PROF/HR_CH_ISCO_19_PROF is not supported.',
'traceId': '|
f51d8358-4dd7c2b42061dbb5
.'}
'traceId': '|
6d3c2cd5-4ee6924f4e8dca24
.'}
%% Cell type:markdown id:99f3ee98 tags:
### Nomenclature
%% Cell type:code id:ddd5eb9f tags:
```
python
# Get the nodes of a path within a nomenclature, add filters to get more specific results
filters
=
{
'code'
:
False
}
# TODO: ask what filters are and how they work
path_nodes
=
get_nomenclature_path_nodes
(
identifier
=
'HCL_CH_ISCO_19_PROF'
,
path
=
'.'
,
filters
=
filters
,
language
=
'fr'
)
path_nodes
```
%%%% Output: execute_result
[{'annotations': [],
'code': '0',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Professions militaires'}},
{'annotations': [],
'code': '1',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Directeurs, cadres de direction et gérants'}},
{'annotations': [],
'code': '2',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Professions intellectuelles et scientifiques'}},
{'annotations': [],
'code': '3',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Professions intermédiaires'}},
{'annotations': [],
'code': '4',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Employés de type administratif'}},
{'annotations': [],
'code': '5',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Personnel des services directs aux particuliers, commerçants et vendeurs'}},
{'annotations': [],
'code': '6',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Agriculteurs et ouvriers qualifiés de l’agriculture, de la sylviculture et de la pêche'}},
{'annotations': [],
'code': '7',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Métiers qualifiés de l’industrie et de l’artisanat'}},
{'annotations': [],
'code': '8',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Conducteurs d’installations et de machines, et ouvriers de l’assemblage'}},
{'annotations': [],
'code': '9',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Professions élémentaires'}}]
%% Cell type:code id:dbc2f301 tags:
```
python
# Get one level of a nomenclature in a dataframe
filters
=
{
'code'
:
'1'
}
# TODO: ask what filters are and how they work
filters
=
{
'code'
:
[
'1'
]
}
# TODO: ask what filters are and how they work
single_level
=
get_nomenclature_one_level
(
identifier
=
'HCL_CH_ISCO_19_PROF'
,
filters
=
filters
,
level_number
=
2
,
language
=
'fr'
,
annotations
=
False
)
single_level
.
head
()
```
%%%% Output: execute_result
Code Parent Name_fr
0 01 0 Officiers des forces armées
1 02 0 Sous-officiers des forces armées
2 03 0 Autres membres des forces armées
3 10 1 Directeurs, cadres de direction et gérants, sip
4 11 1 Directeurs généraux, cadres supérieurs et memb...
%% Cell type:code id:94499315 tags:
```
python
# Get multiple levels of a nomenclature in a dataframe
filters
=
{
'code'
:
'1'
}
# TODO: ask what filters are and how they work
filters
=
{
'code'
:
[
'1'
]
}
multiple_levels
=
get_nomenclature_multiple_levels
(
identifier
=
'HCL_CH_ISCO_19_PROF'
,
level_from
=
1
,
level_to
=
4
,
filters
=
filters
,
language
=
'en'
,
annotations
=
True
)
multiple_levels
.
head
(
5
)
```
%%%% Output: execute_result
Major_groups Sub-major_groups Minor_groups Unit_groups Code Name_en
0 0 NaN NaN NaN 0 NaN
1 0 01 NaN NaN 01 NaN
2 0 01 011 NaN 011 NaN
3 0 01 011 0110 0110 NaN
4 0 02 011 0110 02 NaN
%% Cell type:code id:7fa4060a tags:
```
python
# Search a query within a nomenclature
filters
=
{
'code'
:
'1'
}
# TODO: ask what filters are and how they work
filters
=
{
'code'
:
[
'1'
]
}
query_result
=
query_nomenclature
(
identifier
=
'HCL_CH_ISCO_19_PROF'
,
query
=
'SELECT *'
,
page_number
=
5
,
page_size
=
2
,
filters
=
filters
,
language
=
'fr'
)
query_result
[
0
][
'annotations'
][
0
]
```
%%%% Output: execute_result
{'identifier': None,
'text': {'cultureCode': 'fr', 'text': 'Divisionnaire'},