Commit dc4bb4fc authored by Pauline Maury Laribière's avatar Pauline Maury Laribière
Browse files

update while doing the R library

parent 3d571861
# Metadata Auto
# Metadata Auto Python Library
## Introduction
......
......@@ -3,7 +3,7 @@ from typing import Type, Union
import pandas as pd
from api_classes import Api
from api_class import Api
from constants import DCAT_URL
from utils import stringify_filters
......@@ -40,7 +40,7 @@ def get_codelist(
version_format = version_format,
parameters = f'annotations={str(annotations).lower()}'
)
return api.api_call()
return api.get_response()
def get_content_configuration() -> dict:
......@@ -50,7 +50,7 @@ def get_content_configuration() -> dict:
- response (dict): the configured content's display information
'''
api = Api(api_type = 'content_configuration')
return api.api_call()
return api.get_response()
def get_identifier_content(identifier) -> dict:
......@@ -62,7 +62,7 @@ def get_identifier_content(identifier) -> dict:
- response (dict): the nomenclature's information
'''
api = Api(api_type = 'content_configuration_identifier', _id = identifier)
return api.api_call()
return api.get_response()
def get_dataset_description(identifier: str, language: str = 'fr') -> dict:
......@@ -80,7 +80,7 @@ def get_dataset_description(identifier: str, language: str = 'fr') -> dict:
_id = identifier,
language = language
)
return api.api_call()
return api.get_response()
def get_dataset_information(identifier: str, language: str = 'fr') -> dict:
......@@ -98,7 +98,7 @@ def get_dataset_information(identifier: str, language: str = 'fr') -> dict:
_id = identifier,
language = language
)
return api.api_call()
return api.get_response()
def get_data_structure(identifier: str, language: str = 'fr') -> dict:
......@@ -111,8 +111,8 @@ def get_data_structure(identifier: str, language: str = 'fr') -> dict:
Returns:
- response: data structure dictionary
'''
api = Api(api_type = 'data_structure', _id = identifier, language = language)
return api.api_call()
api = Api(api_type = 'dcat_data_structure', _id = identifier, language = language)
return api.get_response()
def get_nomenclature_path_nodes(
......@@ -138,7 +138,7 @@ def get_nomenclature_path_nodes(
language = language,
parameters = filters
)
return api.api_call()
return api.get_response()
def get_nomenclature_one_level(
......@@ -173,7 +173,7 @@ def get_nomenclature_one_level(
parameters = parameters,
export_format='CSV'
)
return api.api_call()
return api.get_response()
def get_nomenclature_multiple_levels(
......@@ -212,7 +212,7 @@ def get_nomenclature_multiple_levels(
parameters = parameters,
export_format='CSV'
)
df = api.api_call()
df = api.get_response()
# Post-processing:
# fill sub groups rows with parent group's values (instead of NaN)
......@@ -260,7 +260,7 @@ def query_nomenclature(
parameters = parameters
)
return api.api_call()
return api.get_response()
###############################################################
......@@ -274,7 +274,7 @@ def dcat_list_all_agents():
- response (dict): all agents
'''
api = Api(api_type = 'agents_list', root_url = DCAT_URL)
return api.api_call()
return api.get_response()
def dcat_get_agent_from_id(agent_id):
......@@ -286,7 +286,7 @@ def dcat_get_agent_from_id(agent_id):
- response (dict): agent with this id
'''
api = Api(api_type = 'agent_id', _id = agent_id, root_url = DCAT_URL)
return api.api_call()
return api.get_response()
def dcat_list_all_datasets():
......@@ -296,7 +296,7 @@ def dcat_list_all_datasets():
- response (dict): list of all datasets
'''
api = Api(api_type = 'dataset_list', root_url=DCAT_URL)
return api.api_call()
return api.get_response()
def dcat_get_distributions_from_dataset_id(dataset_id: str):
......@@ -312,7 +312,7 @@ def dcat_get_distributions_from_dataset_id(dataset_id: str):
_id = dataset_id,
root_url = DCAT_URL
)
return api.api_call()
return api.get_response()
def dcat_get_dataset_from_id(dataset_id: str):
......@@ -324,7 +324,7 @@ def dcat_get_dataset_from_id(dataset_id: str):
- response (dict): the dataset
'''
api = Api(api_type = 'dataset_id', _id = dataset_id, root_url = DCAT_URL)
return api.api_call()
return api.get_response()
def dcat_get_dataset_from_identifier(identifier: str):
......@@ -336,7 +336,7 @@ def dcat_get_dataset_from_identifier(identifier: str):
- response (dict): the dataset
'''
api = Api(api_type = 'dataset_identifier', _id = identifier, root_url=DCAT_URL)
return api.api_call()
return api.get_response()
def dcat_get_distributions_from_dataset_identifier(identifier: str):
......@@ -353,7 +353,7 @@ def dcat_get_distributions_from_dataset_identifier(identifier: str):
_id = identifier,
root_url = DCAT_URL
)
return api.api_call()
return api.get_response()
def dcat_list_all_distributions():
......@@ -363,7 +363,7 @@ def dcat_list_all_distributions():
- response (dict): all distributions
'''
api = Api(api_type = 'distributions_list', root_url = DCAT_URL)
return api.api_call()
return api.get_response()
def dcat_get_distribution_from_id(distribution_id: str):
......@@ -376,7 +376,7 @@ def dcat_get_distribution_from_id(distribution_id: str):
'''
api = Api(
api_type = 'distribution_id',
_id = 'distribution_id',
_id = distribution_id,
root_url = DCAT_URL
)
return api.api_call()
return api.get_response()
......@@ -2,8 +2,7 @@ from typing import Union
import pandas as pd
from format_call import OUTPUT_FUNCTION_MAPPING
from constants import BASE_URL
from constants import BASE_URL, REQUEST_FUNCTION_MAPPING
class Api():
......@@ -42,13 +41,10 @@ class Api():
self.export_format = export_format
self.parameters = parameters
self._id = _id
self.version = version_format
self.language = language
self.path = path
self.api_url = get_url(api_type, self)
self.api_url = self.get_url(api_type, _id, version_format, language, path)
def api_call(self) -> Union[dict, pd.DataFrame]:
def get_response(self) -> Union[dict, pd.DataFrame]:
'''
Depending on the expected output, call the api appropriately
Returns:
......@@ -56,59 +52,65 @@ class Api():
- a pd.DataFrame if export_format was CSV or XLSX
- a dictionary if export_format was SDMX-ML or SDMX-JSON.
'''
request_function = OUTPUT_FUNCTION_MAPPING[self.export_format]
request_function = REQUEST_FUNCTION_MAPPING[self.export_format]
return request_function(
f'{self.root_url}/api/{self.api_url}',
self.parameters
f'{self.root_url}/api/{self.api_url}', self.parameters
)
def get_url(api_type, self):
'''
Get the url call based on the api type and parameters
'''
url_mapping = {
# i14y
'codelist':
f'CodeLists/{self._id}/exports/{self.export_format}/{self.version}',
'content_configuration':
'ContentConfigurations',
'content_configuration_identifier':
f'ContentConfigurations/{self._id}',
'dcat_dataset_description':
f'Datasets/{self._id}/{self.language}/description',
'dcat_dataset_information':
f'Datasets/{self._id}/{self.language}/distributions',
'data_structure':
f'DataStructures/{self._id}/{self.language}',
'nomenclature_path_nodes':
f'Nomenclatures/Childnodes/{self._id}/{self.language}/{self.path}',
'nomenclature_one_level':
f'Nomenclatures/{self._id}/levelexport/CSV',
'nomenclature_multiple_levels':
f'Nomenclatures/{self._id}/multiplelevels/CSV',
'nomenclature_search':
f'Nomenclatures/{self._id}/search',
# dcat
'agents_list':
'Agent',
'agent_id':
f'Agent/{self._id}',
'dataset_list':
'Dataset',
'dataset_id_distributions':
f'Dataset/{self._id}/distributions',
'dataset_id':
f'Dataset/{self._id}',
'dataset_identifier':
f'Datataset/identifier/{self._id}',
'dataset_identifier_distributions':
f'Datataset/identifier/{self._id}/distributions',
'distributions_list':
f'Distribution',
'distribution_id':
f'Distribution/{self._id}'
}
return url_mapping[api_type]
def get_url(
    self,
    api_type: str,
    _id: str,
    version_format: float,
    language: str,
    path: str
) -> str:
    '''
    Get the url call based on the api type and parameters

    Parameters:
        - api_type (str): key selecting the route to build
          (e.g. 'codelist', 'dataset_id', 'distribution_id')
        - _id (str): identifier inserted in the routes that need one
        - version_format (float): export version, used by 'codelist' only
        - language (str): language segment, used by the dataset,
          data structure and nomenclature path routes
        - path (str): node path, used by 'nomenclature_path_nodes' only
    Returns:
        - url (str): the route relative to '<root_url>/api/'
    Raises:
        - KeyError: if api_type is not one of the supported keys
    '''
    url_mapping = {
        # i14y
        'codelist':
            f'CodeLists/{_id}/exports/{self.export_format}/{version_format}',
        'content_configuration':
            'ContentConfigurations',
        'content_configuration_identifier':
            f'ContentConfigurations/{_id}',
        'dcat_dataset_description':
            f'Datasets/{_id}/{language}/description',
        'dcat_dataset_information':
            f'Datasets/{_id}/{language}/distributions',
        'dcat_data_structure':
            f'DataStructures/{_id}/{language}',
        'nomenclature_path_nodes':
            f'Nomenclatures/Childnodes/{_id}/{language}/{path}',
        'nomenclature_one_level':
            f'Nomenclatures/{_id}/levelexport/CSV',
        'nomenclature_multiple_levels':
            f'Nomenclatures/{_id}/multiplelevels/CSV',
        'nomenclature_search':
            f'Nomenclatures/{_id}/search',
        # dcat
        'agents_list':
            'Agent',
        'agent_id':
            f'Agent/{_id}',
        'dataset_list':
            'Dataset',
        'dataset_id_distributions':
            f'Dataset/{_id}/distributions',
        'dataset_id':
            f'Dataset/{_id}',
        # Was spelled 'Datataset/...'; aligned with the other
        # 'Dataset/...' routes of the same service.
        'dataset_identifier':
            f'Dataset/identifier/{_id}',
        'dataset_identifier_distributions':
            f'Dataset/identifier/{_id}/distributions',
        'distributions_list':
            'Distribution',
        'distribution_id':
            f'Distribution/{_id}'
    }
    try:
        return url_mapping[api_type]
    except KeyError:
        # Same exception type as a plain lookup, but tell the caller
        # which keys are accepted.
        raise KeyError(
            f'Unknown api_type {api_type!r}; '
            f'expected one of {sorted(url_mapping)}'
        ) from None
from format_request import json_request, sdmx_request, csv_request
# Root URLs of the services queried by the library.
# BASE_URL is the i14y interoperability platform; DCAT_URL is the root
# passed as `root_url` by the dcat_* functions.
BASE_URL = 'https://www.i14y.admin.ch'
DCAT_URL = 'https://dcat.app.cfap02.atlantica.admin.ch'
# Request function to use for each export format. Api.get_response looks
# up its export_format here; 'JSON' and 'SDMX-JSON' share one handler.
REQUEST_FUNCTION_MAPPING = {
'JSON': json_request,
'SDMX-JSON': json_request,
'SDMX-ML': sdmx_request,
'CSV': csv_request
}
\ No newline at end of file
%% Cell type:markdown id:fcad4b17 tags:
# Example notebook
In this notebook, we show one example per possible API call.
%% Cell type:code id:a529fab5-af2f-4439-b98e-13d814b00a94 tags:
``` python
import fso_metadata
```
%% Cell type:code id:486bc684-f4a1-4b26-80ff-c156c51fdb97 tags:
``` python
# from api_call import (
from fso_metadata import (
dcat_get_agent_from_id,
dcat_get_dataset_from_id,
dcat_get_dataset_from_identifier,
dcat_get_distributions_from_dataset_id,
dcat_get_distributions_from_dataset_identifier,
dcat_get_distribution_from_id,
dcat_list_all_agents,
dcat_list_all_datasets,
dcat_list_all_distributions,
get_codelist,
get_content_configuration,
get_data_structure,
get_dataset_description,
get_dataset_information,
get_identifier_content,
get_nomenclature_path_nodes,
get_nomenclature_one_level,
get_nomenclature_multiple_levels,
query_nomenclature
)
```
%%%% Output: stream
/opt/conda/lib/python3.9/site-packages/pandasdmx/remote.py:11: RuntimeWarning: optional dependency requests_cache is not installed; cache options to Session() have no effect
warn(
%% Cell type:markdown id:94312182-0616-4938-8d82-d666611bf64d tags:
## Available everywhere with the interoperability platform (i14y)
%% Cell type:markdown id:bdd766a5-c013-449c-9fd4-7356835396af tags:
[i14y Swagger UI](https://www.i14y.admin.ch/api/index.html)
%% Cell type:markdown id:446b07a4 tags:
### Code List
%% Cell type:code id:317c3e55 tags:
``` python
# Get a codelist pd.Series based on an identifier
codelist = get_codelist(identifier='CL_NOGA_SECTION', export_format="SDMX-ML", version_format=2.1, annotations=True)
codelist.head(5)
```
%%%% Output: execute_result
CL_NOGA_SECTION
A AGRICULTURE, FORESTRY AND FISHING
B MINING AND QUARRYING
C MANUFACTURING
D ELECTRICITY, GAS, STEAM AND AIR-CONDITIONING S...
E WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND R...
Name: NOGA Section, dtype: object
%% Cell type:markdown id:b0029468 tags:
### Content Configuration
%% Cell type:code id:5bf187c8 tags:
``` python
# Get the display information for the available configured content
content = get_content_configuration()
content
```
%%%% Output: execute_result
[{'default': True,
'identifier': 'HCL_CH_ISCO_19_PROF',
'items': [],
'label': 'CH-ISCO-19',
'skipRoute': False}]
%% Cell type:code id:88581bea tags:
``` python
# Get a nomenclature's information based on its identifier
identifier_content = get_identifier_content(identifier='HCL_CH_ISCO_19_PROF')
identifier_content
```
%%%% Output: execute_result
{'agencyIdentifier': 'FSO',
'controllerName': 'Nomenclatures',
'descriptionIdentifier': 'HCL_CH_ISCO_19_PROF',
'exportFormats': ['CSV', 'XLSX'],
'exportLanguages': ['de', 'fr', 'it', 'en', 'rm'],
'exportLevels': ['1', '2', '3', '4', '5', '6'],
'exportTypes': {'Single': 'levelexport', 'Multi': 'multilevels'},
'filters': [{'identifier': 'AF_ACTIVE', 'values': ['0', '1']},
{'identifier': 'AFC_ISCO_REDUCED_LIST', 'values': ['1']},
{'identifier': 'AFC_ISCO_DUPLICATE', 'values': ['0']},
{'identifier': 'AF_LEARNED_OR_PRACTICED', 'values': ['1', '2']},
{'identifier': 'AF_AVAM', 'values': ['1']}],
'hasAnnotations': True,
'identifier': 'HCL_CH_ISCO_19_PROF',
'type': 'Nomenclature'}
%% Cell type:markdown id:42d2d24c-3e0b-40b6-a4b3-5e409345b449 tags:
### Datasets
%% Cell type:code id:b1d312b6-4b66-41aa-8d2e-4441e803ba07 tags:
``` python
# Get the dcat dataset description
dataset_description = get_dataset_description(identifier='HCL_NOGA', language='de')
dataset_description['contactPoint'][0]
```
%%%% Output: execute_result
{'adrWork': {'cultureCode': 'de',
'text': "Unternehmensregisterdaten URD\nEspace de l'Europe 10\nCH-2010 Neuchâtel\nSchweiz"},
'child': 0,
'emailInternet': 'noga@bfs.admin.ch',
'fn': {'cultureCode': 'de', 'text': 'Bundesamt für Statistik'},
'note': {'cultureCode': 'de',
'text': 'Von Montag bis Freitag\n8.30–11.30 Uhr und 14.00–16.00 Uhr'},
'org': {'cultureCode': None, 'text': None},
'telWorkVoice': '+41 58 463 65 23'}
%% Cell type:code id:36e4a8c2-097f-4c1d-9dc3-b1c86f855ffa tags:
``` python
# Get the dcat dataset information
dataset_information = get_dataset_information(identifier='HCL_CH_ISCO_19_PROF', language='fr')
dataset_information[0]['accessUrl']
```
%%%% Output: execute_result
[{'href': 'https://www.i14y.admin.ch/api/nomenclatures/HCL_CH_ISCO_19_PROF/levelexport/XLSX?level=6&annotations=true',
'label': 'https://www.i14y.admin.ch/api/nomenclatures/HCL_CH_ISCO_19_PROF/levelexport/XLSX?level=6&annotations=true'}]
%% Cell type:markdown id:c945eee9-8908-4012-b022-af419d5999b9 tags:
### Data Structures
%% Cell type:code id:56e92700-881f-48af-81d4-1ed622b87400 tags:
``` python
# Get the data structure
data_structure = get_data_structure(identifier='HCL_CH_ISCO_19_PROF', language='it')
data_structure
```
%%%% Output: execute_result
{'type': 'https://httpstatuses.com/404',
'title': 'Not Found',
'status': 404,
'detail': 'DataStructure with type Nomenclature and identifiers HCL_CH_ISCO_19_PROF/HR_CH_ISCO_19_PROF is not supported.',
'traceId': '|f51d8358-4dd7c2b42061dbb5.'}
'traceId': '|6d3c2cd5-4ee6924f4e8dca24.'}
%% Cell type:markdown id:99f3ee98 tags:
### Nomenclature
%% Cell type:code id:ddd5eb9f tags:
``` python
# Get the nodes of a path within a nomenclature, add filters to get more specific results
filters = {'code': False} # TODO: ask what filters are and how they work
path_nodes = get_nomenclature_path_nodes(identifier='HCL_CH_ISCO_19_PROF', path='.', filters=filters, language='fr')
path_nodes
```
%%%% Output: execute_result
[{'annotations': [],
'code': '0',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Professions militaires'}},
{'annotations': [],
'code': '1',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Directeurs, cadres de direction et gérants'}},
{'annotations': [],
'code': '2',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Professions intellectuelles et scientifiques'}},
{'annotations': [],
'code': '3',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Professions intermédiaires'}},
{'annotations': [],
'code': '4',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Employés de type administratif'}},
{'annotations': [],
'code': '5',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Personnel des services directs aux particuliers, commerçants et vendeurs'}},
{'annotations': [],
'code': '6',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Agriculteurs et ouvriers qualifiés de l’agriculture, de la sylviculture et de la pêche'}},
{'annotations': [],
'code': '7',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Métiers qualifiés de l’industrie et de l’artisanat'}},
{'annotations': [],
'code': '8',
'hasChilds': True,
'name': {'cultureCode': 'fr',
'text': 'Conducteurs d’installations et de machines, et ouvriers de l’assemblage'}},
{'annotations': [],
'code': '9',
'hasChilds': True,
'name': {'cultureCode': 'fr', 'text': 'Professions élémentaires'}}]
%% Cell type:code id:dbc2f301 tags:
``` python
# Get one level of a nomenclature in a dataframe
filters = {'code': '1'} # TODO: ask what filters are and how they work
filters = {'code': ['1']} # TODO: ask what filters are and how they work
single_level = get_nomenclature_one_level(identifier='HCL_CH_ISCO_19_PROF', filters=filters, level_number=2, language='fr', annotations=False)
single_level.head()
```
%%%% Output: execute_result
Code Parent Name_fr
0 01 0 Officiers des forces armées
1 02 0 Sous-officiers des forces armées
2 03 0 Autres membres des forces armées
3 10 1 Directeurs, cadres de direction et gérants, sip
4 11 1 Directeurs généraux, cadres supérieurs et memb...
%% Cell type:code id:94499315 tags:
``` python
# Get multiple levels of a nomenclature in a dataframe
filters = {'code': '1'} # TODO: ask what filters are and how they work
filters = {'code': ['1']}
multiple_levels = get_nomenclature_multiple_levels(identifier='HCL_CH_ISCO_19_PROF', level_from=1, level_to=4, filters=filters, language='en', annotations=True)
multiple_levels.head(5)
```
%%%% Output: execute_result
Major_groups Sub-major_groups Minor_groups Unit_groups Code Name_en
0 0 NaN NaN NaN 0 NaN
1 0 01 NaN NaN 01 NaN
2 0 01 011 NaN 011 NaN
3 0 01 011 0110 0110 NaN
4 0 02 011 0110 02 NaN
%% Cell type:code id:7fa4060a tags:
``` python
# Search a query within a nomenclature
filters = {'code': '1'} # TODO: ask what filters are and how they work
filters = {'code': ['1']}
query_result = query_nomenclature(identifier='HCL_CH_ISCO_19_PROF', query='SELECT *', page_number=5, page_size=2, filters=filters, language='fr')
query_result[0]['annotations'][0]
```
%%%% Output: execute_result
{'identifier': None,
'text': {'cultureCode': 'fr', 'text': 'Divisionnaire'},