| Title: | Access PX-Web Statistical Data from R |
|---|---|
| Description: | A pipe-friendly R client for PX-Web statistical APIs. Provides a search-then-fetch workflow for discovering and downloading data from national statistics agencies (SCB, SSB, Statistics Finland, etc.) using a consistent tibble-based interface. |
| Authors: | Love Hansson [aut, cre, cph] |
| Maintainer: | Love Hansson <[email protected]> |
| License: | AGPL (>= 3) |
| Version: | 0.1.1.9002 |
| Built: | 2026-05-16 08:57:03 UTC |
| Source: | https://github.com/lchansson/pixieweb |
Print human-readable codelist summaries
codelist_describe(cl_df, max_n = 5, format = "inline", heading_level = 2)
cl_df |
A tibble returned by get_codelists(). |
max_n |
Maximum codelists to describe. |
format |
Output format: |
heading_level |
Heading level. |
cl_df invisibly (for piping).
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_codelists(scb, "TAB638", "Region") |>
    codelist_describe()
}
Extract codelist IDs
codelist_extract_ids(cl_df)
cl_df |
A tibble returned by get_codelists(). |
A character vector of codelist IDs.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_codelists(scb, "TAB638", "Region") |>
    codelist_extract_ids()
}
Extract values for a specific codelist
codelist_values(cl_df, codelist_id)
cl_df |
A tibble returned by get_codelists(). |
codelist_id |
Codelist ID (character). |
A tibble with columns code and text.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  cls <- get_codelists(scb, "TAB638", "Region")
  codelist_values(cls, cls$id[1])
}
Build the URL and JSON body for a data request without executing it. Useful for inspecting or modifying queries before sending them.
compose_data_query(api, table_id, ..., .codelist = NULL)
api |
A <px_api> object. |
table_id |
Single table ID. |
... |
Variable selections (same as get_data()). |
.codelist |
Named list of codelist overrides. |
A list with $url (character) and $body (list, JSON-serializable).
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  q <- compose_data_query(scb, "TAB638", Region = c("0180"), Tid = px_top(5))
  str(q$url)
  str(q$body)
}
Build the URL for querying the tables endpoint (advanced use).
compose_table_query(
  api,
  query = NULL,
  id = NULL,
  updated_since = NULL,
  page = NA,
  per_page = NA
)
api |
A <px_api> object. |
query |
Free-text search string. |
id |
Table ID(s). |
updated_since |
Days since last update. |
page |
Page number. |
per_page |
Results per page. |
A character URL string.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  compose_table_query(scb, query = "population")
}
Extract comments from data
data_comments(data_df)
data_df |
A tibble returned by get_data(). |
A tibble with columns variable, value, comment, or NULL.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  d <- get_data(scb, "TAB638", Region = "0180", Tid = px_top(3), .comments = TRUE)
  data_comments(d)
}
Builds a human-readable source attribution string from a data tibble
returned by get_data() and, optionally, a variable tibble returned
by get_variables(). The string is suitable for use as a caption
in ggplot2::labs().
data_legend(
  data_df,
  var_df = NULL,
  lang = NULL,
  omit_varname = FALSE,
  omit_desc = FALSE
)
data_df |
A tibble returned by get_data(). |
var_df |
Optional tibble returned by get_variables(). |
lang |
Language for the caption wording: |
omit_varname |
Logical. If TRUE, drops the raw variable codes from the caption. |
omit_desc |
Logical. If TRUE, drops the human-readable variable labels from the caption. |
By default the caption shows the API and table that the data came
from, and — if var_df is supplied — the variables included in the
table with both a human-readable label and the raw code, e.g.
Source: Statistics Sweden (SCB), table TAB638 Region (Region) | Marital status (Civilstand) | Year (Tid)
Use omit_varname to drop the codes, omit_desc to drop the labels,
and lang to switch between English and Swedish wording of the
source prefix.
A single character string suitable for plot captions.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  vars <- get_variables(scb, "TAB638")
  d <- get_data(scb, "TAB638", Region = "0180", Tid = px_top(3))
  data_legend(d, vars)
  data_legend(d, vars, lang = "SV")
  data_legend(d, vars, omit_varname = TRUE)
  data_legend(d, vars, omit_desc = TRUE)
}
Remove monotonous columns from a data tibble
data_minimize(data_df, remove_monotonous_data = TRUE)
data_df |
A tibble returned by get_data(). |
remove_monotonous_data |
Remove columns where all values are identical. |
A tibble.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  d <- get_data(scb, "TAB638", Region = "0180", Tid = px_top(3))
  d |> data_minimize()
}
Low-level function to execute a query built with compose_data_query().
Handles rate limiting, retries, and error handling.
execute_query(api, url, body = NULL, verbose = FALSE)
api |
A <px_api> object. |
url |
API endpoint URL. |
body |
JSON body as a list, or NULL (the default). |
verbose |
Print request details. |
Parsed JSON as a list, or NULL on failure.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  q <- compose_data_query(scb, "TAB638", Region = "0180", Tid = px_top(3))
  raw <- execute_query(scb, q$url, q$body)
}
Codelists provide alternative groupings of variable values (e.g. municipalities grouped into counties).
get_codelists(api, table_id, variable_code, verbose = FALSE)
api |
A <px_api> object. |
table_id |
Table ID (character). |
variable_code |
Variable code (character). |
verbose |
Print request details. |
A tibble with columns: id, text, type, values.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_codelists(scb, "TAB638", "Region")
}
The core function for downloading statistical data. Variable selections
are passed as named arguments via ..., or via a prepared query object
from prepare_query().
get_data(
  api,
  table_id,
  ...,
  query = NULL,
  .codelist = NULL,
  .output = "long",
  .comments = FALSE,
  simplify = TRUE,
  auto_chunk = TRUE,
  max_results = NULL,
  cache = FALSE,
  cache_location = NULL,
  verbose = FALSE
)
api |
A <px_api> object. |
table_id |
A single table ID (character). Vectors are not supported;
use |
... |
Variable selections as named arguments. Each name is a variable code, each value is one of:
|
query |
A <px_query> object from prepare_query(). |
.codelist |
Named list of codelist overrides
(e.g. |
.output |
"long" (default) or "wide". |
.comments |
Include footnotes/comments as an attribute. |
simplify |
Add human-readable text label columns alongside codes. |
auto_chunk |
Automatically split large queries that exceed the cell
limit into multiple requests. When |
max_results |
Override the API's cell limit. When set, this value is used instead of the limit reported by the API's config endpoint. Useful for keeping result size manageable or for testing chunking behavior. |
cache |
Logical. If |
cache_location |
Either a path to a |
verbose |
Print request details. |
When simplify = TRUE and .output = "long" (defaults), columns are:
table_id: back-reference to the source table
One pair per dimension: {code} (raw code) + {code}_text (label)
value: the numeric measurement
When simplify = FALSE, only raw codes and value are returned.
When .output = "wide", content variables are pivoted into separate columns.
When auto_chunk = TRUE and the query would exceed the API's cell limit,
get_data() automatically splits the request. It picks the variable with
the most values and batches its values so each request fits under the limit.
Requests are paced to respect the API's rate limit.
A tibble of data. See Details for column structure.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  # Fetch with explicit selections
  get_data(scb, "TAB638",
    Region = c("0180", "1480"),
    Tid = px_top(5)
  )

  # Fetch from a prepared query
  q <- prepare_query(scb, "TAB638", Region = c("0180"))
  get_data(scb, query = q)
}
PX-Web v2 saved queries are server-side stored query definitions (table + variable selections) that can be shared via ID/URL.
get_saved_query(
  api,
  query_id,
  .output = "long",
  simplify = TRUE,
  verbose = FALSE
)
api |
A <px_api> object. |
query_id |
Saved query ID (character). |
.output |
"long" (default) or "wide". |
simplify |
Add text label columns. |
verbose |
Print request details. |
A tibble in the same format as get_data().
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_saved_query(scb, "some-query-id")
}
Search for and list statistical tables available on a PX-Web instance.
get_tables(
  api,
  query = NULL,
  id = NULL,
  updated_since = NULL,
  max_results = NULL,
  .timeout = 15,
  cache = FALSE,
  cache_location = pixieweb_cache_dir,
  verbose = FALSE
)
api |
A <px_api> object. |
query |
Free-text search string (sent to API as server-side search).
On v2 APIs (e.g. SCB) the server-side search is an exact token match
by default — |
id |
Character vector of specific table IDs to retrieve. |
updated_since |
Only return tables updated in the last N days (integer). |
max_results |
Maximum number of tables to return. |
.timeout |
Maximum seconds to spend on v1 hierarchy tree walks (default
15). Only relevant when a v1 API lacks a |
cache |
Logical, cache results locally. |
cache_location |
Cache directory. Defaults to pixieweb_cache_dir(). |
verbose |
Print request details. |
A tibble with columns: id, title, description, category,
updated, first_period, last_period, time_unit, variables,
subject_code, subject_path, source, discontinued.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  # Server-side search
  get_tables(scb, query = "population")

  # Fetch specific tables by ID
  get_tables(scb, id = c("TAB638", "TAB1278"))

  # Tables updated in the last 30 days
  get_tables(scb, updated_since = 30)
}
Retrieves the variable structure of a PX-Web table, including available values and codelists. This is the key discovery step before fetching data.
get_variables(
  api,
  table_id,
  cache = FALSE,
  cache_location = pixieweb_cache_dir,
  verbose = FALSE
)
api |
A <px_api> object. |
table_id |
A single table ID (character). |
cache |
Logical, cache results locally. When combined with a sqlite
|
cache_location |
Either a path to a |
verbose |
Print request details. |
A tibble with columns: code, text, n_values, elimination,
time, values, codelists, table_id.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_variables(scb, "TAB638")
}
Returns the path to the user-level cache directory for pixieweb, creating it
if it does not exist. Uses tools::R_user_dir() so the cache survives
across R sessions.
pixieweb_cache_dir()
A single character string (directory path).
pixieweb_cache_dir()
Removes cached API responses stored in the default or specified location. Can selectively clear by entity type and/or API.
pixieweb_clear_cache(
  entity = NULL,
  api = NULL,
  cache_location = pixieweb_cache_dir()
)
entity |
Character entity to clear (e.g. "tables" or "enriched"). |
api |
A <px_api> object, or NULL (the default). |
cache_location |
Directory to clear. Defaults to pixieweb_cache_dir(). |
invisible(NULL)
scb <- px_api("scb")
if (px_available(scb)) {
  pixieweb_clear_cache()
  pixieweb_clear_cache(entity = "tables")
  pixieweb_clear_cache(api = scb)
  pixieweb_clear_cache(entity = "enriched", api = scb)
}
Bridges the gap between table/variable exploration and data fetching.
Fetches variable metadata, applies sensible defaults for variable
selections, and returns a query object that can be passed to get_data().
prepare_query(
  api,
  table_id,
  ...,
  .codelist = NULL,
  max_default_values = 22,
  maximize_selection = FALSE,
  verbose = FALSE
)

## S3 method for class 'px_query'
print(x, ...)
api |
A <px_api> object. |
table_id |
A single table ID (character). |
... |
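For prepare_query(): variable selections as named arguments (e.g. Region = c("0180", "1480")) that override the defaults for those variables. Ignored by the print method. |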
.codelist |
Named list of codelist overrides. |
max_default_values |
Maximum number of values for a variable to receive
a wildcard default. Defaults to 22. |
maximize_selection |
If TRUE, expands selections to use as much of the API's cell limit as possible. |
verbose |
Print request details. |
x |
A <px_query> object. |
Default selection strategy:
ContentsCode: all values ("*")
Time variable: most recent 10 periods (px_top(10))
Eliminable variables: omitted (API aggregates automatically)
Small mandatory variables (<= max_default_values values): all ("*")
Large mandatory variables: first value only (px_top(1))
When maximize_selection = TRUE, the function expands selections to use
as much of the API's cell limit as possible. Expansion order: smallest
eliminable variables first, then smallest mandatory, then time last.
The returned query object prints a human-readable summary showing what
was selected for each variable and why. Modify selections before passing
to get_data() by assigning to the $selections list.
A <px_query> object. Pass to get_data() via the query
parameter.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  # Prepare with defaults
  q <- prepare_query(scb, "TAB638")
  q

  # Override specific variables, let defaults handle the rest
  q <- prepare_query(scb, "TAB638", Region = c("0180", "1480"))

  # Maximize data within API limits
  q <- prepare_query(scb, "TAB638", maximize_selection = TRUE)

  # Fetch data from a prepared query
  get_data(scb, query = q)
}
Creates a <px_api> connection object used by all other pixieweb functions.
You can pass a known alias (e.g. "scb", "ssb") or a full base URL.
px_api(x, lang = NULL, version = "v2", verbose = FALSE)

## S3 method for class 'px_api'
print(x, ...)

## S3 method for class 'px_api'
format(x, ...)
x |
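A known API alias (e.g. "scb", "ssb") or a full base URL. For the print and format methods, a <px_api> object. |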
lang |
Language code (e.g. "en"). |
version |
API version: "v2" (default) or "v1". |
verbose |
Print connection details. |
... |
Ignored. |
A <px_api> object.
if (px_available(px_api("scb"))) {
  scb <- px_api("scb", lang = "en")
  ssb <- px_api("ssb", lang = "no")
  custom <- px_api("https://my.statbank.example/api/v2/", lang = "en")
}
Returns a tibble of known PX-Web APIs with their aliases, URLs, supported versions, and available languages.
px_api_catalogue()
A tibble with columns: alias, description, url, url_v1,
versions, langs, default_lang.
px_api_catalogue()
Check if a PX-Web API is reachable
px_available(api)
api |
A <px_api> object. |
Logical: TRUE if the API responds, FALSE otherwise.
scb <- px_api("scb")
if (px_available(scb)) {
  px_available(scb)
}
Produces a citation string from metadata attached to data by get_data().
px_cite(data_df)
data_df |
A tibble returned by get_data(). |
A character string (formatted citation).
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  d <- get_data(scb, "TAB638", Region = "0180", Tid = px_top(3))
  px_cite(d)
}
These helpers create <px_selection> objects that get_data() translates
into the appropriate API filter. Each represents a different way to select
variable values in PX-Web queries.
px_all(pattern = "*")

px_top(n)

px_bottom(n)

px_from(value)

px_to(value)

px_range(from, to)

## S3 method for class 'px_selection'
print(x, ...)
pattern |
Glob pattern (default "*"). |
n |
Number of values. |
value |
Value code (inclusive bound). |
from, to |
Value codes for range bounds (inclusive). |
x |
A <px_selection> object. |
... |
Ignored. |
A <px_selection> object.
Persists a set of variable selections server-side so the query can be shared or re-used later.
save_query(api, table_id, ..., .codelist = NULL, verbose = FALSE)
api |
A <px_api> object. |
table_id |
Table ID (character). |
... |
Variable selections (same as get_data()). |
.codelist |
Named list of codelist overrides. |
verbose |
Print request details. |
A character string: the saved query ID.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  query_id <- save_query(scb, "TAB638", Region = "0180", Tid = px_top(5))
}
Print human-readable table summaries
table_describe(table_df, max_n = 5, format = "inline", heading_level = 2)
table_df |
A tibble returned by get_tables(). |
max_n |
Maximum number of tables to describe. |
format |
Output format: |
heading_level |
Heading level for output. |
table_df invisibly (for piping).
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_tables(scb, query = "population") |>
    table_describe(max_n = 3)
}
Fetches the metadata endpoint for each table and adds columns with
notes, contents description, contact information, and more. This is
an extra API call per table, so it's separated from get_tables() to
give users control over when the cost is incurred.
table_enrich(
  table_df,
  api = NULL,
  cache = FALSE,
  cache_location = pixieweb_cache_dir,
  async = FALSE,
  verbose = FALSE
)
table_df |
A tibble returned by get_tables(). |
api |
A <px_api> object. If NULL (the default), the API is picked up from table_df. |
cache |
Logical. If |
cache_location |
Either a directory path (legacy |
async |
Logical. When |
verbose |
Print request details. |
When cache_location points at a SQLite file (or an nxt_handle from the
nordstatExtras package) the cache is per table rather than per
enrich call. That gives three properties you don't get from the legacy
.rds path: (1) enrichment results are reused across any
table_enrich() call that touches the same table_id; (2) a long
enrich run can be interrupted and resumes from where it left off; and
(3) with async = TRUE, the call returns immediately with whatever is
already cached and keeps fetching in the background.
The input tibble with additional columns: notes, contents,
subject_area, official_statistics, contact.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  # API is picked up automatically from the tibble
  get_tables(scb, query = "population", max_results = 5) |>
    table_enrich() |>
    table_describe()

  # Cache enriched results for offline use (legacy .rds path)
  get_tables(scb, query = "population", cache = TRUE) |>
    table_enrich(cache = TRUE)

  # Per-table cache in a shared SQLite file
  handle <- nordstatExtras::nxt_open("cache.sqlite")
  get_tables(scb, query = "population", cache = TRUE, cache_location = handle) |>
    table_enrich(cache = TRUE, cache_location = handle)
}
Extract table IDs from a table tibble
table_extract_ids(table_df)
table_df |
A tibble returned by get_tables(). |
A character vector of table IDs.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_tables(scb, query = "population") |>
    table_extract_ids()
}
Remove monotonous columns from a table tibble
table_minimize(table_df, remove_monotonous_data = TRUE)
table_df |
A tibble returned by get_tables(). |
remove_monotonous_data |
Remove columns where all values are identical. |
A tibble.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_tables(scb, query = "population") |>
    table_minimize()
}
Filter an already-fetched table tibble by regex. Complements
get_tables(query = ...) which does server-side search. Use this for
further refinement on cached results.
table_search(table_df, query, column = NULL)
table_df |
A tibble returned by get_tables(). |
query |
Character vector of search terms (combined with OR). |
column |
Column names to search. |
A filtered tibble.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  tables <- get_tables(scb, query = "population")

  # Further filter by regex
  tables |> table_search("municipality")
}
Print human-readable variable summaries
variable_describe(var_df, max_n = 10, format = "inline", heading_level = 2)
var_df |
A tibble returned by get_variables(). |
max_n |
Maximum number of variables to describe. |
format |
Output format: |
heading_level |
Heading level. |
var_df invisibly (for piping).
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_variables(scb, "TAB638") |>
    variable_describe()
}
Extract variable codes from a variable tibble
variable_extract_ids(var_df)
var_df |
A tibble returned by get_variables(). |
A character vector of variable codes.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_variables(scb, "TAB638") |>
    variable_extract_ids()
}
Remove nested columns for a compact variable overview
variable_minimize(var_df)
var_df |
A tibble returned by get_variables(). |
A tibble without values and codelists columns.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  get_variables(scb, "TAB638") |>
    variable_minimize()
}
Convert variable names to codes
variable_name_to_code(var_df, name)
var_df |
A tibble returned by get_variables(). |
name |
Character vector of human-readable variable names. |
A named character vector: names are the input names, values are codes.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  vars <- get_variables(scb, "TAB638")
  variable_name_to_code(vars, "Region")
}
Searches across variable codes, texts, and optionally nested value texts.
variable_search(var_df, query, column = NULL)
var_df |
A tibble returned by get_variables(). |
query |
Character vector of search terms (combined with OR). |
column |
Column names to search. |
A filtered tibble.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  vars <- get_variables(scb, "TAB638")
  vars |> variable_search("region")
}
Extract values for a specific variable
variable_values(var_df, variable_code)
var_df |
A tibble returned by get_variables(). |
variable_code |
Variable code (character). |
A tibble with columns code and text.
scb <- px_api("scb", lang = "en")
if (px_available(scb)) {
  vars <- get_variables(scb, "TAB638")
  vars |> variable_values("Kon")
}