Code to download data. Choose a dataset, then a table, and get code to download it as an R data frame, a Pandas DataFrame, or a NumPy array.

hub > wine > it

R

R code Copy

# RImports
library("glue")
library("rjson")

# Download `src_url` into <tmp_dir>/<dataset_name>/<table_name>/<dest_file_name>.
#
# Arguments:
#   src_url         Address of the file to fetch.
#   tmp_dir         Root of the local working directory.
#   dest_file_name  File name to store the download under.
#   table_name      Innermost directory component.
#   dataset_name    Directory component directly below `tmp_dir`.
#
# The result of the final download.file() call is the function's value.
download.data.from.remote.web.location <- function(src_url, tmp_dir, 
    dest_file_name, table_name, dataset_name) {
    target_dir <- paste(tmp_dir, dataset_name, table_name, sep = "/")
    target_file <- paste(target_dir, dest_file_name, sep = "/")
    # Only create the directory tree when it is not already present.
    if (!dir.exists(target_dir)) {
        dir.create(target_dir, recursive = TRUE)
    }
    download.file(src_url, target_file)
}
# Load a previously downloaded CSV file for one table into a data frame.
#
# Arguments:
#   static_data_table_name  Table name; used both as the directory name and
#                           as the base name of the CSV file.
#   dataset_name            Dataset directory below `tmp_dir`.
#   columns                 JSON array of column names, passed as one string;
#                           the names are applied to the columns in order.
#   tmp_dir                 Root of the local working directory.
#
# Returns the data frame read from
# <tmp_dir>/<dataset_name>/<static_data_table_name>/<static_data_table_name>.csv
# with its columns renamed to `columns`. Stops with an error when the number
# of names does not match the number of columns in the file.
read.csv.for.table <- function(static_data_table_name, dataset_name, 
    columns, tmp_dir) {
    columns <- fromJSON(columns)
    csv_path <- paste(tmp_dir, dataset_name, static_data_table_name,
        paste0(static_data_table_name, ".csv"), sep = "/")
    # Column names are supplied by the caller, so every line of the file is
    # data. With read.csv()'s default header = TRUE the first observation
    # would be consumed as a header and then overwritten below.
    data <- read.csv(csv_path, header = FALSE)
    if (length(columns) != ncol(data)) {
        stop(sprintf("Expected %d column names but the CSV file has %d columns.",
            length(columns), ncol(data)), call. = FALSE)
    }
    colnames(data) <- columns
    # A bare sprintf() inside a function is never shown; emit it explicitly.
    message(sprintf("Downloaded dataframe with %d rows and %d columns.", 
        nrow(data), ncol(data)))
    data
}
# ## Fetch the raw file from its web address
# The data for this table is hosted at a remote URL. It is copied into a
# local working directory first so that the following step can read it
# from disk.
download_location <- download.data.from.remote.web.location(
    src_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
    tmp_dir = "/tmp/wine/it",
    dest_file_name = "it.csv",
    table_name = "it",
    dataset_name = "wine"
)

# ## Build an R data frame from the downloaded CSV
# The intermediary CSV file on disk is read into an R data frame and its
# columns are named from the JSON list below.
r_data <- read.csv.for.table(
    static_data_table_name = "it",
    dataset_name = "wine",
    columns = "[\"wine_class_identifier\",\"alcohol\",\"malic_acid\",\"ash\",\"alcalinity_of_ash\",\"magnesium\",\"total_phenols\",\"flavanoids\",\"non_flavanoid_phenols\",\"proanthocyanins\",\"color_intensity\",\"hue\",\"od_280__od_315_diluted_wines\",\"proline\"]",
    tmp_dir = "/tmp/wine/it"
)

Pandas

requirements.txt Copy

pandas==1.2.3
pip command Copy

pip3 install pandas==1.2.3
Pandas code Copy

# PythonImports
import json
import os
import pandas as pd
import urllib.request


def download_data_from_remote_web_location(
    src_url, tmp_dir, dest_file_name, table_name, dataset_name
):
    """Download ``src_url`` to ``tmp_dir/dataset_name/table_name/dest_file_name``.

    The target directory is created when it does not exist yet.

    Args:
        src_url: Address of the file to fetch.
        tmp_dir: Root of the local working directory.
        dest_file_name: File name to store the download under.
        table_name: Innermost directory component.
        dataset_name: Directory component directly below ``tmp_dir``.

    Returns:
        The path of the downloaded file, so callers that keep the result
        (e.g. as ``download_location``) get something usable.
    """
    # Build the directory once and derive the file path from it, so the
    # directory that is created is always the one the file is written to.
    target_dir = os.path.join(tmp_dir, dataset_name, table_name)
    os.makedirs(target_dir, exist_ok=True)
    destination = os.path.join(target_dir, dest_file_name)
    urllib.request.urlretrieve(src_url, destination)
    return destination


def read_csv_into_pandas(
    static_data_table_name, dataset_name, columns, tmp_dir
):
    """Read a downloaded, header-less CSV file into a Pandas DataFrame.

    The file is expected at
    ``{tmp_dir}/{dataset_name}/{static_data_table_name}/{static_data_table_name}.csv``.

    Args:
        static_data_table_name: Table name; directory and CSV base name.
        dataset_name: Dataset directory below ``tmp_dir``.
        columns: JSON array of column names, passed as a string.
        tmp_dir: Root of the local working directory.

    Returns:
        The DataFrame with its columns named after ``columns``.
    """
    column_names = json.loads(columns)
    csv_path = f"{tmp_dir}/{dataset_name}/{static_data_table_name}/{static_data_table_name}.csv"
    # header=None: every line is data; the names come from the caller.
    frame = pd.read_csv(csv_path, header=None)
    frame.columns = column_names
    row_count, column_count = frame.shape
    print(
        "Downloaded dataframe with %d rows and %d columns."
        % (row_count, column_count)
    )
    return frame


# ## Fetch the raw file from its web address
# The data for this table is hosted at a remote URL. It is copied into a
# local working directory first so that the following step can read it
# from disk.
download_location = download_data_from_remote_web_location(
    table_name="it",
    dataset_name="wine",
    tmp_dir="/tmp/wine/it",
    dest_file_name="it.csv",
    src_url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
)

# ## Build a Pandas DataFrame from the downloaded CSV
# The intermediary CSV file on disk is read into a DataFrame and its
# columns are named from the JSON list below.
pandas_data = read_csv_into_pandas(
    static_data_table_name="it",
    dataset_name="wine",
    tmp_dir="/tmp/wine/it",
    columns='["wine_class_identifier","alcohol","malic_acid","ash","alcalinity_of_ash","magnesium","total_phenols","flavanoids","non_flavanoid_phenols","proanthocyanins","color_intensity","hue","od_280__od_315_diluted_wines","proline"]',
)

Numpy

requirements.txt Copy

numpy==1.20.1
pip command Copy

pip3 install numpy==1.20.1
Numpy code Copy

# PythonImports
import csv
import json
import numpy as np
import os
import urllib.request


def download_data_from_remote_web_location(
    src_url, tmp_dir, dest_file_name, table_name, dataset_name
):
    """Download ``src_url`` to ``tmp_dir/dataset_name/table_name/dest_file_name``.

    The target directory is created when it does not exist yet.

    Args:
        src_url: Address of the file to fetch.
        tmp_dir: Root of the local working directory.
        dest_file_name: File name to store the download under.
        table_name: Innermost directory component.
        dataset_name: Directory component directly below ``tmp_dir``.

    Returns:
        The path of the downloaded file, so callers that keep the result
        (e.g. as ``download_location``) get something usable.
    """
    # Build the directory once and derive the file path from it, so the
    # directory that is created is always the one the file is written to.
    target_dir = os.path.join(tmp_dir, dataset_name, table_name)
    os.makedirs(target_dir, exist_ok=True)
    destination = os.path.join(target_dir, dest_file_name)
    urllib.request.urlretrieve(src_url, destination)
    return destination


def read_csv_into_numpy(static_data_table_name, dataset_name, columns, tmp_dir):
    """Read a downloaded CSV file into a NumPy structured array.

    The file is expected at
    ``{tmp_dir}/{dataset_name}/{static_data_table_name}/{static_data_table_name}.csv``.

    Args:
        static_data_table_name: Table name; directory and CSV base name.
        dataset_name: Dataset directory below ``tmp_dir``.
        columns: JSON array of field names, passed as a string.
        tmp_dir: Root of the local working directory.

    Returns:
        The array produced by ``np.genfromtxt`` with ``names=columns`` and
        per-column inferred dtypes (``dtype=None``). A file with a single
        row yields a 0-d array, exactly as ``genfromtxt`` returns it.
    """
    columns = json.loads(columns)
    file_name = "{tmp_dir}/{dataset_name}/{static_data_table_name}/{static_data_table_name}.csv".format(
        dataset_name=dataset_name,
        static_data_table_name=static_data_table_name,
        tmp_dir=tmp_dir,
    )
    # Keep the file open only while genfromtxt consumes the lazy generator,
    # then close it deterministically. newline="" is the mode the csv module
    # documents for reader input.
    with open(file_name, newline="") as csv_file:
        # Parse with the csv module and re-emit each row tab-delimited.
        # Tabs inside a field become spaces so they cannot be mistaken for
        # the delimiter.
        escaped = (
            "\t".join(cell.replace("\t", " ") for cell in row)
            for row in csv.reader(csv_file)
        )
        data = np.genfromtxt(escaped, delimiter="\t", names=columns, dtype=None)
    # A one-row file gives a 0-d array whose shape is (), so indexing
    # shape[0] directly would raise; count rows on an at-least-1-d view.
    print("Downloaded ndarray with %d rows." % np.atleast_1d(data).shape[0])
    return data


# ## Fetch the raw file from its web address
# The data for this table is hosted at a remote URL. It is copied into a
# local working directory first so that the following step can read it
# from disk.
download_location = download_data_from_remote_web_location(
    table_name="it",
    dataset_name="wine",
    tmp_dir="/tmp/wine/it",
    dest_file_name="it.csv",
    src_url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
)

# ## Build a NumPy ndarray from the downloaded CSV
# The intermediary CSV file on disk is read into an ndarray whose fields
# are named from the JSON list below.
numpy_data = read_csv_into_numpy(
    static_data_table_name="it",
    dataset_name="wine",
    columns='["wine_class_identifier","alcohol","malic_acid","ash","alcalinity_of_ash","magnesium","total_phenols","flavanoids","non_flavanoid_phenols","proanthocyanins","color_intensity","hue","od_280__od_315_diluted_wines","proline"]',
    tmp_dir="/tmp/wine/it",
)

Schema

Name | Type
wine_class_identifier | Categorical
alcohol | PositiveFloat
malic_acid | PositiveFloat
ash | PositiveFloat
alcalinity_of_ash | PositiveFloat
magnesium | PositiveFloat
total_phenols | PositiveFloat
flavanoids | PositiveFloat
non_flavanoid_phenols | PositiveFloat
proanthocyanins | PositiveFloat
color_intensity | PositiveFloat
hue | PositiveFloat
od_280__od_315_diluted_wines | PositiveFloat
proline | PositiveFloat

WebLocation Storage

Param | Value
address | https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data