Coverage for src/colorspace/datasets.py: 91%

def _getdataset_volcano():
    """Topographic Information on Auckland's Maunga Whau Volcano

    Thin convenience wrapper: the work is delegated to
    :py:func:`get_volcano_data <colorspace.demos.get_volcano_data>`,
    whose man page describes the data set in detail.

    Returns:
        The result of calling `get_volcano_data` with `array = True`.
    """
    # Imported lazily so the demos module is only loaded when needed
    from .demos import get_volcano_data as _load_volcano

    volcano = _load_volcano(array = True)
    return volcano

def _getdataset_HarzTraffic():
    """Daily Traffic Counts Sonnenberg

    Reads the file ``data/HarzTraffic.csv`` located next to this module
    and appends four derived columns: ``season``, ``dow``, ``dayofweek``,
    and ``weekend``.

    Requires `pandas` to be installed.

    Returns:
        pandas.DataFrame: The daily records. The ``date`` column is
        converted to `datetime.date` objects.

    Raises:
        ImportError: If `pandas` cannot be imported.
        Exception: If the CSV file cannot be read; the message contains
            the path of the file.
    """
    try:
        import pandas as pd
    except ImportError as err:
        raise ImportError("'HarzTraffic' requires `pandas` to be installed") from err

    import os
    import numpy as np

    # Loading the data set; the CSV lives in the "data" folder next to this file
    resource_package = os.path.dirname(__file__)
    csv = os.path.join(resource_package, "data", "HarzTraffic.csv")

    # Trying to read the data set
    try:
        data = pd.read_csv(csv)
    except Exception as err:
        # f-string, so that the message shows the actual path (not "{csv}")
        raise Exception(f"problems reading \"{csv}\"") from err

    # Convert 'date' column to datetime.date
    data.date = pd.to_datetime(data.date).dt.date

    # Adding season: everything defaults to winter, the months
    # March-November are then overwritten with their season.
    m = pd.DatetimeIndex(data.date).month
    data["season"] = np.repeat("winter", data.shape[0])
    data.loc[(m >= 3) & (m <= 5), "season"] = "spring"
    data.loc[(m >= 6) & (m <= 8), "season"] = "summer"
    data.loc[(m >= 9) & (m <= 11), "season"] = "autumn"
    del m

    # Day of week (integer and short name) plus boolean flag for 'weekend'
    d = pd.DatetimeIndex(data.date).dayofweek
    names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    data["dow"] = d  # Integer (0 = Mon, 6 = Sun), useful for ordering
    data["dayofweek"] = np.asarray([names[x] for x in d])
    data["weekend"] = np.repeat(False, data.shape[0])
    data.loc[(d >= 5), "weekend"] = True  # Saturday (5) or Sunday (6)
    del d

    return data

def _getdataset_MonthlyHarzTraffic():
    """Monthly Traffic Counts Summary Sonnenberg

    Loads the daily `HarzTraffic` data set and aggregates it per
    year and month: sums for ``bikes``, ``cars``, ``trucks``, ``others``
    and ``rain``; means for ``temp``, ``sunshine`` and ``wind``.
    Floating point columns are rounded to one digit and a ``season``
    column is appended.

    Requires `pandas` to be installed.

    Returns:
        pandas.DataFrame: One row per (year, month) combination.

    Raises:
        ImportError: If `pandas` cannot be imported.
    """
    try:
        import pandas as pd
    except ImportError as err:
        raise ImportError("'MonthlyHarzTraffic' requires `pandas` to be installed") from err

    import numpy as np

    # Loading the (daily) data set
    from .datasets import dataset
    data = dataset("HarzTraffic")

    # Appending year and month
    data["year"] = pd.DatetimeIndex(data.date).year
    data["month"] = pd.DatetimeIndex(data.date).month

    # Aggregating sums (a1) and means (a2) for specific columns
    tmp = ["year", "month", "bikes", "cars", "trucks", "others", "rain"]
    a1 = data.loc[:, tmp].groupby(["year", "month"]).agg("sum")
    tmp = ["year", "month", "temp", "sunshine", "wind"]
    a2 = data.loc[:, tmp].groupby(["year", "month"]).agg("mean")

    # Merge and round to 1 digit
    data = a1.merge(a2, on = ["year", "month"]).reset_index()
    for col in data.columns:
        # Test the column's dtype explicitly instead of handing the
        # whole Series to np.issubdtype.
        if np.issubdtype(data[col].dtype, np.floating):
            data[col] = data[col].round(1)

    # Adding season flag: winter by default, other seasons by month
    data["season"] = np.repeat("winter", data.shape[0])
    data.loc[(data.month >= 3) & (data.month <= 5), "season"] = "spring"
    data.loc[(data.month >= 6) & (data.month <= 8), "season"] = "summer"
    data.loc[(data.month >= 9) & (data.month <= 11), "season"] = "autumn"

    return data

def dataset(name):
    """Loading colorspace Package Example Data

    The package `colorspace` comes with a few small data sets used
    in the Examples and/or the documentation. This function allows
    for easy access to these data sets. Note that some data sets
    require `pandas` to be installed.

    #### **volcano**: Maunga Whau Volcano

    Topographic information on Auckland's Maunga Whau Volcano on
    a 10m x 10m grid. Will return a two-dimensional `numpy.ndarray`
    of dimension 61x87 (int64).

    Digitized from a topographic map by Ross Ihaka. These data should
    not be regarded as accurate.

    #### **HarzTraffic**: Daily Traffic Counts at Sonnenberg

    The data set provides daily traffic counts for bikes (motor bikes),
    cars, trucks, and other vehicles in the vicinity of Sonnenberg
    located in the Harz region in Germany. The data set covers
    a period of nearly three years (2021-01-01 to 2023-11-30).

    A `pandas.DataFrame` containing 1057 observations (rows) on 20 variables:

    * `date` date, the date of the record.
    * `yday` int64, the day of the year.
    * `bikes` int64, the number of motorcycles on that day.
    * `cars` int64, the number of cars on that day.
    * `trucks` int64, the number of trucks on that day.
    * `others` int64, the number of other vehicles on that day.
    * `tempmin` float64, minimum temperature in degrees Celsius.
    * `tempmax` float64, maximum temperature in degrees Celsius.
    * `temp` float64, mean temperature in degrees Celsius.
    * `humidity` int64, mean relative humidity in percent.
    * `tempdew` float64, average dewpoint temperature in degrees Celsius.
    * `cloudiness` int64, average cloud cover in percent.
    * `rain` float64, amount of precipitation in mm (snow and rain).
    * `sunshine` int64, sunshine duration in minutes.
    * `wind` float64, mean wind speed in meters per second.
    * `windmax` float64, maximum wind speed in meters per second.
    * `season`: object, local season (spring, summer, autumn, winter).
    * `dow`: int64, numeric day of week (0 = Mon, 6 = Sun).
    * `dayofweek`: object, short name of day of week.
    * `weekend`: bool, True if the day is Saturday or Sunday, else False.

    Weather data: Deutscher Wetterdienst (DWD), Climate Data Center (CDC),
    station Wernigerode (5490; Sachsen-Anhalt) w/ location 10.7686/51.8454/233
    (lon, lat, alt, EPSG 4326). CC-BY 4.0, available via
    <https://opendata.dwd.de/climate_environment/CDC/>.

    Traffic data: Bundesanstalt für Strassenwesen (BASt), station Sonnenberg.
    CC-BY 4.0, available via <https://www.bast.de>,
    <https://www.bast.de/DE/Verkehrstechnik/Fachthemen/v2-verkehrszaehlung/Verkehrszaehlung.html>.


    #### **MonthlyHarzTraffic**: Monthly Summary of Traffic Counts at Sonnenberg

    Based on the daily data set `HarzTraffic` (see above) but aggregated on
    a monthly basis.

    A `pandas.DataFrame` containing 35 observations (rows) on 11 variables:

    * `year`: int32, year of record.
    * `month`: int32, month of record.
    * `bikes`: int64, the total number of bikes in that month.
    * `cars`: int64, the total number of cars in that month.
    * `trucks`: int64, the total number of trucks in that month.
    * `others`: int64, the total number of other vehicles in that month.
    * `rain`: float64, monthly precipitation sum in mm (snow and rain).
    * `temp`: float64, monthly mean temperature in degrees Celsius.
    * `sunshine`: float64, monthly average of sunshine per day in minutes.
    * `wind`: float64, monthly mean wind speed in meters per second.
    * `season`: object, local season (spring, summer, autumn, winter).

    Data source and license: see data set description 'HarzTraffic'.


    Args:
        name (str): Name of the data set to be returned.

    Returns:
        The object returned depends on the data set (see above).

    Raises:
        TypeError: If `name` is not a str.
        ValueError: If no data set called `name` exists; the message
            lists the available data sets.
    """

    if not isinstance(name, str):
        raise TypeError("argument `name` must be str")

    # Each data set is provided by a module-level loader function called
    # `_getdataset_<name>`; look it up in this module's own namespace.
    prefix = "_getdataset_"
    namespace = globals()
    fun = namespace.get(prefix + name)

    if fun is None:
        # Listing of all available data sets (sorted by name)
        available = [key[len(prefix):] for key in sorted(namespace)
                     if key.startswith(prefix)]
        raise ValueError(f"dataset \"{name}\" does not exist. " + \
                         f"Available data sets are: {', '.join(available)}.")

    return fun()

207