Source code for swmm.pandas.report.report

from __future__ import annotations
import re
from io import StringIO

from pandas.core.api import DataFrame, Timestamp, to_datetime, to_timedelta, Series
from pandas.io.parsers import read_csv, read_fwf


class Report:
    _rptfile: str
    """path to swmm rpt file"""

    _rpt_text: str
    """text string of rpt file contents"""

    _sections: dict[str, str]
    """dictionary of SWMM report sections as {section name: section text}"""

    def __init__(self, rptfile: str):
        """Base class for a SWMM simulation report file.

        The Report object provides an API for the tables in the SWMM
        simulation report file. Tables are accessed as properties of the
        object and returned as pandas DataFrames.

        Parameters
        ----------
        rptfile: str
            model report file path
        """
        self._rptfile = rptfile
        with open(rptfile) as file:
            self._rpt_text = file.read()

        # index sections by their parsed titles
        self._sections = {
            self._find_title(section): section
            for section in self._find_sections(self._rpt_text)
        }

    @staticmethod
    def _find_sections(rpt_text: str) -> list[str]:
        r"""
        Split the report file text into separate sections using a regex
        pattern match.

        "^\s+$\s+(?=\*|A)": pattern matches blank lines followed by at
        least one whitespace character, followed by a lookahead for an
        asterisk (which demarks section headers) or the letter A (which
        matches the word Analysis at the end of the report file).

        Parameters
        ----------
        rpt_text: str
            Text content of the report file

        Returns
        -------
        list[str]
            A list of section texts
        """
        # pattern to match blank lines preceding a line of asterisks
        section_pattern = R"^\s+$\s+(?=\*|A)"
        section_comp = re.compile(section_pattern, re.MULTILINE)
        # drop the leading banner splits and the trailing "Analysis begun/ended"
        # lines, then strip the two-space indent from each remaining line
        return list(
            map(lambda x: x.replace("\n  ", "\n"), section_comp.split(rpt_text)[2:-1])
        )

    @staticmethod
    def _find_title(section: str) -> str:
        r"""
        Extract the title of a section produced by _find_sections, using a
        regex to match the line between two lines of asterisks.

        "^\*+[\s\S]*?\n([\s\S]*?)\s*\*+": pattern matches any number of
        whitespace or non-whitespace characters between:

        1. A line starting with a string of asterisks, followed by any
           whitespace or non-whitespace characters, and ending with a
           line break
        2. A line starting with a string of asterisks

        Parameters
        ----------
        section: str
            The section text produced by _find_sections

        Returns
        -------
        str
            Title of the section

        Raises
        ------
        Exception
            If the regex could not find a match
        """
        # pattern to match the line between two lines of asterisks
        title_pattern = R"^\*+[\s\S]*?\n([\s\S]*?)\s*\*+"
        title_comp = re.compile(title_pattern, re.MULTILINE)
        s = title_comp.match(section)
        if s:
            # if a match is found, split the line on two consecutive spaces
            # and pull the first token
            return s.group(1).split("  ")[0]
        else:
            raise Exception(f"Error finding title for section\n{section}")

    @staticmethod
    def _split_section(section: str) -> tuple[str, str]:
        """
        Split a report section into header and data elements. Relies on a
        regex matching the lines of consecutive dashes that bracket header
        lines.

        Parameters
        ----------
        section: str
            The section text produced by _find_sections

        Returns
        -------
        tuple[str, str]
            Header text and data text

        Raises
        ------
        Exception
            If the section has an unexpected number of subsections
        """
        title = Report._find_title(section)
        subsections = re.split(R"\s*-+\n", section)
        num_subsections = len(subsections)
        if num_subsections == 1:
            header = "Result"
            # split section on line of asterisks
            data = re.split(R"\*+", section)[-1]
        elif num_subsections == 2:
            header, data = subsections
        elif num_subsections == 3:
            notes, header, data = subsections
        elif num_subsections == 4:
            notes, header, data, system = subsections
        else:
            raise Exception(f"Error parsing table {title}")
        return header, data
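    # The helpers above and below assume report sections shaped like this
    # synthetic fragment (a sketch for orientation, not real SWMM output):
    #
    #   ***************************
    #   Node Depth Summary
    #   ***************************
    #
    #   ---------------------------------
    #   Node         Average     Maximum
    #                  Depth       Depth
    #   ---------------------------------
    #   J1              0.10        1.25
    #
    # _find_title matches "Node Depth Summary" between the asterisk lines,
    # and _split_section splits on the dashed lines to separate the two-line
    # column header from the data rows.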
    @staticmethod
    def _parse_header(header: str) -> list[str]:
        """
        Parse the header text produced by _split_section into a list of
        column headers. Uses pandas read_fwf to automatically parse the
        multi-line headers present in the report file.

        Parameters
        ----------
        header: str
            Header text string produced by _split_section

        Returns
        -------
        list[str]
            List of column headers
        """
        # replace asterisks and dashes with spaces, then join the words
        # stacked in each fixed-width column with underscores
        header = [
            re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", field[1].dropna().str.cat(sep="_"))
            for field in read_fwf(
                StringIO(re.sub(R"\*|-", " ", header)), header=None
            ).items()
        ]

        # split day and time into separate fields to be recombined into a
        # datetime object when parsing the table
        if "Time_of_Max_Occurrence_days_hr:min" in header:
            max_idx = header.index("Time_of_Max_Occurrence_days_hr:min")
            header[max_idx] = "days"
            header.insert(max_idx + 1, "Time_of_Max")

        return header

    @staticmethod
    def _parse_table(
        header: list[str], data: str, sep: str = R"\s{2,}|\s:\s", index_col: int = 0
    ) -> DataFrame:
        r"""
        Parse the data string produced by _split_section into a pandas
        DataFrame.

        Parameters
        ----------
        header: list[str]
            Sequence of column names to assign to the DataFrame. Usually
            produced by _parse_header.
        data: str
            Data string produced by _split_section
        sep: str, optional
            Delimiter passed to the pandas read_csv function that parses
            the data string, by default R"\s{2,}|\s:\s"
        index_col: int, optional
            Column in the data to use as the DataFrame index, by default 0

        Returns
        -------
        DataFrame
            Report data table
        """
        # remove leading spaces on each line and replace long runs of
        # periods with spaces
        data = re.sub(R"^\s+", "", re.sub(R"\.{2,}", " ", data), flags=re.MULTILINE)

        # by default, read in data using a minimum of two spaces, or a colon
        # flanked by spaces, as the delimiter
        df = read_csv(
            filepath_or_buffer=StringIO(data),
            header=None,
            engine="python",
            sep=sep,
            index_col=index_col,
            names=header,
        )

        # convert day and time columns into a single datetime column
        if "Time_of_Max" in df.columns:
            # convert time of max to timedelta
            df["Time_of_Max"] = to_timedelta(
                df.pop("days").astype(int), unit="D"
            ) + to_timedelta(
                df["Time_of_Max"] + ":00"
            )  # type: ignore

        return df
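    # For example, on the two-line header sketched above, _parse_header would
    # return ["Node", "Average_Depth", "Maximum_Depth"]: read_fwf stacks the
    # words found in each fixed-width column and they are joined with
    # underscores. (Illustrative values; actual names depend on the section.)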
    @property
    def analysis_options(self) -> Series:
        """
        Pandas Series containing the analysis options listed in the report
        file, including units, models, methods, dates, time steps, etc.

        Returns
        -------
        Series
            Series of options.
        """
        if not hasattr(self, "_analysis_options"):
            header, data = self._split_section(self._sections["Analysis Options"])
            df = self._parse_table(["Option", "Setting"], data)["Setting"]
            self._analysis_options = df.dropna()
        return self._analysis_options

    @property
    def runoff_quantity_continuity(self) -> DataFrame:
        """
        Runoff quantity continuity error table in volume and depth units.
        System-wide error is shown in percent.

        Returns
        -------
        DataFrame
            DataFrame of runoff quantity continuity error table.
        """
        if not hasattr(self, "_runoff_quantity_continuity"):
            header, data = self._split_section(
                self._sections["Runoff Quantity Continuity"]
            )
            # substitute single spaces between words with underscores so
            # read_fwf works; the lookarounds keep line breaks intact
            header = self._parse_header(re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", header))
            self._runoff_quantity_continuity = self._parse_table(header, data)
        return self._runoff_quantity_continuity

    @property
    def runoff_quality_continuity(self) -> DataFrame:
        """
        Runoff quality continuity error table in mass units for each
        pollutant. System-wide error is shown in percent.

        Returns
        -------
        DataFrame
            DataFrame of runoff quality continuity error table
        """
        if not hasattr(self, "_runoff_quality_continuity"):
            header, data = self._split_section(
                self._sections["Runoff Quality Continuity"]
            )
            header = self._parse_header(re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", header))
            self._runoff_quality_continuity = self._parse_table(header, data)
        return self._runoff_quality_continuity

    @property
    def groundwater_continuity(self) -> DataFrame:
        """
        Groundwater quantity continuity error table in volume and depth
        units. System-wide error is shown in percent.

        Returns
        -------
        DataFrame
            DataFrame of groundwater quantity continuity error table
        """
        if not hasattr(self, "_groundwater_continuity"):
            header, data = self._split_section(self._sections["Groundwater Continuity"])
            header = self._parse_header(re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", header))
            self._groundwater_continuity = self._parse_table(header, data)
        return self._groundwater_continuity

    @property
    def flow_routing_continuity(self) -> DataFrame:
        """
        Flow routing continuity error table in volume units. System-wide
        error is shown in percent.

        Returns
        -------
        DataFrame
            DataFrame of flow routing continuity error table
        """
        if not hasattr(self, "_flow_routing_continuity"):
            header, data = self._split_section(
                self._sections["Flow Routing Continuity"]
            )
            header = self._parse_header(re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", header))
            self._flow_routing_continuity = self._parse_table(header, data)
        return self._flow_routing_continuity

    @property
    def quality_routing_continuity(self) -> DataFrame:
        """
        Quality routing continuity error table in mass units. System-wide
        error is shown in percent.

        Returns
        -------
        DataFrame
            DataFrame of quality routing continuity error table
        """
        if not hasattr(self, "_quality_routing_continuity"):
            header, data = self._split_section(
                self._sections["Quality Routing Continuity"]
            )
            header = self._parse_header(re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", header))
            self._quality_routing_continuity = self._parse_table(header, data)
        return self._quality_routing_continuity
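    # The word-joining substitution used by the continuity properties above
    # only touches single spaces flanked by word characters, so (for a
    # synthetic header line):
    #
    #     re.sub(R"(?<=\w)[^\S\r\n](?=\w)", "_", "Total Precipitation  Wet Deposition")
    #
    # yields "Total_Precipitation  Wet_Deposition" -- the two-space column
    # gap survives for read_fwf to detect.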
    @property
    def highest_continuity_errors(self) -> DataFrame:
        """
        Highest continuity error table in percent. This table shows the
        model elements with the highest flow routing continuity error.

        Returns
        -------
        DataFrame
            DataFrame of highest continuity errors table
        """
        if not hasattr(self, "_highest_errors"):
            header, data = self._split_section(
                self._sections["Highest Continuity Errors"]
            )
            df = self._parse_table(
                ["object_type", "name", "percent_error"], data, sep=R"\s+", index_col=1
            )
            df["percent_error"] = df["percent_error"].str.strip("()%").astype(float)
            self._highest_errors = df
        return self._highest_errors

    @property
    def time_step_critical_elements(self) -> DataFrame:
        """
        Time-step critical elements table in percent. This table shows the
        model elements that were controlling the model time step, if a
        variable one was used.

        Returns
        -------
        DataFrame
            DataFrame of time-step critical elements table
        """
        if not hasattr(self, "_ts_critical"):
            header, data = self._split_section(
                self._sections["Time-Step Critical Elements"]
            )
            df = self._parse_table(
                ["object_type", "name", "percent"], data, sep=R"\s+", index_col=1
            )
            df["percent"] = df["percent"].str.strip("()%").astype(float)
            self._ts_critical = df
        return self._ts_critical

    @property
    def highest_flow_instability_indexes(self) -> DataFrame:
        """
        Highest flow instability indexes. This table shows the model
        elements that have the highest flow instability.

        Returns
        -------
        DataFrame
            DataFrame of highest flow instability indexes table
        """
        if not hasattr(self, "_highest_flow_instability_indexes"):
            header, data = self._split_section(
                self._sections["Highest Flow Instability Indexes"]
            )
            # return an empty table when the report flags no unstable links
            if "All links are stable" in data:
                data = ""
            df = self._parse_table(
                ["object_type", "name", "index"], data, sep=R"\s+", index_col=1
            )
            df["index"] = df["index"].str.strip("()").astype(int)
            self._highest_flow_instability_indexes = df
        return self._highest_flow_instability_indexes

    @property
    def routing_time_step_summary(self) -> DataFrame:
        """
        Routing time step summary table that shows the average, minimum,
        and maximum time steps as well as a convergence summary.

        Returns
        -------
        DataFrame
            DataFrame of routing time step summary table
        """
        if not hasattr(self, "_routing_time_step_summary"):
            header, data = self._split_section(
                self._sections["Routing Time Step Summary"]
            )
            self._routing_time_step_summary = self._parse_table(
                self._parse_header(header), data, sep=R"\s+:\s+"
            )
        return self._routing_time_step_summary
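    # The element-level tables above hold whitespace-delimited rows such as
    # "Node  J1  (5.20%)" (synthetic example), hence sep=R"\s+", index_col=1
    # to index by element name, and the strip("()%") post-processing.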
    @property
    def runoff_summary(self) -> DataFrame:
        """
        Runoff summary table for each subcatchment that details rainfall,
        runon, evaporation, infiltration, and runoff.

        Returns
        -------
        DataFrame
            DataFrame of subcatchment runoff summary table
        """
        if not hasattr(self, "_runoff_summary"):
            header, data = self._split_section(
                self._sections["Subcatchment Runoff Summary"]
            )
            self._runoff_summary = self._parse_table(self._parse_header(header), data)
        return self._runoff_summary

    @property
    def groundwater_summary(self) -> DataFrame:
        """
        Groundwater summary table for each subcatchment that details
        groundwater inflow, outflow, moisture, and water table.

        Returns
        -------
        DataFrame
            DataFrame of subcatchment groundwater summary table
        """
        if not hasattr(self, "_groundwater_summary"):
            header, data = self._split_section(self._sections["Groundwater Summary"])
            self._groundwater_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._groundwater_summary

    @property
    def washoff_summary(self) -> DataFrame:
        """
        Washoff summary table that details the total pollutant load washed
        off of each subcatchment.

        Returns
        -------
        DataFrame
            DataFrame of subcatchment washoff summary table
        """
        if not hasattr(self, "_washoff_summary"):
            header, data = self._split_section(
                self._sections["Subcatchment Washoff Summary"]
            )
            self._washoff_summary = self._parse_table(self._parse_header(header), data)
        return self._washoff_summary

    @property
    def node_depth_summary(self) -> DataFrame:
        """
        Node depth summary table that details the average and maximum depth
        and HGL simulated for each node.

        Returns
        -------
        DataFrame
            DataFrame of node depth summary table
        """
        if not hasattr(self, "_node_depth_summary"):
            header, data = self._split_section(self._sections["Node Depth Summary"])
            self._node_depth_summary = self._parse_table(
                self._parse_header(header), data, sep=R"\s{1,}|\s:\s"
            )
        return self._node_depth_summary

    @property
    def node_inflow_summary(self) -> DataFrame:
        """
        Node inflow summary table that details the maximum inflow rates,
        total inflow volumes, and flow balance error percent for each node.

        Returns
        -------
        DataFrame
            DataFrame of node inflow summary table
        """
        if not hasattr(self, "_node_inflow_summary"):
            header, data = self._split_section(self._sections["Node Inflow Summary"])
            self._node_inflow_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._node_inflow_summary
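    # Several node tables above carry a "Time of Max Occurrence  days hr:min"
    # header. _parse_header splits it into "days" and "Time_of_Max" columns,
    # and _parse_table recombines them into a single timedelta; e.g. row
    # values of days=1 and Time_of_Max="02:30" (synthetic) become
    # Timedelta("1 days 02:30:00").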
    @property
    def node_surcharge_summary(self) -> DataFrame:
        """
        Node surcharge summary that details the maximum surcharge level and
        duration of surcharge for each node.

        Returns
        -------
        DataFrame
            DataFrame of node surcharge summary table
        """
        if not hasattr(self, "_node_surcharge_summary"):
            header, data = self._split_section(self._sections["Node Surcharge Summary"])
            self._node_surcharge_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._node_surcharge_summary

    @property
    def node_flooding_summary(self) -> DataFrame:
        """
        Node flooding summary that details the maximum ponded depth, peak
        flooding rate, total flood volume, and total flood duration for
        each node.

        Returns
        -------
        DataFrame
            DataFrame of node flooding summary table
        """
        if not hasattr(self, "_node_flooding_summary"):
            header, data = self._split_section(self._sections["Node Flooding Summary"])
            self._node_flooding_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._node_flooding_summary

    @property
    def storage_volume_summary(self) -> DataFrame:
        """
        Storage volume summary that details the frequency of filling,
        average and peak volumes, losses, and outflow rate for each
        storage unit.

        Returns
        -------
        DataFrame
            DataFrame of storage volume summary table
        """
        if not hasattr(self, "_storage_volume_summary"):
            header, data = self._split_section(self._sections["Storage Volume Summary"])
            # blank out "Unit" while preserving column width so read_fwf
            # keeps the header aligned with the data
            header = header.replace("Storage Unit", "Storage     ")
            self._storage_volume_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._storage_volume_summary

    @property
    def outfall_loading_summary(self) -> DataFrame:
        """
        Outfall loading summary that details the flow frequency, average
        and peak flow rates, total outflow volume, and pollutant mass
        loads for each outfall.

        Returns
        -------
        DataFrame
            DataFrame of outfall loading summary table
        """
        if not hasattr(self, "_outfall_loading_summary"):
            header, data = self._split_section(
                self._sections["Outfall Loading Summary"]
            )
            # blank out "Node" while preserving column width
            header = header.replace("Outfall Node", "Outfall     ")
            self._outfall_loading_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._outfall_loading_summary

    @property
    def link_flow_summary(self) -> DataFrame:
        """
        Link flow summary that details the peak flow, velocity, depth, and
        capacity for each link.

        Returns
        -------
        DataFrame
            DataFrame of link flow summary table
        """
        if not hasattr(self, "_link_flow_summary"):
            header, data = self._split_section(self._sections["Link Flow Summary"])
            header = header.replace("|", " ")
            self._link_flow_summary = self._parse_table(
                self._parse_header(header), data, sep=R"\s{1,}|\s:\s"
            )
        return self._link_flow_summary

    @property
    def flow_classification_summary(self) -> DataFrame:
        """
        Flow classification summary that details the amount of conduit
        lengthening during the simulation and the fraction of simulation
        time in dry, subcritical, supercritical, or critical flow for
        each conduit.

        Returns
        -------
        DataFrame
            DataFrame of flow classification summary table
        """
        if not hasattr(self, "_flow_classification_summary"):
            header, data = self._split_section(
                self._sections["Flow Classification Summary"]
            )
            to_remove = "---------- Fraction of Time in Flow Class ----------"
            to_replace = " "
            header = header.replace(to_remove, to_replace)
            self._flow_classification_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._flow_classification_summary
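    # The header patching above removes spanner text (e.g. "Fraction of Time
    # in Flow Class") so read_fwf does not fold those words into column
    # names; the flanking dashes are already blanked inside _parse_header.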
    @property
    def conduit_surcharge_summary(self) -> DataFrame:
        """
        Conduit surcharge summary that details the hours of surcharging and
        capacity-limited conditions.

        Returns
        -------
        DataFrame
            DataFrame of conduit surcharge summary table
        """
        if not hasattr(self, "_conduit_surcharge_summary"):
            header, data = self._split_section(
                self._sections["Conduit Surcharge Summary"]
            )
            # replace the spanner with per-column names of the same width
            to_remove = "--------- Hours Full --------"
            to_replace = "HrsFull  HoursFull   HrsFull "
            header = header.replace(to_remove, to_replace)
            self._conduit_surcharge_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._conduit_surcharge_summary

    @property
    def pumping_summary(self) -> DataFrame:
        """
        Pumping summary that details the utilization, peak flow rates,
        total flow volume, power usage, and time off pump curve for each
        pump.

        Returns
        -------
        DataFrame
            DataFrame of pumping summary table
        """
        if not hasattr(self, "_pumping_summary"):
            header, data = self._split_section(self._sections["Pumping Summary"])
            header = self._parse_header(header)
            # the last report column holds low/high time off pump curve values
            header[-1] = "Percent_Time_Off_Pump_Curve_Low"
            header.append("Percent_Time_Off_Pump_Curve_High")
            self._pumping_summary = self._parse_table(header, data)
        return self._pumping_summary

    @property
    def link_pollutant_load_summary(self) -> DataFrame:
        """
        Link pollutant load summary that details the total pollutant mass
        discharged from each link.

        Returns
        -------
        DataFrame
            DataFrame of link pollutant load summary table
        """
        if not hasattr(self, "_link_pollutant_load_summary"):
            header, data = self._split_section(
                self._sections["Link Pollutant Load Summary"]
            )
            self._link_pollutant_load_summary = self._parse_table(
                self._parse_header(header), data
            )
        return self._link_pollutant_load_summary

    @property
    def analysis_begun(self) -> Timestamp:
        """
        Date and time when the simulation was started.

        Returns
        -------
        Timestamp
            Simulation start time

        Raises
        ------
        Exception
            If the analysis begun text could not be found in the report file
        """
        if not hasattr(self, "_analysis_begun"):
            pattern = R"\s+Analysis begun on:\s+([^\n]+)$"
            s = re.search(pattern, self._rpt_text, flags=re.MULTILINE)
            if s:
                self._analysis_begun = to_datetime(s.group(1))
            else:
                raise Exception("Error finding analysis begun time")
        return self._analysis_begun

    @property
    def analysis_end(self) -> Timestamp:
        """
        Date and time when the simulation ended.

        Returns
        -------
        Timestamp
            Simulation end time

        Raises
        ------
        Exception
            If the analysis ended text could not be found in the report file
        """
        if not hasattr(self, "_analysis_end"):
            pattern = R"\s+Analysis ended on:\s+([^\n]+)$"
            s = re.search(pattern, self._rpt_text, flags=re.MULTILINE)
            if s:
                self._analysis_end = to_datetime(s.group(1))
            else:
                raise Exception("Error finding analysis end time")
        return self._analysis_end
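
# Minimal usage sketch. "model.rpt" is a placeholder path for a report file
# produced by a completed SWMM run, not a file shipped with this module.
if __name__ == "__main__":
    rpt = Report("model.rpt")
    print(rpt.analysis_options)                 # Series of run options
    print(rpt.node_depth_summary.head())        # DataFrame indexed by node name
    print(rpt.analysis_begun, rpt.analysis_end)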