# View the tableone docstring
TableOne??
Init signature:
TableOne(
data: pandas.core.frame.DataFrame,
columns: Optional[list] = None,
categorical: Optional[list] = None,
groupby: Optional[str] = None,
nonnormal: Optional[list] = None,
min_max: Optional[list] = None,
pval: Optional[bool] = False,
pval_adjust: Optional[str] = None,
htest_name: bool = False,
pval_test_name: bool = False,
htest: Optional[dict] = None,
isnull: Optional[bool] = None,
missing: bool = True,
ddof: int = 1,
labels: Optional[dict] = None,
rename: Optional[dict] = None,
sort: Union[bool, str] = False,
limit: Union[int, dict, NoneType] = None,
order: Optional[dict] = None,
remarks: bool = False,
label_suffix: bool = True,
decimals: Union[int, dict] = 1,
smd: bool = False,
overall: bool = True,
row_percent: bool = False,
display_all: bool = False,
dip_test: bool = False,
normal_test: bool = False,
tukey_test: bool = False,
pval_threshold: Optional[float] = None,
) -> None
Source:
class TableOne:
"""
If you use the tableone package, please cite:
Pollard TJ, Johnson AEW, Raffa JD, Mark RG (2018). tableone: An open source
Python package for producing summary statistics for research papers.
JAMIA Open, Volume 1, Issue 1, 1 July 2018, Pages 26-31.
https://doi.org/10.1093/jamiaopen/ooy012
Create an instance of the tableone summary table.
Parameters
----------
data : pandas DataFrame
The dataset to be summarised. Rows are observations, columns are
variables.
columns : list, optional
List of columns in the dataset to be included in the final table.
categorical : list, optional
List of columns that contain categorical variables.
groupby : str, optional
Optional column for stratifying the final table (default: None).
nonnormal : list, optional
List of columns that contain non-normal variables (default: None).
min_max : list, optional
List of variables that should report minimum and maximum, instead of
standard deviation (for normal) or Q1-Q3 (for non-normal).
pval : bool, optional
Display computed P-Values (default: False).
pval_adjust : str, optional
Method used to adjust P-Values for multiple testing.
The P-values from the unadjusted table (default when pval=True)
are adjusted to account for the number of total tests that were
performed.
These adjustments would be useful when many variables are being
screened to assess if their distribution varies by the variable in the
groupby argument.
For a complete list of methods, see documentation for statsmodels
multipletests.
Available methods include ::
`None` : no correction applied.
`bonferroni` : one-step correction
`sidak` : one-step correction
`holm-sidak` : step down method using Sidak adjustments
`simes-hochberg` : step-up method (independent)
`hommel` : closed method based on Simes tests (non-negative)
htest_name : bool, optional
Display a column with the names of hypothesis tests (default: False).
htest : dict, optional
Dictionary of custom hypothesis tests. Keys are variable names and
values are functions. Functions must take a list of Numpy Arrays as
the input argument and must return a test result.
e.g. htest = {'age': myfunc}
missing : bool, optional
Display a count of null values (default: True).
ddof : int, optional
Degrees of freedom for standard deviation calculations (default: 1).
rename : dict, optional
Dictionary of alternative names for variables.
e.g. `rename = {'sex':'gender', 'trt':'treatment'}`
sort : bool or str, optional
If `True`, sort the variables alphabetically. If a string
(e.g. `'P-Value'`), sort by the specified column in ascending order.
Default (`False`) retains the sequence specified in the `columns`
argument. Currently the only columns supported are: `'Missing'`,
`'P-Value'`, `'P-Value (adjusted)'`, and `'Test'`.
limit : int or dict, optional
Limit to the top N most frequent categories. If int, apply to all
categorical variables. If dict, apply to the key (e.g. {'sex': 1}).
order : dict, optional
Specify an order for categorical variables. Key is the variable, value
is a list of values in order, e.g. {'sex': ['f', 'm', 'other']}
label_suffix : bool, optional
Append summary type (e.g. "mean (SD)", "median [Q1,Q3]", "n (%)") to the
row label (default: True).
decimals : int or dict, optional
Number of decimal places to display. An integer applies the rule to all
variables (default: 1). A dictionary (e.g. `decimals = {'age': 0}`)
applies the rule per variable, defaulting to 1 place for unspecified
variables. For continuous variables, applies to all summary statistics
(e.g. mean and standard deviation). For categorical variables, applies
to percentage only.
overall : bool, optional
If True, add an "overall" column to the table. SMD and p-value
calculations are performed only using stratified columns.
row_percent : bool, optional
If True, compute "n (%)" percentages for categorical variables across
"groupby" rows rather than columns.
display_all : bool, optional
If True, set pandas display options to display all columns and rows.
(default: False)
dip_test : bool, optional
Run Hartigan's Dip Test for multimodality. If variables are found to
have multimodal distributions, a remark will be added below the
Table 1.
(default: False)
normal_test : bool, optional
Test the null hypothesis that a sample comes from a normal distribution.
Uses scipy.stats.normaltest. If variables are found to have non-normal
distributions, a remark will be added below the Table 1.
(default: False)
tukey_test : bool, optional
Run Tukey's test for far outliers. If variables are found to
have far outliers, a remark will be added below the Table 1.
(default: False)
数据来源:https://github.com/tompollard/tableone
原创文章(本站视频密码:66668888),作者:xujunzju,如若转载,请注明出处:https://zyicu.cn/?p=16271
微信扫一扫
支付宝扫一扫