diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py
index 017702b85a..f0993880c0 100644
--- a/bigframes/core/indexes/base.py
+++ b/bigframes/core/indexes/base.py
@@ -115,6 +115,32 @@ def from_frame(
         index._linked_frame = frame
         return index
 
+    def to_frame(
+        self, index: bool = True, name: blocks.Label | None = None
+    ) -> bigframes.dataframe.DataFrame:
+        """Create a DataFrame with a column containing this index.
+
+        Args:
+            index: If True, name the frame's index after this index;
+                otherwise reset the frame's index.
+            name: Label for the resulting column. ``None`` falls back
+                to the name of this index.
+
+        Returns:
+            The index values as a single-column DataFrame.
+        """
+        # Test against None explicitly so falsy labels such as 0 or ""
+        # are honored instead of being replaced by self.name.
+        provided_name = name if name is not None else self.name
+        series = self.to_series()
+        series.name = provided_name
+        frame = series.to_frame()
+        if index:  # matching pandas behavior
+            frame.index.name = self.name
+        else:
+            frame = frame.reset_index(drop=True)
+        return frame
+
     @property
     def _session(self):
         return self._block.session
diff --git a/bigframes/core/indexes/multi.py b/bigframes/core/indexes/multi.py
index 182d1f101c..c81977b0f0 100644
--- a/bigframes/core/indexes/multi.py
+++ b/bigframes/core/indexes/multi.py
@@ -19,7 +19,9 @@
 import bigframes_vendored.pandas.core.indexes.multi as vendored_pandas_multindex
 import pandas
 
+import bigframes.core.blocks as blocks
 from bigframes.core.indexes.base import Index
+import bigframes.dataframe
 
 
 class MultiIndex(Index, vendored_pandas_multindex.MultiIndex):
@@ -46,3 +48,45 @@ def from_arrays(
         pd_index = pandas.MultiIndex.from_arrays(arrays, sortorder, names)
         # Index.__new__ should detect multiple levels and properly create a multiindex
         return cast(MultiIndex, Index(pd_index))
+
+    def to_frame(
+        self,
+        index: bool = True,
+        name: Sequence[blocks.Label] | blocks.Label | None = None,
+    ) -> bigframes.dataframe.DataFrame:
+        """Create a DataFrame with one column per level of this MultiIndex.
+
+        Args:
+            index: If True, the per-level value lists are also passed as
+                the index of the returned frame; otherwise no index is passed.
+            name: One column label per level. ``None`` labels each column
+                with its integer position.
+
+        Returns:
+            A DataFrame whose columns hold the values of each level.
+
+        Raises:
+            ValueError: If ``name`` is not a sequence of labels or its
+                length differs from the number of columns.
+        """
+        # Read `values` once instead of re-evaluating the property for every
+        # element, then transpose the rows into one list per level.
+        # NOTE(review): an empty index yields no columns here; confirm the
+        # desired result for that case.
+        rows = self.values
+        columns = [list(level) for level in zip(*rows)]
+        if name is None:
+            data = dict(enumerate(columns))
+        elif isinstance(name, Sequence) and not isinstance(name, (str, bytes)):
+            if len(name) != len(columns):
+                raise ValueError(
+                    "Length of provided names must match length of MultiIndex columns"
+                )
+            data = dict(zip(name, columns))
+        else:
+            # A string is itself a Sequence, but it is a single label and
+            # cannot name several levels.
+            raise ValueError("'name' parameter must be of type Sequence")
+        return bigframes.dataframe.DataFrame(
+            data, index=columns if index else None
+        )
diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py
index d68cf6c3f3..ceca91d580 100644
--- a/tests/system/small/test_index.py
+++ b/tests/system/small/test_index.py
@@ -16,6 +16,7 @@
 import pandas as pd
 import pytest
 
+import bigframes.core.indexes as indexes
 import bigframes.pandas as bpd
 from tests.system.utils import assert_pandas_index_equal_ignore_index_type
 
@@ -320,6 +321,30 @@ def test_index_to_series(
     pd.testing.assert_series_equal(bf_result, pd_result)
 
 
+@pytest.mark.parametrize("index_arg", [True, False])
+@pytest.mark.parametrize("name_arg", [None, "food"])
+def test_index_to_frame(index_arg, name_arg):
+    pd_idx: pd.Index = pd.Index(
+        ["Ant", "Bear", "Cow"], name="animal", dtype="string[pyarrow]"
+    )
+    bf_idx = indexes.Index(["Ant", "Bear", "Cow"], name="animal")
+
+    if name_arg is None:
+        pd_df = pd_idx.to_frame(index=index_arg)
+        bf_df = bf_idx.to_frame(index=index_arg)
+    else:
+        pd_df = pd_idx.to_frame(index=index_arg, name=name_arg)
+        bf_df = bf_idx.to_frame(index=index_arg, name=name_arg)
+    pd.testing.assert_frame_equal(
+        pd_df, bf_df.to_pandas(), check_column_type=False, check_index_type=False
+    )
+    # BigFrames type casting is weird
+    # automatically casts dtype to string whereas pandas dtype is object
+    # additionally, pandas uses string[python] and BigFrames uses string[pyarrow]
+    # so we set dtype in pandas index creation
+    # similarly, pandas uses int64 dtype for numerical index and BigFrames uses Int64
+
+
 @pytest.mark.parametrize(
     ("how",),
     [
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py
index ab2a9c19b8..7a2f0b479d 100644
--- a/tests/system/small/test_multiindex.py
+++ b/tests/system/small/test_multiindex.py
@@ -16,6 +16,7 @@
 import pandas
 import pytest
 
+import bigframes.core.indexes as indexes
 import bigframes.pandas as bpd
 from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
 
@@ -45,6 +46,27 @@ def test_multi_index_from_arrays():
     pandas.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx)
 
 
+@pytest.mark.parametrize("index_arg", [True, False])
+@pytest.mark.parametrize("name_arg", [None, ["x", "y"]])
+def test_multi_index_to_frame(index_arg, name_arg):
+
+    pd_idx = pandas.MultiIndex.from_arrays([["a", "b", "c"], ["d", "e", "f"]])
+    bf_idx = indexes.MultiIndex.from_arrays([["a", "b", "c"], ["d", "e", "f"]])
+    if name_arg is None:
+        pd_df = pd_idx.to_frame(index=index_arg)
+        bf_df = bf_idx.to_frame(index=index_arg)
+    else:
+        pd_df = pd_idx.to_frame(index=index_arg, name=name_arg)
+        bf_df = bf_idx.to_frame(index=index_arg, name=name_arg)
+    pandas.testing.assert_frame_equal(
+        pd_df,
+        bf_df.to_pandas(),
+        check_dtype=False,
+        check_column_type=False,
+        check_index_type=False,
+    )
+
+
 @skip_legacy_pandas
 def test_read_pandas_multi_index_axes():
     index = pandas.MultiIndex.from_arrays(