docs: code samples for sample, get, Series.round (#295)

shobsi · web-flow · commit c2b189282554 · 2024-01-02T20:28:16.000Z
BEGIN_COMMIT_OVERRIDE docs: code samples for `sample`, `get`, `Series.round` (#295) docs: code samples for DataFrame `set_index`, `items` (#295) END_COMMIT_OVERRIDE Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) - `DataFrame.sample`, `Series.sample`: https://screenshot.googleplex.com/kPy5swVACMeBhSo - `DataFrame.get`, `Series.get`: https://screenshot.googleplex.com/7hirn5oz2b4L6B3 - `DataFrame.set_index`: https://screenshot.googleplex.com/3CXARrp5hwV6gau - `DataFrame.items`: https://screenshot.googleplex.com/bk3HAiXZQq3TYD9 - `Series.round`: https://screenshot.googleplex.com/C9c4m84NWNMnAwS Fixes internal issues 318011542 and 318011745 🦕
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -1187,6 +1187,47 @@ def set_index(
         Set the DataFrame index (row labels) using one existing column. The
         index can replace the existing index.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'month': [1, 4, 7, 10],
+            ...                     'year': [2012, 2014, 2013, 2014],
+            ...                     'sale': [55, 40, 84, 31]})
+            >>> df
+               month  year  sale
+            0      1  2012    55
+            1      4  2014    40
+            2      7  2013    84
+            3     10  2014    31
+            <BLANKLINE>
+            [4 rows x 3 columns]
+
+        Set the 'month' column to become the index:
+
+            >>> df.set_index('month')
+                   year  sale
+            month
+            1      2012    55
+            4      2014    40
+            7      2013    84
+            10     2014    31
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+        Create a MultiIndex using columns 'year' and 'month':
+
+            >>> df.set_index(['year', 'month'])
+                        sale
+            year month
+            2012 1        55
+            2014 4        40
+            2013 7        84
+            2014 10       31
+            <BLANKLINE>
+            [4 rows x 1 columns]
+
         Args:
             keys:
                 A label. This parameter can be a single column key.
@@ -1621,6 +1662,39 @@ def items(self):
         Iterates over the DataFrame columns, returning a tuple with
         the column name and the content as a Series.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'species': ['bear', 'bear', 'marsupial'],
+            ...                     'population': [1864, 22000, 80000]},
+            ...                    index=['panda', 'polar', 'koala'])
+            >>> df
+                     species  population
+            panda       bear        1864
+            polar       bear       22000
+            koala  marsupial       80000
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> for label, content in df.items():
+            ...     print(f'--> label: {label}')
+            ...     print(f'--> content:\\n{content}')
+            ...
+            --> label: species
+            --> content:
+            panda         bear
+            polar         bear
+            koala    marsupial
+            Name: species, dtype: string
+            --> label: population
+            --> content:
+            panda     1864
+            polar    22000
+            koala    80000
+            Name: population, dtype: Int64
+
         Returns:
             Iterator: Iterator of label, Series for each column.
         """
@@ -4587,7 +4661,7 @@ def index(self):
             ...                     'Location': ['Seattle', 'New York', 'Kona']},
             ...                    index=([10, 20, 30]))
             >>> df
-                Name  Age  Location
+                  Name  Age  Location
             10   Alice   25   Seattle
             20     Bob   30  New York
             30  Aritra   35      Kona
@@ -4603,7 +4677,7 @@ def index(self):
 
             >>> df1 = df.set_index(["Name", "Location"])
             >>> df1
-                            Age
+                             Age
             Name   Location
             Alice  Seattle    25
             Bob    New York   30
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -254,6 +254,55 @@ def get(self, key, default=None):
 
         Returns default value if not found.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame(
+            ...     [
+            ...         [24.3, 75.7, "high"],
+            ...         [31, 87.8, "high"],
+            ...         [22, 71.6, "medium"],
+            ...         [35, 95, "medium"],
+            ...     ],
+            ...     columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
+            ...     index=["2014-02-12", "2014-02-13", "2014-02-14", "2014-02-15"],
+            ... )
+            >>> df
+                        temp_celsius  temp_fahrenheit windspeed
+            2014-02-12          24.3             75.7      high
+            2014-02-13          31.0             87.8      high
+            2014-02-14          22.0             71.6    medium
+            2014-02-15          35.0             95.0    medium
+            <BLANKLINE>
+            [4 rows x 3 columns]
+
+            >>> df.get(["temp_celsius", "windspeed"])
+                        temp_celsius windspeed
+            2014-02-12          24.3      high
+            2014-02-13          31.0      high
+            2014-02-14          22.0    medium
+            2014-02-15          35.0    medium
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+            >>> ser = df['windspeed']
+            >>> ser
+            2014-02-12      high
+            2014-02-13      high
+            2014-02-14    medium
+            2014-02-15    medium
+            Name: windspeed, dtype: string
+            >>> ser.get('2014-02-13')
+            'high'
+
+        If the key is not found, the default value (`None` unless specified) is returned:
+
+            >>> df.get(["temp_celsius", "temp_kelvin"])
+            >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value")
+            'default_value'
+
         Args:
             key: object
 
@@ -410,6 +459,51 @@ def sample(
 
         You can use `random_state` for reproducibility.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'num_legs': [2, 4, 8, 0],
+            ...                     'num_wings': [2, 0, 0, 0],
+            ...                     'num_specimen_seen': [10, 2, 1, 8]},
+            ...                    index=['falcon', 'dog', 'spider', 'fish'])
+            >>> df
+                    num_legs  num_wings  num_specimen_seen
+            falcon         2          2                 10
+            dog            4          0                  2
+            spider         8          0                  1
+            fish           0          0                  8
+            <BLANKLINE>
+            [4 rows x 3 columns]
+
+        Fetch one random row from the DataFrame (note that we use `random_state`
+        to ensure reproducibility of the examples):
+
+            >>> df.sample(random_state=1)
+                 num_legs  num_wings  num_specimen_seen
+            dog         4          0                  2
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
+        A random 50% sample of the DataFrame:
+
+            >>> df.sample(frac=0.5, random_state=1)
+                  num_legs  num_wings  num_specimen_seen
+            dog          4          0                  2
+            fish         0          0                  8
+            <BLANKLINE>
+            [2 rows x 3 columns]
+
+        Extract 3 random elements from the Series `df['num_legs']`:
+
+            >>> s = df['num_legs']
+            >>> s.sample(n=3, random_state=1)
+            dog       4
+            fish      0
+            spider    8
+            Name: num_legs, dtype: Int64
+
         Args:
             n (Optional[int], default None):
                 Number of items from axis to return. Cannot be used with `frac`.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
@@ -722,6 +722,25 @@ def round(self, decimals: int = 0) -> Series:
         """
         Round each value in a Series to the given number of decimals.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([0.1, 1.3, 2.7])
+            >>> s.round()
+            0    0.0
+            1    1.0
+            2    3.0
+            dtype: Float64
+
+            >>> s = bpd.Series([0.123, 1.345, 2.789])
+            >>> s.round(decimals=2)
+            0    0.12
+            1    1.34
+            2    2.79
+            dtype: Float64
+
         Args:
             decimals (int, default 0):
                 Number of decimal places to round to. If decimals is negative,