diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
index c6642c5216262..5acf880a54c58 100644
--- a/doc/source/whatsnew/v0.18.1.txt
+++ b/doc/source/whatsnew/v0.18.1.txt
@@ -129,6 +129,7 @@ API changes
 
 - ``Period`` and ``PeriodIndex`` now raises ``IncompatibleFrequency`` error which inherits ``ValueError`` rather than raw ``ValueError`` (:issue:`12615`)
 
+- ``Series.apply`` for category dtype now applies the passed function to each of the ``.categories`` (not the ``.codes``), and returns a ``category`` dtype if possible (:issue:`12473`)
 
 
 - The default for ``.query()/.eval()`` is now ``engine=None``, which will use ``numexpr`` if it's installed; otherwise it will fallback to the ``python`` engine. This mimics the pre-0.18.1 behavior if ``numexpr`` is installed (and which Previously, if numexpr was not installed, ``.query()/.eval()`` would raise). (:issue:`12749`)
@@ -324,4 +325,8 @@ Bug Fixes
 - ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
 - ``pd.read_excel()`` now accepts column names associated with keyword argument ``names``(:issue `12870`)
 
+
 - Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue `12723`)
+
+
+- Bug in ``Series.map`` where a ``TypeError`` was raised if its dtype was ``category`` or tz-aware ``datetime`` (:issue:`12473`)
\ No newline at end of file
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 986f7ad55361a..863d68a7c60e5 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -883,6 +883,30 @@ def remove_unused_categories(self, inplace=False):
         if not inplace:
             return cat
 
+    def map(self, mapper):
+        """
+        Apply mapper function to its categories (not codes).
+
+        Parameters
+        ----------
+        mapper : callable
+            Function to be applied to each category. If the mapped values
+            are accepted as new categories, the result is a Categorical
+            with the same ``ordered`` property as the original. Otherwise
+            (``ValueError`` on construction), the result is an np.ndarray.
+
+        Returns
+        -------
+        applied : Categorical or np.ndarray.
+        """
+        new_categories = self.categories.map(mapper)
+        try:
+            return Categorical.from_codes(self._codes.copy(),
+                                          categories=new_categories,
+                                          ordered=self.ordered)
+        except ValueError:
+            return np.take(new_categories, self._codes)
+
     __eq__ = _cat_compare_op('__eq__')
     __ne__ = _cat_compare_op('__ne__')
     __lt__ = _cat_compare_op('__lt__')
diff --git a/pandas/core/common.py b/pandas/core/common.py
index c0f47a48a46a8..f75b1bbce668f 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -705,7 +705,7 @@ def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
     copy : if True always make a copy even if no upcast is required
     """
 
-    if is_internal_type(values):
+    if is_extension_type(values):
         if copy:
             values = values.copy()
     else:
@@ -1714,7 +1714,7 @@ def is_datetimetz(array):
             is_datetime64tz_dtype(array))
 
 
-def is_internal_type(value):
+def is_extension_type(value):
     """
     if we are a klass that is preserved by the internals
     these are internal klasses that we represent (and don't use a np.array)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 96a2b87a1bdb7..c598a2b719f82 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -27,7 +27,7 @@
     isnull, notnull, PandasError, _try_sort, _default_index, _maybe_upcast,
     is_sequence, _infer_dtype_from_scalar, _values_from_object, is_list_like,
     _maybe_box_datetimelike, is_categorical_dtype, is_object_dtype,
-    is_internal_type, is_datetimetz, _possibly_infer_to_datetimelike,
+    is_extension_type, is_datetimetz, _possibly_infer_to_datetimelike,
     _dict_compat)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
@@ -2594,7 +2594,7 @@ def reindexer(value):
             value = com._possibly_cast_to_datetime(value, dtype)
 
         # return internal types directly
-        if is_internal_type(value):
+        if is_extension_type(value):
             return value
 
         # broadcast across multiple columns if necessary
@@ -4094,7 +4094,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
 
             # we cannot reduce using non-numpy dtypes,
             # as demonstrated in gh-12244
-            if not is_internal_type(values):
+            if not is_extension_type(values):
                 # Create a dummy Series from an empty array
                 index = self._get_axis(axis)
                 empty_arr = np.empty(len(index), dtype=values.dtype)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 463a2da529b5d..a74d2fb45cdbc 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -20,7 +20,7 @@
                                 _maybe_convert_string_to_object,
                                 _maybe_convert_scalar,
                                 is_categorical, is_datetimelike_v_numeric,
-                                is_numeric_v_string_like, is_internal_type)
+                                is_numeric_v_string_like, is_extension_type)
 import pandas.core.algorithms as algos
 from pandas.types.api import DatetimeTZDtype
 
@@ -1765,7 +1765,7 @@ def should_store(self, value):
         return not (issubclass(value.dtype.type,
                                (np.integer, np.floating, np.complexfloating,
                                 np.datetime64, np.bool_)) or
-                    is_internal_type(value))
+                    is_extension_type(value))
 
     def replace(self, to_replace, value, inplace=False, filter=None,
                 regex=False, convert=True, mgr=None):
@@ -3388,10 +3388,10 @@ def set(self, item, value, check=False):
         # FIXME: refactor, clearly separate broadcasting & zip-like assignment
         #        can prob also fix the various if tests for sparse/categorical
 
-        value_is_internal_type = is_internal_type(value)
+        value_is_extension_type = is_extension_type(value)
 
         # categorical/spares/datetimetz
-        if value_is_internal_type:
+        if value_is_extension_type:
 
             def value_getitem(placement):
                 return value
@@ -3463,7 +3463,7 @@ def value_getitem(placement):
             unfit_count = len(unfit_mgr_locs)
 
             new_blocks = []
-            if value_is_internal_type:
+            if value_is_extension_type:
                 # This code (ab-)uses the fact that sparse blocks contain only
                 # one item.
                 new_blocks.extend(
diff --git a/pandas/core/series.py b/pandas/core/series.py
index bf20c5d740133..9fc1bc0dbe969 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -20,7 +20,7 @@
                                 is_categorical_dtype,
                                 _possibly_cast_to_datetime,
                                 _possibly_castable, _possibly_convert_platform,
-                                _try_sort, is_internal_type, is_datetimetz,
+                                _try_sort, is_extension_type, is_datetimetz,
                                 _maybe_match_name, ABCSparseArray,
                                 _coerce_to_dtype, SettingWithCopyError,
                                 _maybe_box_datetimelike, ABCDataFrame,
@@ -2063,15 +2063,21 @@ def map(self, arg, na_action=None):
         y : Series
             same index as caller
         """
-        values = self.asobject
 
-        if na_action == 'ignore':
-            mask = isnull(values)
-
-            def map_f(values, f):
-                return lib.map_infer_mask(values, f, mask.view(np.uint8))
+        if is_extension_type(self.dtype):
+            values = self._values
+            if na_action is not None:
+                raise NotImplementedError
+            map_f = lambda values, f: values.map(f)
         else:
-            map_f = lib.map_infer
+            values = self.asobject
+
+            if na_action == 'ignore':
+                def map_f(values, f):
+                    return lib.map_infer_mask(values, f,
+                                              isnull(values).view(np.uint8))
+            else:
+                map_f = lib.map_infer
 
         if isinstance(arg, (dict, Series)):
             if isinstance(arg, dict):
@@ -2079,12 +2085,11 @@ def map_f(values, f):
 
             indexer = arg.index.get_indexer(values)
             new_values = algos.take_1d(arg._values, indexer)
-            return self._constructor(new_values,
-                                     index=self.index).__finalize__(self)
         else:
-            mapped = map_f(values, arg)
-            return self._constructor(mapped,
-                                     index=self.index).__finalize__(self)
+            new_values = map_f(values, arg)
+
+        return self._constructor(new_values,
+                                 index=self.index).__finalize__(self)
 
     def apply(self, func, convert_dtype=True, args=(), **kwds):
         """
@@ -2193,7 +2198,12 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
         if isinstance(f, np.ufunc):
             return f(self)
 
-        mapped = lib.map_infer(self.asobject, f, convert=convert_dtype)
+        if is_extension_type(self.dtype):
+            mapped = self._values.map(f)
+        else:
+            values = self.asobject
+            mapped = lib.map_infer(values, f, convert=convert_dtype)
+
         if len(mapped) and isinstance(mapped[0], Series):
             from pandas.core.frame import DataFrame
             return DataFrame(mapped.tolist(), index=self.index)
@@ -2779,7 +2789,7 @@ def _try_cast(arr, take_fast_path):
 
         try:
             subarr = _possibly_cast_to_datetime(arr, dtype)
-            if not is_internal_type(subarr):
+            if not is_extension_type(subarr):
                 subarr = np.array(subarr, dtype=dtype, copy=copy)
         except (ValueError, TypeError):
             if is_categorical_dtype(dtype):
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index 644b6720dfaac..f606f4a649047 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -2194,11 +2194,22 @@ def groupby(self, to_groupby):
         -------
         groups : dict
             {group name -> group labels}
-
         """
         return self._groupby(self.values, _values_from_object(to_groupby))
 
     def map(self, mapper):
+        """
+        Apply mapper function to its values.
+
+        Parameters
+        ----------
+        mapper : callable
+            Function to be applied.
+
+        Returns
+        -------
+        applied : array
+        """
         return self._arrmap(self.values, mapper)
 
     def isin(self, values, level=None):
diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py
index 16b8fd8df4e2a..98cb028aefae8 100644
--- a/pandas/indexes/category.py
+++ b/pandas/indexes/category.py
@@ -468,6 +468,24 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None):
                                            na_value=-1)
         return self._create_from_codes(taken)
 
+    def map(self, mapper):
+        """
+        Apply mapper function to its categories (not codes).
+
+        Parameters
+        ----------
+        mapper : callable
+            Function to be applied to each category. If the mapped values
+            are accepted as new categories, the result is a Categorical
+            with the same ``ordered`` property as the original. Otherwise,
+            the result is an np.ndarray.
+
+        Returns
+        -------
+        applied : Categorical or np.ndarray.
+        """
+        return self.values.map(mapper)
+
     def delete(self, loc):
         """
         Make new Index with passed location(-s) deleted
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index a8534309c115c..fa8f6a291c677 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -201,6 +201,33 @@ def test_min_max(self):
         self.assertEqual(ci.min(), 'c')
         self.assertEqual(ci.max(), 'b')
 
+    def test_map(self):
+        ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'),
+                                 ordered=True)
+        result = ci.map(lambda x: x.lower())
+        exp = pd.Categorical(list('ababc'), categories=list('cba'),
+                             ordered=True)
+        tm.assert_categorical_equal(result, exp)
+
+        ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
+                                 ordered=False, name='XXX')
+        result = ci.map(lambda x: x.lower())
+        exp = pd.Categorical(list('ababc'), categories=list('bac'),
+                             ordered=False)
+        tm.assert_categorical_equal(result, exp)
+
+        tm.assert_numpy_array_equal(ci.map(lambda x: 1), np.array([1] * 5))
+
+        # change categories dtype
+        ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
+                                 ordered=False)
+        def f(x):
+            return {'A': 10, 'B': 20, 'C': 30}.get(x)
+        result = ci.map(f)
+        exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30],
+                             ordered=False)
+        tm.assert_categorical_equal(result, exp)
+
     def test_append(self):
 
         ci = self.create_index()
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 9182b16d1f5b5..af648d34637df 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1567,6 +1567,25 @@ def test_sortlevel(self):
         res = s.sortlevel(['A', 'B'], sort_remaining=False)
         assert_series_equal(s, res)
 
+    def test_apply_categorical(self):
+        values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
+                                ordered=True)
+        s = pd.Series(values, name='XX', index=list('abcdefg'))
+        result = s.apply(lambda x: x.lower())
+
+        # should be categorical dtype when the number of categories is
+        # the same
+        values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
+                                ordered=True)
+        exp = pd.Series(values, name='XX', index=list('abcdefg'))
+        tm.assert_series_equal(result, exp)
+        tm.assert_categorical_equal(result.values, exp.values)
+
+        result = s.apply(lambda x: 'A')
+        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
+        tm.assert_series_equal(result, exp)
+        self.assertEqual(result.dtype, np.object)
+
     def test_shift_int(self):
         ts = self.ts.astype(int)
         shifted = ts.shift(1)
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index 87369a0e6ef90..154837fc2a3b1 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -110,6 +110,32 @@ def test_apply_box(self):
         exp = pd.Series(['Period_M', 'Period_M'])
         tm.assert_series_equal(res, exp)
 
+    def test_apply_datetimetz(self):
+        values = pd.date_range('2011-01-01', '2011-01-02',
+                               freq='H').tz_localize('Asia/Tokyo')
+        s = pd.Series(values, name='XX')
+
+        result = s.apply(lambda x: x + pd.offsets.Day())
+        exp_values = pd.date_range('2011-01-02', '2011-01-03',
+                                   freq='H').tz_localize('Asia/Tokyo')
+        exp = pd.Series(exp_values, name='XX')
+        tm.assert_series_equal(result, exp)
+
+        # change dtype
+        result = s.apply(lambda x: x.hour)
+        exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32)
+        tm.assert_series_equal(result, exp)
+
+        # not vectorized
+        def f(x):
+            if not isinstance(x, pd.Timestamp):
+                raise ValueError
+            return str(x.tz)
+
+        result = s.map(f)
+        exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
+        tm.assert_series_equal(result, exp)
+
 
 class TestSeriesMap(TestData, tm.TestCase):
 
@@ -255,3 +281,53 @@ def test_map_box(self):
                                                x.freqstr))
         exp = pd.Series(['Period_M', 'Period_M'])
         tm.assert_series_equal(res, exp)
+
+    def test_map_categorical(self):
+        values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
+                                ordered=True)
+        s = pd.Series(values, name='XX', index=list('abcdefg'))
+
+        result = s.map(lambda x: x.lower())
+        exp_values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
+                                    ordered=True)
+        exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
+        tm.assert_series_equal(result, exp)
+        tm.assert_categorical_equal(result.values, exp_values)
+
+        result = s.map(lambda x: 'A')
+        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
+        tm.assert_series_equal(result, exp)
+        self.assertEqual(result.dtype, np.object)
+
+        with tm.assertRaises(NotImplementedError):
+            s.map(lambda x: x, na_action='ignore')
+
+    def test_map_datetimetz(self):
+        values = pd.date_range('2011-01-01', '2011-01-02',
+                               freq='H').tz_localize('Asia/Tokyo')
+        s = pd.Series(values, name='XX')
+
+        # keep tz
+        result = s.map(lambda x: x + pd.offsets.Day())
+        exp_values = pd.date_range('2011-01-02', '2011-01-03',
+                                   freq='H').tz_localize('Asia/Tokyo')
+        exp = pd.Series(exp_values, name='XX')
+        tm.assert_series_equal(result, exp)
+
+        # change dtype
+        result = s.map(lambda x: x.hour)
+        exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32)
+        tm.assert_series_equal(result, exp)
+
+        with tm.assertRaises(NotImplementedError):
+            s.map(lambda x: x, na_action='ignore')
+
+        # not vectorized
+        def f(x):
+            if not isinstance(x, pd.Timestamp):
+                raise ValueError
+            return str(x.tz)
+
+        result = s.map(f)
+        exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
+        tm.assert_series_equal(result, exp)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index a0e6241383289..a1cc05b0c9873 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1551,6 +1551,24 @@ def test_comparison_with_unknown_scalars(self):
         self.assert_numpy_array_equal(cat == 4, [False, False, False])
         self.assert_numpy_array_equal(cat != 4, [True, True, True])
 
+    def test_map(self):
+        c = pd.Categorical(list('ABABC'), categories=list('CBA'),
+                           ordered=True)
+        result = c.map(lambda x: x.lower())
+        exp = pd.Categorical(list('ababc'), categories=list('cba'),
+                             ordered=True)
+        tm.assert_categorical_equal(result, exp)
+
+        c = pd.Categorical(list('ABABC'), categories=list('ABC'),
+                           ordered=False)
+        result = c.map(lambda x: x.lower())
+        exp = pd.Categorical(list('ababc'), categories=list('abc'),
+                             ordered=False)
+        tm.assert_categorical_equal(result, exp)
+
+        result = c.map(lambda x: 1)
+        tm.assert_numpy_array_equal(result, np.array([1] * 5))
+
 
 class TestCategoricalAsBlock(tm.TestCase):
     _multiprocess_can_split_ = True
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 89200ef79dac9..7a604d0e7341b 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -888,11 +888,11 @@ def assertNotIsInstance(obj, cls, msg=''):
 
 
 def assert_categorical_equal(res, exp):
+    assertIsInstance(res, pd.Categorical, '[Categorical] ')
+    assertIsInstance(exp, pd.Categorical, '[Categorical] ')
+
+    assert_index_equal(res.categories, exp.categories)
 
-    if not array_equivalent(res.categories, exp.categories):
-        raise AssertionError(
-            'categories not equivalent: {0} vs {1}.'.format(res.categories,
-                                                            exp.categories))
     if not array_equivalent(res.codes, exp.codes):
         raise AssertionError(
             'codes not equivalent: {0} vs {1}.'.format(res.codes, exp.codes))