|
21 | 21 |
|
22 | 22 | from swift.utils import get_logger, get_seed, is_dist, is_local_master, read_from_jsonl, transform_jsonl_to_df
|
23 | 23 | from swift.utils.torch_utils import _find_local_mac
|
24 |
| -from .media import MediaCache |
| 24 | +from .media import MediaCache, MediaTag |
25 | 25 | from .preprocess import (AlpacaPreprocessor, ClsPreprocessor, ComposePreprocessor, ConversationsPreprocessor,
|
26 | 26 | ListPreprocessor, PreprocessFunc, RenameColumnsPreprocessor, SmartPreprocessor,
|
27 | 27 | TextGenerationPreprocessor, preprocess_sharegpt)
|
@@ -162,6 +162,8 @@ class DatasetName:
|
162 | 162 | midefics = 'midefics'
|
163 | 163 | gqa = 'gqa'
|
164 | 164 | text_caps = 'text-caps'
|
| 165 | + refcoco_unofficial_caption = 'refcoco-unofficial-caption' |
| 166 | + refcoco_unofficial_grounding = 'refcoco-unofficial-grounding' |
165 | 167 | a_okvqa = 'a-okvqa'
|
166 | 168 | okvqa = 'okvqa'
|
167 | 169 | ocr_vqa = 'ocr-vqa'
|
@@ -1112,6 +1114,79 @@ def preprocess(row):
|
1112 | 1114 | load_from_cache_file=False).filter(lambda row: row.get('response')).rename_columns({'image': 'images'})
|
1113 | 1115 |
|
1114 | 1116 |
|
def preprocess_refcoco_unofficial_caption(dataset):
    """Convert a RefCOCO split into grounding-caption samples.

    Downloads the COCO 2014 images once, then maps each row to a sample with
    the local image path and a JSON-encoded ``objects`` list of one
    ``[caption, bbox]`` pair. Rows whose image file is missing on disk get an
    empty ``response`` and are dropped by the trailing ``filter``.

    Args:
        dataset: a HuggingFace-style dataset whose rows carry ``captions``,
            ``bbox`` and ``image_path`` fields (jxu124/refcoco schema —
            TODO confirm against the hub card).

    Returns:
        The mapped and filtered dataset.
    """
    cache_dir = MediaCache.download(
        'https://www.modelscope.cn/api/v1/datasets/we_dont_produce_water/'
        'coco_res/repo?Revision=master&FilePath=coco_2014.zip', 'coco2014')
    # Hoisted out of `preprocess`: the tag is built from constants only, so
    # there is no need to reconstruct it for every row of the dataset.
    media_tag = MediaTag(media_type='image', task_type='grounding_caption')

    def preprocess(row):
        caption = row['captions'][0]
        bbox = row['bbox']
        image_path = os.path.join(cache_dir, row['image_path'].replace('coco/train2014', 'train2014'))
        # Bounding-box coordinates arrive as floats; round to integer pixels.
        for i in range(len(bbox)):
            bbox[i] = round(float(bbox[i]))
        res = {}
        objects = [[caption, bbox]]
        media_tag(res, [image_path])
        res['images'] = [image_path]
        res['objects'] = json.dumps(objects)
        if not os.path.exists(image_path):
            # Blank the response so the filter below discards this row.
            res['response'] = ''
        return res

    return dataset.map(preprocess, load_from_cache_file=False).filter(lambda row: row.get('response'))
| 1141 | + |
| 1142 | + |
# Register the caption variant of the unofficial RefCOCO dataset.
# FIX: the original passed `DatasetName.refcoco_unofficial`, an attribute
# that is never defined on DatasetName (only `refcoco_unofficial_caption`
# and `refcoco_unofficial_grounding` exist), so this module would raise
# AttributeError at import time. Use the caption key, matching the
# caption preprocess function registered here.
register_dataset(
    DatasetName.refcoco_unofficial_caption,
    'swift/refcoco', [],
    preprocess_func=preprocess_refcoco_unofficial_caption,
    get_function=get_dataset_from_repo,
    split=['train', 'validation'],
    hf_dataset_id='jxu124/refcoco',
    huge_dataset=True,
    tags=['multi-modal', 'en', 'caption'])
| 1152 | + |
| 1153 | + |
def preprocess_refcoco_unofficial_grounding(dataset):
    """Convert a RefCOCO split into referring-expression grounding samples.

    Mirrors ``preprocess_refcoco_unofficial_caption`` but tags each sample
    with the ``ref_grounding`` task type. Downloads the COCO 2014 images
    once, then maps each row to a sample with the local image path and a
    JSON-encoded ``objects`` list of one ``[caption, bbox]`` pair. Rows
    whose image file is missing on disk get an empty ``response`` and are
    dropped by the trailing ``filter``.

    Args:
        dataset: a HuggingFace-style dataset whose rows carry ``captions``,
            ``bbox`` and ``image_path`` fields (jxu124/refcoco schema —
            TODO confirm against the hub card).

    Returns:
        The mapped and filtered dataset.
    """
    cache_dir = MediaCache.download(
        'https://www.modelscope.cn/api/v1/datasets/we_dont_produce_water/'
        'coco_res/repo?Revision=master&FilePath=coco_2014.zip', 'coco2014')
    # Hoisted out of `preprocess`: the tag is built from constants only, so
    # there is no need to reconstruct it for every row of the dataset.
    media_tag = MediaTag(media_type='image', task_type='ref_grounding')

    def preprocess(row):
        caption = row['captions'][0]
        bbox = row['bbox']
        image_path = os.path.join(cache_dir, row['image_path'].replace('coco/train2014', 'train2014'))
        # Bounding-box coordinates arrive as floats; round to integer pixels.
        for i in range(len(bbox)):
            bbox[i] = round(float(bbox[i]))
        res = {}
        objects = [[caption, bbox]]
        media_tag(res, [image_path])
        res['images'] = [image_path]
        res['objects'] = json.dumps(objects)
        if not os.path.exists(image_path):
            # Blank the response so the filter below discards this row.
            res['response'] = ''
        return res

    return dataset.map(preprocess, load_from_cache_file=False).filter(lambda row: row.get('response'))
| 1178 | + |
| 1179 | + |
# Register the grounding variant of the unofficial RefCOCO dataset: same
# ModelScope/HF repos and splits as the caption variant above, but routed
# through the `ref_grounding` preprocess function and tagged accordingly.
register_dataset(
    DatasetName.refcoco_unofficial_grounding,
    'swift/refcoco', [],
    preprocess_func=preprocess_refcoco_unofficial_grounding,
    get_function=get_dataset_from_repo,
    split=['train', 'validation'],
    hf_dataset_id='jxu124/refcoco',
    huge_dataset=True,
    tags=['multi-modal', 'en', 'grounding'])
| 1189 | + |
1115 | 1190 | register_dataset(
|
1116 | 1191 | DatasetName.text_caps,
|
1117 | 1192 | 'swift/TextCaps', [],
|
|
0 commit comments