#!/usr/local/bin/python2 import xml.etree.ElementTree as etree class S(object): def __init__(self): self._item = {} self._item_copy = self._item.copy self._items = [] self._items_append = self._items.append self._depth_from_image = 0 self._depth_from_name = 0 self._depth_from_price = 0 self._depth_from_region = 0 def start(self, tag, attrib): if self._depth_from_image: if tag == 'a': self._item['url'] = attrib['href'] elif tag == 'img': self._item['image_url'] = attrib['src'] self._depth_from_image += 1 elif attrib.get('class') == 'image': self._depth_from_image += 1 elif attrib.get('class') == 'itemName' or self._depth_from_name: self._depth_from_name += 1 elif attrib.get('class') == 'price' or self._depth_from_price: self._depth_from_price += 1 elif attrib.get('class') == 'region' or self._depth_from_region: self._depth_from_region += 1 def end(self, tag): if self._depth_from_image: self._depth_from_image -= 1 elif self._depth_from_name: self._depth_from_name -= 1 elif self._depth_from_price: self._depth_from_price -= 1 elif self._depth_from_region: self._depth_from_region -= 1 elif tag == 'item': self._items_append(self._item_copy()) def data(self, data): if self._depth_from_name: self._item['name'] = data elif self._depth_from_price: self._item['price'] = data.partition(u'円')[0] elif self._depth_from_region: self._item['region'] = data def close(self): return self._items parser = etree.XMLParser((target=S()) parser.feed("""