fasteign/fasteign.py

274 lines
8.1 KiB
Python

#coding: utf8
import argparse
import json
import pprint
import sys
import re
import decimal
from datetime import datetime
import requests
from lxml import html
from sudoisbot import sendmsg
# Message body for a single flat. Flat.template() fills it in with
# price_short, extra and the instance attributes (name, size, url).
TEMPLATE = """
{name}
{price_short} mkr. || {size}
{extra}
{url}
"""
class Flat(object):
    """A single flat listing on mbl.is.

    Built from keyword arguments; keys it does not know about are ignored.
    The ``name`` keyword is required and is stored UTF-8 encoded.
    """

    def __init__(self, *args, **kwargs):
        self.size = kwargs.get('size')
        self.price = kwargs.get('price')
        self.name = kwargs.get('name').encode('utf-8')
        self.flatid = kwargs.get('flatid')
        self.url = "http://www.mbl.is/fasteignir/fasteign/{}/".format(self.flatid)
        self.strings = kwargs.get('strings')
        # self.timestamp was added relatively recently
        self.timestamp = kwargs.get('timestamp', None)
        self.img = kwargs.get('img', [])
        self.date = kwargs.get('date')
        # Now removed but may be relevant if i have to dig into data
        # self.price_str
        # NOTE: self.size_str never existed so don't go looking for it
        # NOTE(review): a 'type' keyword is accepted but not stored - confirm
        # whether that is intended.

    def __iter__(self):
        """Iterate over ``(attribute, value)`` pairs, so ``dict(flat)`` works."""
        # Returning the dict itself is not an iterator and makes iter() fail.
        return iter(self.__dict__.items())

    def __repr__(self):
        return "<Flat {}>".format(self.name)

    def is_like_mine(self):
        """Return True when the size is between 90.0 and 92.0 (inclusive)."""
        if self.size is None:
            return False
        return 90.0 <= self.size <= 92.0

    def price_per_sqm(self):
        """Return price divided by size, both converted to float."""
        return float(self.price) / float(self.size)

    def price_short(self):
        """Return the price in millions as a Decimal, or "" if not numeric."""
        try:
            return decimal.Decimal(self.price) / 1000000
        except (decimal.InvalidOperation, TypeError):
            # InvalidOperation: the price is a string (saying Tilbod).
            # TypeError: the price is missing (None).
            return ""

    def template(self):
        """Render TEMPLATE for this flat."""
        ps = self.price_short()
        extra = "(like mine)" if self.is_like_mine() else ""
        return TEMPLATE.format(price_short=ps, extra=extra, **vars(self))

    def short_template(self):
        """Return a one-line ``"<date>: <price> mkr"`` summary."""
        ps = self.price_short()
        #ts = self.timestamp.split("T")[0] # lol
        ds = self.date
        return "{}: {} mkr".format(ds, ps)

    def send_notification(self, send_imgs=True, verbose=None):
        """Print the rendered template and send it (and the images) to me.

        send_imgs: when true and the flat has images, send them first.
        verbose: print a progress line before sending images. When left as
            None, the ``printall`` flag of the module-level ``args`` is used
            if that name exists (it only does when run as a script).
        """
        if verbose is None:
            # Looking the name up avoids a NameError when imported as a module.
            cli_args = globals().get('args')
            verbose = bool(getattr(cli_args, 'printall', False))
        print(self.template())
        if self.img and send_imgs:
            if verbose:
                print("Sending {} images..".format(len(self.img)))
            sendmsg.send_to_me("", img=self.img)
        # summary on the bottom
        sendmsg.send_to_me(self.template())
def parse_flat_pics(flatid):
    """Request the photo page of a flat on mbl.is.

    Returns a list with the ``src`` value of every image found inside the
    ``realestate_photos`` div.
    """
    photos_url = "http://www.mbl.is/fasteignir/fasteign/{}/photos/".format(flatid)
    response = requests.get(photos_url)
    if response.status_code != 200:
        # Let requests raise its own error for a failed response
        response.raise_for_status()
    page = html.fromstring(response.text)
    return list(page.xpath('//div[@class="realestate_photos"]/a/img/@src'))
def parse_flat(flatid):
    """Request a flat's page on mbl.is and build a Flat from its contents.

    Raises IndexError when one of the expected page elements is missing.
    """
    page_url = "http://www.mbl.is/fasteignir/fasteign/{}/".format(flatid)
    response = requests.get(page_url)
    if response.status_code != 200:
        response.raise_for_status()
    page = html.fromstring(response.text)

    def first(xpath):
        # First node matching the expression; IndexError when none does
        return page.xpath(xpath)[0]

    # Row 12 is looked up but its value is not used below; the lookup is
    # kept so that a page without it still fails here.
    first('//*[@id="realestate-infobox-description"]'
          '/div[1]/table/tbody/tr[12]/td[2]')
    type_cell = first('//*[@id="realestate-infobox-description"]'
                      '/div[1]/table/tbody/tr[5]/td[2]')
    price_text = first('//*[@id="realestate-infobox-description"]'
                       '/div[1]/table/tbody/tr[1]/td[2]').text.strip()
    size_cell = first('//*[@id="realestate-infobox-description"]'
                      '/div[1]/table/tbody/tr[7]/td[2]')
    name_node = first('//*[@id="fs-canvas"]/section/div[1]'
                      '/div/div[1]/span[1]/strong')

    pictures = parse_flat_pics(flatid)

    return Flat(
        name=name_node.text.strip(),
        size=size_from_string(size_cell.text),
        price=price_from_string(price_text),
        flatid=flatid,
        # Raw strings are kept next to the parsed values
        strings={'price': price_text, 'size': size_cell.text},
        type=type_cell.text.strip(),
        img=pictures,
        timestamp=datetime.now().isoformat()
    )
def price_from_string(price):
    """Turn the text of a price cell into an integer.

    Text that starts with "Tilb" (after stripping) is reported as the string
    "Tilbod". Otherwise all digit characters are joined and converted with
    int(); everything else in the text is dropped.
    """
    cleaned = price.strip()
    if cleaned.startswith("Tilb"):
        return "Tilbod"
    digits = "".join(ch for ch in cleaned if ch.isdigit())
    return int(digits)
def size_from_string(size):
    """Extract the size as a float from the text of a size cell.

    The text must contain exactly one number written with a decimal point
    (digits, a dot, digits), e.g. "90.5".

    Raises ValueError when there is no such number or more than one.
    """
    hits = re.findall(r"\d+\.\d+", size)
    if len(hits) != 1:
        # An explicit raise survives `python -O`, unlike assert. {!r} keeps
        # building the message from failing on non-ASCII text in Python 2.
        raise ValueError("regex failed for {!r}".format(size))
    return float(hits[0])
class MblFasteign(object):
    """Keeps track of flats found through an mbl.is search in a JSON file.

    ``existing`` maps a flat id to the stored attribute dict of that flat;
    ``existing_flats`` holds the same entries as Flat objects.
    """

    def __init__(self, filename, printall=False):
        """Load ``filename`` (a missing file counts as empty).

        printall: print progress lines and re-notify about known flats.
        """
        self.filename = filename
        self.printall = printall
        self.existing = self.read_json()
        self.existing_flats = [Flat(**entry) for entry in self.existing.values()]

    def last_flats_like_mine(self, count=3):
        """Return the last ``count`` known flats like mine, ordered by date.

        A falsy ``count`` returns all of them.
        """
        # Flats without a date sort first, also where None and str
        # cannot be compared directly.
        ef = sorted(self.existing_flats, key=lambda a: a.date or "")
        like_mine = [a for a in ef if a.is_like_mine()]
        if count:
            return like_mine[-count:]
        return like_mine

    def send_summary(self, count=3):
        """Send one line per flat from last_flats_like_mine(count)."""
        last = self.last_flats_like_mine(count)
        if not last:
            print("I don't know about any flats")
            return
        summary = "\n".join([a.short_template() for a in last])
        if self.printall:
            print(summary)
        sendmsg.send_to_me(summary)

    def search(self, searchurl):
        """Request ``searchurl`` and return the flat ids in its result list.

        Raises ValueError when the page has no ``resultlist`` element.
        """
        res = requests.get(searchurl)
        if res.status_code != 200:
            res.raise_for_status()
        tree = html.fromstring(res.text)
        try:
            resultlist = tree.xpath('//*[@id="resultlist"]')[0]
        except IndexError:
            raise ValueError("Empty resultlist")
        # 18 characters are cut off each id; presumably the ids look like
        # "realestate-result-<flatid>" - verify against the live page.
        prefix_len = len("realestate-result-")
        ids = (child.get("id") for child in resultlist)
        # Children without an id (e.g. comments) are skipped, not sliced.
        return [elem_id[prefix_len:] for elem_id in ids if elem_id]

    def update(self):
        """Placeholder; does nothing."""
        pass

    def read_json(self):
        """Return the decoded contents of ``self.filename``.

        A file that does not exist yields an empty dict; other I/O errors
        propagate.
        """
        import errno
        try:
            # Bytes are read and decoded explicitly as UTF-8.
            with open(self.filename, "rb") as f:
                return json.loads(f.read().decode('utf-8'))
        except IOError as e:
            if e.errno == errno.ENOENT:
                print("New file: {}".format(self.filename))
                return dict()
            raise

    def parse_new_flats(self, searchurl):
        """Parse and notify about flats in the search that are not known yet.

        Returns True when at least one new flat was added to ``existing``.
        """
        start_count = len(self.existing)
        flatids = self.search(searchurl)
        for flatid in flatids:
            if flatid not in self.existing:
                # Just parse (send a request to mbl) new finds
                #
                # NOTE: this means we do not observe changes in price listings
                # which would be interesting.
                flat = parse_flat(flatid)
                send_imgs = flat.is_like_mine() or self.printall
                flat.send_notification(send_imgs)
                self.existing[flatid] = flat.__dict__
            elif self.printall:
                flat = Flat(**self.existing[flatid])
                if flat.is_like_mine():
                    flat.send_notification()
        return len(self.existing) != start_count

    def write_json(self):
        """Write ``existing`` to ``self.filename`` as indented JSON."""
        # Serialize before opening: "w" truncates the file, so a failure in
        # dumps must happen while the old contents are still intact.
        payload = json.dumps(self.existing, indent=4)
        with open(self.filename, "w") as f:
            f.write(payload)
        if self.printall:
            print("Saved json: {}".format(self.filename))
# Script entry point: look up the configured search, notify about new flats
# and always save the JSON store on the way out.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--filename", required=True, type=str)
    parser.add_argument("--search", default="breidholt", type=str)
    parser.add_argument("--printall", action="store_true")
    parser.add_argument("--summary", action="store_true")
    parser.add_argument("--summary-count", type=int, default=3)
    # Kept as a module-level name; code outside this block may read it.
    args = parser.parse_args()
    searches = {
        'breidholt': "http://www.mbl.is/fasteignir/leit/?q=e09ddca032a239798b5f3c4ac91beb50",
        'test': "http://www.mbl.is/fasteignir/leit/?q=80f323c5382397611e72800316f250d1"
    }
    # Bound before the try so the finally clause can tell whether the
    # constructor succeeded; otherwise a failure there surfaces as a
    # NameError that hides the real error.
    f = None
    try:
        f = MblFasteign(args.filename, printall=args.printall)
        if args.summary:
            f.send_summary(args.summary_count)
            sys.exit(0)
        if args.printall:
            print("Looking up flats from search results..")
        newflats = f.parse_new_flats(searches[args.search])
        if newflats or args.printall:
            f.send_summary()
    except Exception as e:
        # Report the failure to me, then let it propagate
        sendmsg.send_to_me("fasteign.py: {}".format(e))
        raise
    finally:
        if f is not None:
            f.write_json()