diff --git a/.gitignore b/.gitignore
index de8e61a..e92fd7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ node_modules
 python2.7
 2.7
 *.pyc
+logs/*
diff --git a/ads/__init__.pyc b/ads/__init__.pyc
deleted file mode 100644
index a25ef20..0000000
Binary files a/ads/__init__.pyc and /dev/null differ
diff --git a/ads/migrations/0003_auto_20151029_1429.py b/ads/migrations/0003_auto_20151029_1429.py
new file mode 100644
index 0000000..2835982
--- /dev/null
+++ b/ads/migrations/0003_auto_20151029_1429.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('ads', '0002_ads_link'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='ads',
+            name='link',
+            field=models.TextField(default=b'', null=True),
+            preserve_default=True,
+        ),
+    ]
diff --git a/ads/migrations/0004_ads_placeholders.py b/ads/migrations/0004_ads_placeholders.py
new file mode 100644
index 0000000..51aa70b
--- /dev/null
+++ b/ads/migrations/0004_ads_placeholders.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+import multiselectfield.db.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('ads', '0003_auto_20151029_1429'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='ads',
+            name='placeholders',
+            field=multiselectfield.db.fields.MultiSelectField(blank=True, max_length=165, null=True, choices=[(b'actu-top', b'Page Actu, haut de page'), (b'actu-side', b'Page Actu, colonne droite'), (b'actu-bottom', b'Page Actu, base de page'), (b'results-top', b'Pages R\xc3\xa9sultats, haut de page'), (b'results-side', b'Pages R\xc3\xa9sultats, colonne droite'), (b'results-bottom', b'Pages R\xc3\xa9sultats, base de page'), (b'blog-top', b'Pages Blog, haut de page'), (b'blog-side', b'Pages Blog, colonne droite'), (b'blog-bottom', b'Pages Blog, base de page'), (b'shop-top', b'Page Angulation, haut de page'), (b'shop-bottom', b'Page Angulation, base de page'), (b'sponsors-top', b'Page Mentors & Sponsors, haut de page'), (b'sponsors-side', b'Page Mentors & Sponsors, colonne droite'), (b'sponsors-bottom', b'Page Mentors & Sponsors, base de page')]),
+            preserve_default=True,
+        ),
+    ]
diff --git a/ads/models.py b/ads/models.py
index 49b865e..5a275c9 100644
--- a/ads/models.py
+++ b/ads/models.py
@@ -1,6 +1,38 @@
 from django.db import models
+from django.conf import settings
+from multiselectfield import MultiSelectField
 
-# Create your models here.
+
+class AdsManager(models.Manager):
+    def get_queryset(self):
+        return super(AdsManager, self).get_queryset()
+
+    def by_placeholder(self, placeholder):
+        return super(AdsManager, self).get_queryset().filter(
+            placeholders__contains=placeholder
+        )
+
+    def by_category(self, category):
+
+        if category == 'square':
+            qs = super(AdsManager, self).get_queryset().filter(square=1)
+        elif category == 'horizontal':
+            qs = super(AdsManager, self).get_queryset().filter(horizontal=1)
+        elif category == 'vertical':
+            qs = super(AdsManager, self).get_queryset().filter(vertical=1)
+        else:
+            qs = super(AdsManager, self).get_queryset()
+
+        return qs
+
+    def by_placeholder_and_category(self, placeholder, category):
+        criteria = {
+            'placeholders__contains': placeholder,
+            category: 1,
+        }
+        qs = super(AdsManager, self).get_queryset().filter(**criteria)
+
+        return qs
 
 
 class Ads(models.Model):
@@ -8,6 +40,11 @@ class Ads(models.Model):
     url = models.TextField(null=True)
     secureUrl = models.TextField(null=True)
     link = models.TextField(null=True, default='')
+    placeholders = MultiSelectField(
+        choices=settings.ADS_PLACEHOLDERS,
+        null=True,
+        blank=True,
+    )
     horizontal = models.PositiveSmallIntegerField(null=True)
     vertical = models.PositiveSmallIntegerField(null=True)
     square = models.PositiveSmallIntegerField(null=True)
@@ -16,5 +53,14 @@ class Ads(models.Model):
         auto_now=False
     )
 
+    objects = AdsManager()
+
     def __unicode__(self):
         return u'%s' % self.name
+
+
+class Placeholder(object):
+
+    def __init__(self, code, label):
+        self.code = code
+        self.label = label
diff --git a/ads/models.pyc b/ads/models.pyc
deleted file mode 100644
index 18c2b2d..0000000
Binary files a/ads/models.pyc and /dev/null differ
diff --git a/ads/urls.pyc b/ads/urls.pyc
deleted file mode 100644
index b77a591..0000000
Binary files a/ads/urls.pyc and /dev/null differ
diff --git a/apiv1/serializers.py b/apiv1/serializers.py
index c7b4c28..4d48bb1 100644
--- a/apiv1/serializers.py
+++ b/apiv1/serializers.py
@@ -23,10 +23,15 @@ class AdsSerializer(serializers.ModelSerializer):
 
     class Meta:
         model = Ads
-        fields = ('id', 'name', 'link', 'url', 'secureUrl',
+        fields = ('id', 'name', 'link', 'url', 'secureUrl', 'placeholders',
                   'horizontal', 'vertical', 'square', 'date', )
 
 
+class AdsPlaceholdersSerializer(serializers.Serializer):
+    code = serializers.CharField(max_length=30)
+    label = serializers.CharField(max_length=100)
+
+
 class BloggersSerializer(serializers.ModelSerializer):
 
     class Meta:
diff --git a/apiv1/urls.py b/apiv1/urls.py
index 076040d..0a42129 100644
--- a/apiv1/urls.py
+++ b/apiv1/urls.py
@@ -46,6 +46,10 @@
         view=apiv1.views.AdsCreateReadView.as_view(),
         name='REST View'),
 
+    url(regex=r'^ads/placeholders/$',
+        view=apiv1.views.AdsPlaceholdersReadView.as_view(),
+        name='REST View'),
+
     url(regex=r'^ads/(?P<pk>[-\w]+)/$',
         view=apiv1.views.AdsReadUpdateDeleteView.as_view(),
         name='REST View'),
diff --git a/apiv1/views.py b/apiv1/views.py
index 677e15f..ed93de4 100644
--- a/apiv1/views.py
+++ b/apiv1/views.py
@@ -4,13 +4,13 @@
     RetrieveUpdateDestroyAPIView,
 )
 from rest_framework.permissions import (
-    IsAuthenticated,
     IsAuthenticatedOrReadOnly
 )
 
 from apiv1.serializers import (
     NewsSerializer,
     AdsSerializer,
+    AdsPlaceholdersSerializer,
     BloggersSerializer,
     BlogPostsSerializer,
     SkiclubsSerializer,
@@ -22,9 +22,9 @@
 
 from django.utils import timezone
 from django.core.cache import cache
-
+from django.conf import settings
 from news.models import News
-from ads.models import Ads
+from ads.models import Ads, Placeholder
 from skiclubs.models import Skiclubs
 from pages.models import Pages
 from rankings.models import Races
@@ -43,7 +43,7 @@ class NewsCreateReadView(ListCreateAPIView):
 
     def get_queryset(self):
         now = timezone.now()
-        return News.objects.filter(date__lte=now).order_by('date').reverse()
+        return News.objects.filter(date__lte=now).exclude(mag=1).order_by('date').reverse()
 
 
 class MagCreateReadView(NewsCreateReadView):
@@ -87,15 +87,38 @@ class AdsCreateReadView(ListCreateAPIView):
     permission_classes = (IsAuthenticatedOrReadOnly, )
 
     def get_queryset(self):
-        if self.request.GET.get('category'):
-            cat = self.request.GET.get('category')
-            if cat == 'square':
-                return Ads.objects.filter(square=1)
-            if cat == 'horizontal':
-                return Ads.objects.filter(horizontal=1)
-            if cat == 'vertical':
-                return Ads.objects.filter(vertical=1)
-        return Ads.objects.all()
+        ads = []
+        placeholder = self.request.GET.get('placeholder', '')
+        ads_placeholders = getattr(settings, 'ADS_PLACEHOLDERS', None)
+
+        if placeholder == '' or placeholder in dict(ads_placeholders).keys():
+            category = self.request.GET.get('category', None)
+
+            if category is not None:
+                ads = Ads.objects.by_placeholder_and_category(
+                    placeholder,
+                    category
+                )
+            else:
+                ads = Ads.objects.all()
+
+        return ads
+
+
+class AdsPlaceholdersReadView(ListAPIView):
+    serializer_class = AdsPlaceholdersSerializer
+    permission_classes = (IsAuthenticatedOrReadOnly, )
+
+    def get_queryset(self):
+        placeholders = getattr(settings, 'ADS_PLACEHOLDERS', None)
+
+        if placeholders is not None:
+            return [
+                Placeholder(code=code, label=label)
+                for code, label in placeholders
+            ]
+        else:
+            return []
 
 
 class AdsReadUpdateDeleteView(RetrieveUpdateDestroyAPIView):
diff --git a/blogs/__init__.pyc b/blogs/__init__.pyc
deleted file mode 100644
index 1ab5610..0000000
Binary files a/blogs/__init__.pyc and /dev/null differ
diff --git a/blogs/models.pyc b/blogs/models.pyc
deleted file mode 100644
index 0d6c69d..0000000
Binary files a/blogs/models.pyc and /dev/null differ
diff --git a/blogs/urls.pyc b/blogs/urls.pyc
deleted file mode 100644
index 251746f..0000000
Binary files a/blogs/urls.pyc and /dev/null differ
diff --git a/blogs/views.pyc b/blogs/views.pyc
deleted file mode 100644
index 1ae51ca..0000000
Binary files a/blogs/views.pyc and /dev/null differ
diff --git a/db.sqlite b/db.sqlite
index 6587cdc..485a758 100644
Binary files a/db.sqlite and b/db.sqlite differ
diff --git a/logs/.keep b/logs/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/news/__init__.pyc b/news/__init__.pyc
deleted file mode 100644
index fd80be8..0000000
Binary files a/news/__init__.pyc and /dev/null differ
diff --git a/news/models.pyc b/news/models.pyc
deleted file mode 100644
index ece4666..0000000
Binary files a/news/models.pyc and /dev/null differ
diff --git a/news/urls.pyc b/news/urls.pyc
deleted file mode 100644
index 23790d4..0000000
Binary files a/news/urls.pyc and /dev/null differ
diff --git a/news/views.pyc b/news/views.pyc
deleted file mode 100644
index 8b26f52..0000000
Binary files a/news/views.pyc and /dev/null differ
diff --git a/pages/__init__.pyc b/pages/__init__.pyc
deleted file mode 100644
index e6a25c7..0000000
Binary files a/pages/__init__.pyc and /dev/null differ
diff --git a/pages/models.pyc b/pages/models.pyc
deleted file mode 100644
index a4abb04..0000000
Binary files a/pages/models.pyc and /dev/null differ
diff --git a/pages/urls.pyc b/pages/urls.pyc
deleted file mode 100644
index 0cc8c80..0000000
Binary files a/pages/urls.pyc and /dev/null differ
diff --git a/pages/views.pyc b/pages/views.pyc
deleted file mode 100644
index d6a417a..0000000
Binary files a/pages/views.pyc and /dev/null differ
diff --git a/python_core/__init__.pyc b/python_core/__init__.pyc
deleted file mode 100644
index 060c919..0000000
Binary files a/python_core/__init__.pyc and /dev/null differ
diff --git a/python_core/settings.py b/python_core/settings.py
index adc45a7..8dbe15b 100644
--- a/python_core/settings.py
+++ b/python_core/settings.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Django settings for python_core project.
 
@@ -61,6 +62,7 @@
     'skiclubs',
     'widgets',
     'angulation',
+    'multiselectfield',
 )
 
 MIDDLEWARE_CLASSES = (
@@ -142,3 +144,65 @@
 # https://docs.djangoproject.com/en/1.6/howto/static-files/
 
 STATIC_URL = '/static/'
+
+LOGGING = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'verbose': {
+            'format': '%(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s'
+        },
+        'simple': {
+            'format': '%(levelname)s %(message)s'
+        },
+        'spider': {
+            'format': '%(asctime)s - %(levelname)s - %(message)s'
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            'class': 'logging.StreamHandler',
+            'formatter': 'simple'
+        },
+        'spider': {
+            'level': 'DEBUG',
+            'class': 'logging.handlers.TimedRotatingFileHandler',
+            'filename': os.path.join(BASE_DIR, 'logs', 'spider'),
+            'when': 'midnight',
+            'formatter': 'spider',
+            'backupCount': 5,
+        },
+    },
+    'loggers': {
+        'dev': {
+            'handlers': ['console'],
+            'level': 'DEBUG',
+            'propagate': True,
+        },
+        'spider': {
+            'handlers': ['console', 'spider'],
+            'level': 'DEBUG',
+            'propagate': True,
+        },
+    },
+}
+
+
+ADS_PLACEHOLDERS = (
+    ('sides', "Toutes les pages, de chaque côté"),
+    ('actu-top', "Page Actu, haut de page"),
+    ('actu-side', "Page Actu, colonne droite"),
+    ('actu-bottom', "Page Actu, base de page"),
+    ('results-top', "Pages Résultats, haut de page"),
+    ('results-side', "Pages Résultats, colonne droite"),
+    ('results-bottom', "Pages Résultats, base de page"),
+    ('blog-top', "Pages Blog, haut de page"),
+    ('blog-side', "Pages Blog, colonne droite"),
+    ('blog-bottom', "Pages Blog, base de page"),
+    ('shop-top', "Page Angulation, haut de page"),
+    ('shop-bottom', "Page Angulation, base de page"),
+    ('sponsors-top', "Page Mentors & Sponsors, haut de page"),
+    ('sponsors-side', "Page Mentors & Sponsors, colonne droite"),
+    ('sponsors-bottom', "Page Mentors & Sponsors, base de page"),
+)
diff --git a/python_core/settings.pyc b/python_core/settings.pyc
deleted file mode 100644
index 393a5d4..0000000
Binary files a/python_core/settings.pyc and /dev/null differ
diff --git a/python_core/settings_server.py b/python_core/settings_server.py
index 6a7cc27..57617b8 100644
--- a/python_core/settings_server.py
+++ b/python_core/settings_server.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Note: Make sure that wsgi.py is referencing this file, in case of error 400.
 
@@ -144,3 +145,55 @@
 # https://docs.djangoproject.com/en/1.6/howto/static-files/
 
 STATIC_URL = '/static/'
+
+LOGGING = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'verbose': {
+            'format': '%(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s'
+        },
+        'simple': {
+            'format': '%(levelname)s %(message)s'
+        },
+        'spider': {
+            'format': '%(asctime)s - %(levelname)s - %(message)s'
+        }
+    },
+    'handlers': {
+        'spider': {
+            'level': 'INFO',
+            'class': 'logging.handlers.TimedRotatingFileHandler',
+            'filename': os.path.join(BASE_DIR, 'logs', 'spider'),
+            'when': 'midnight',
+            'formatter': 'spider',
+            'backupCount': 15,
+        },
+    },
+    'loggers': {
+        'spider': {
+            'handlers': ['spider'],
+            'level': 'INFO',
+            'propagate': True,
+        },
+    },
+}
+
+
+ADS_PLACEHOLDERS = (
+    ('sides', "Toutes les pages, de chaque côté"),
+    ('actu-top', "Page Actu, haut de page"),
+    ('actu-side', "Page Actu, colonne droite"),
+    ('actu-bottom', "Page Actu, base de page"),
+    ('results-top', "Pages Résultats, haut de page"),
+    ('results-side', "Pages Résultats, colonne droite"),
+    ('results-bottom', "Pages Résultats, base de page"),
+    ('blog-top', "Pages Blog, haut de page"),
+    ('blog-side', "Pages Blog, colonne droite"),
+    ('blog-bottom', "Pages Blog, base de page"),
+    ('shop-top', "Page Angulation, haut de page"),
+    ('shop-bottom', "Page Angulation, base de page"),
+    ('sponsors-top', "Page Mentors & Sponsors, haut de page"),
+    ('sponsors-side', "Page Mentors & Sponsors, colonne droite"),
+    ('sponsors-bottom', "Page Mentors & Sponsors, base de page"),
+)
diff --git a/python_core/urls.pyc b/python_core/urls.pyc
deleted file mode 100644
index 1d49895..0000000
Binary files a/python_core/urls.pyc and /dev/null differ
diff --git a/python_core/wsgi.pyc b/python_core/wsgi.pyc
deleted file mode 100644
index 635cfa3..0000000
Binary files a/python_core/wsgi.pyc and /dev/null differ
diff --git a/rankings/__init__.pyc b/rankings/__init__.pyc
deleted file mode 100644
index 5c306d9..0000000
Binary files a/rankings/__init__.pyc and /dev/null differ
diff --git a/rankings/fis/fis/__init__.pyc b/rankings/fis/fis/__init__.pyc
deleted file mode 100644
index 380de96..0000000
Binary files a/rankings/fis/fis/__init__.pyc and /dev/null differ
diff --git a/rankings/fis/fis/items.py b/rankings/fis/fis/items.py
index 43f20a2..c9a4805 100644
--- a/rankings/fis/fis/items.py
+++ b/rankings/fis/fis/items.py
@@ -17,6 +17,19 @@ class FisRaces(Item):
     discipline = Field()
     table = Field()
 
+    # def __repr__(self):
+    #     """only print out few data after exiting the Pipeline"""
+    #     return repr({
+    #         "id": self["id"],
+    #         "category": self["category"],
+    #         "date": self["date"],
+    #         "discipline": self["discipline"],
+    #         "genre": self["genre"],
+    #         "info": self["info"],
+    #         "location": self["location"],
+    #         "link": self["link"],
+    #     })
+
 
 class FisRanking(Item):
     id = Field()
diff --git a/rankings/fis/fis/items.pyc b/rankings/fis/fis/items.pyc
deleted file mode 100644
index 6b43676..0000000
Binary files a/rankings/fis/fis/items.pyc and /dev/null differ
diff --git a/rankings/fis/fis/pipelines.py b/rankings/fis/fis/pipelines.py
index 41f1d1f..804f0ab 100644
--- a/rankings/fis/fis/pipelines.py
+++ b/rankings/fis/fis/pipelines.py
@@ -6,6 +6,9 @@
 import os
 import sys
 from time import strptime, mktime
+from rankings.models import Races
+
+
 CURRENT_DIR = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
 SCRAPY_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir))
 MODULE_DIR = os.path.abspath(os.path.join(SCRAPY_DIR, os.pardir))
@@ -20,8 +23,6 @@
 # os.environ['DJANGO_SETTINGS_MODULE'] = 'python_core.settings'
 os.environ['DJANGO_SETTINGS_MODULE'] = 'python_core.settings_server'
 
-from rankings.models import Races
-
 
 class FisPipeline(object):
 
@@ -30,7 +31,12 @@ def process_item(self, item, spider):
             return item
         # Here we'll register the current item in the database:
-        race, created = Races.objects.get_or_create(raceId=int(item['id']))
+        try:
+            race = Races.objects.get(raceId=int(item['id']))
+        except Races.DoesNotExist:
+            race = Races()
+
+        race.raceId = int(item['id'])
         race.info = item['info'].strip()
         race.category = item['category'].strip()
         race.genre = item['genre'].strip()
@@ -38,7 +44,10 @@ def process_item(self, item, spider):
         race.location = item['location'].strip()
         race.discipline = item['discipline'].strip()
         race.raceId = item['id']
-        race.table = item['table'].strip()
+        race.table = item['table']
         race.date = mktime(strptime(item['date'].strip(), '%d.%m.%Y'))
-        race.save()
+        try:
+            race.save()
+        except:
+            import pdb; pdb.set_trace()
         return item
diff --git a/rankings/fis/fis/pipelines.pyc b/rankings/fis/fis/pipelines.pyc
deleted file mode 100644
index a1bdfda..0000000
Binary files a/rankings/fis/fis/pipelines.pyc and /dev/null differ
diff --git a/rankings/fis/fis/settings.py b/rankings/fis/fis/settings.py
index 79749ab..f785eef 100644
--- a/rankings/fis/fis/settings.py
+++ b/rankings/fis/fis/settings.py
@@ -37,3 +37,5 @@
     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
     "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
 ]
+
+LOG_LEVEL = 'ERROR'
diff --git a/rankings/fis/fis/settings.pyc b/rankings/fis/fis/settings.pyc
deleted file mode 100644
index 086198d..0000000
Binary files a/rankings/fis/fis/settings.pyc and /dev/null differ
diff --git a/rankings/fis/fis/spiders/__init__.pyc b/rankings/fis/fis/spiders/__init__.pyc
deleted file mode 100644
index 7a29eeb..0000000
Binary files a/rankings/fis/fis/spiders/__init__.pyc and /dev/null differ
diff --git a/rankings/fis/fis/spiders/races.py b/rankings/fis/fis/spiders/races.py
index d0e744d..1642ff1 100644
--- a/rankings/fis/fis/spiders/races.py
+++ b/rankings/fis/fis/spiders/races.py
@@ -24,14 +24,18 @@
 # Other imports
 from scrapy.selector import Selector
-from scrapy.spider import BaseSpider
+from scrapy.spiders import Spider
 from scrapy.http import Request
 
 from fis.items import FisRaces
 from rankings.models import Races
+import logging
 
 
-class MyCrawlerSpider(BaseSpider):
+logger = logging.getLogger('spider')
+
+
+class MyCrawlerSpider(Spider):
 
     # nom du crawler à spécifier lors de l'exécution
     name = 'races'
@@ -45,10 +49,12 @@ class MyCrawlerSpider(BaseSpider):
     jump = 3000
 
     def start_requests(self):
-        for i in xrange(self.max_newsid, self.max_newsid + self.jump):
+        for i in xrange(self.max_newsid - self.jump, self.max_newsid + self.jump):
             yield Request(
-                'http://data.fis-ski.com/dynamic/results.html?sector=AL&raceid=%d' % i,
-                callback=self.parse_item)
+                'http://data.fis-ski.com/dynamic/'
+                'results.html?sector=AL&raceid=%d' % i,
+                callback=self.parse_item
+            )
 
     def parse_item(self, response):
         hxs = Selector(response)
@@ -56,13 +62,27 @@
         url = response.url
         item['link'] = url
         item['id'] = url[url.index('raceid=') + 7:]
-        item['date'] = hxs.xpath(
-            '//div[contains(@class, "padding-content")]/h3/span/text()').extract()[0].strip()
-        item['location'] = hxs.xpath(
"padding-content")]/h3/a/text()').extract()[0].strip() - info = hxs.xpath( - '//div[contains(@class, "padding-content")]/div/div/h4/text()').extract()[0].strip() + try: + item['date'] = hxs.xpath( + '//div[contains(@class, "padding-content")]' + '/h3/span/text()' + ).extract()[0].strip() + + item['location'] = hxs.xpath( + '//div[contains(@class, "padding-content")]' + '/h3/a/text()' + ).extract()[0].strip() + + info = hxs.xpath( + '//div[contains(@class, "padding-content")]' + '/div/div/h4/text()' + ).extract()[0].strip() + + except IndexError: + logger.warning('No data to parse on race #%s' % item['id']) + return None + item['info'] = info item['genre'] = 'H' if 'Men' in info else 'F' @@ -95,9 +115,15 @@ def parse_item(self, response): item['discipline'] = 'Other' tables = hxs.xpath( - '//table[contains(@class, "footable table-datas table-withpadding")]') + '//table[contains(@class, "footable table-datas ' + 'table-withpadding")]' + ) + place = (len(tables) - 1) item['table'] = tables.extract()[place].strip() + if 'No results available' in item['table']: + logger.warning('No result available for race #%s' % item['id']) return None + return item diff --git a/rankings/fis/fis/spiders/races.pyc b/rankings/fis/fis/spiders/races.pyc deleted file mode 100644 index 9498c95..0000000 Binary files a/rankings/fis/fis/spiders/races.pyc and /dev/null differ diff --git a/rankings/fis/fis/spiders/ranking.py b/rankings/fis/fis/spiders/ranking.py index 82d4782..d911810 100644 --- a/rankings/fis/fis/spiders/ranking.py +++ b/rankings/fis/fis/spiders/ranking.py @@ -1,11 +1,15 @@ -# -*- coding: utf-8 -*- + # -*- coding: utf-8 -*- import urlparse from scrapy.selector import Selector -from scrapy.spider import BaseSpider +from scrapy.spiders import Spider from scrapy.http import Request from fis.items import FisRanking +import logging + + +logger = logging.getLogger('spider') def board_section(url): @@ -17,7 +21,7 @@ def board_section(url): return path_sections[-1] -class RankingSpider(BaseSpider): +class RankingSpider(Spider): # nom du crawler à spécifier lors de l'exécution name = 'ranking' @@ -41,6 +45,7 @@ def start_requests(self): 'http://www.fis-ski.com/alpine-skiing/leader-board/%s' % path, callback=self.parse_item ) + logger.warning(path) def parse_item(self, response): hxs = Selector(response) @@ -58,20 +63,29 @@ def parse_item(self, response): women = [] men_flag = False previous_row = 0 + for row in rows: name = row.xpath( - 'td/div/a/span[contains(@class, "dcm-athName")]/text()').extract() + 'td/div/a/span[contains(@class, "dcm-athName")]/text()' + ).extract() + country = row.xpath( - 'td/div/div[contains(@class, "dcm-noc")]/text()').extract() + 'td/div/div[contains(@class, "dcm-noc")]/text()' + ).extract() + row = row.xpath('td/text()').extract() + if row and int(row[0]) <= 25: place = row[0] score = row[2] name = name[0] country = country[0] + if int(row[0]) < previous_row: men_flag = True + previous_row = int(row[0]) + if men_flag: men.append([place, name, country, score]) else: diff --git a/rankings/fis/fis/spiders/ranking.pyc b/rankings/fis/fis/spiders/ranking.pyc deleted file mode 100644 index 1357301..0000000 Binary files a/rankings/fis/fis/spiders/ranking.pyc and /dev/null differ diff --git a/rankings/management/commands/updateraces.py b/rankings/management/commands/updateraces.py index df489a6..3a3202f 100644 --- a/rankings/management/commands/updateraces.py +++ b/rankings/management/commands/updateraces.py @@ -4,22 +4,20 @@ from django.core.management.base import BaseCommand, CommandError 
 
-# CURRENT_DIR = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
-# BACKEND_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir))
-# APP_DIR = os.path.abspath(os.path.join(BACKEND_DIR, os.pardir))
-# WEBAPPS_DIR = os.path.abspath(os.path.join(APP_DIR, os.pardir))
-# For production:
-CURRENT_DIR = '/home/tooski/webapps/python_core/python_core/rankings/'
-BACKEND_DIR = '/home/tooski/webapps/python_core/python_core/'
-APP_DIR = '/home/tooski/webapps/python_core/'
-WEBAPPS_DIR = '/home/tooski/webapps/'
+WEBAPPS_DIR = os.path.abspath(
+    os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        '..', '..', '..', '..'
+    )
+)
+RANKINGS_DIR = os.path.abspath(
+    os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'fis')
+)
+WEBSITE_DIR = os.path.join(WEBAPPS_DIR, 'website')
 
-# For dev:
-# CURRENT_DIR = '/home/seba-1511/Dropbox/Dev/tooski/python_core/rankings/'
-# BACKEND_DIR = '/home/seba-1511/Dropbox/Dev/tooski/python_core/'
-# APP_DIR = '/home/seba-1511/Dropbox/Dev/tooski/'
-# WEBAPPS_DIR = '/home/seba-1511/Dropbox/Dev/tooski/'
+FIS_RANKING_PATH = os.path.join(RANKINGS_DIR, 'ranking.json')
+WEBSITE_RANKING_PATH = os.path.join(WEBSITE_DIR, 'ranking.json')
 
 
 class Command(BaseCommand):
@@ -27,17 +25,17 @@ class Command(BaseCommand):
     help = 'Updates the table to the latest races, directly scrapped from the FIS website.'
 
     def handle(self, *args, **options):
-        os.system('rm ' + WEBAPPS_DIR + 'website/ranking.json')
-        os.system('rm ' + CURRENT_DIR + 'fis/ranking.json')
-# We get the leaderboard rankings and move them to the Apache server:
-        os.system('cd ' + CURRENT_DIR +
-                  '/fis/ && scrapy crawl ranking -o ranking.json -t json')
-        # Testing:
-        shutil.copy(CURRENT_DIR + 'fis/ranking.json',
-                    WEBAPPS_DIR + 'website/ranking.json')
-        # Server
-        # shutil.copy(CURRENT_DIR + '/fis/ranking.json',
-        #             WEBAPPS_DIR + '/website/ranking.json')
-
-        # We should use the pipeline system of scrapy with the races.
-        os.system('cd ' + CURRENT_DIR + '/fis/ && scrapy crawl races')
+        try:
+            os.remove(WEBSITE_RANKING_PATH)
+            os.remove(FIS_RANKING_PATH)
+        except OSError:
+            pass
+
+        # We get the leaderboard rankings and move them to the Apache server:
+        os.chdir(RANKINGS_DIR)
+        os.system('scrapy crawl ranking -o ranking.json -t json')
+        try:
+            shutil.copy(FIS_RANKING_PATH, WEBSITE_RANKING_PATH)
+        except IOError:
+            print 'File ranking.json is missing'
+        os.system('scrapy crawl races')
diff --git a/rankings/migrations/0003_auto_20151029_1429.py b/rankings/migrations/0003_auto_20151029_1429.py
new file mode 100644
index 0000000..24c8fa4
--- /dev/null
+++ b/rankings/migrations/0003_auto_20151029_1429.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('rankings', '0002_auto_20150109_1724'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='races',
+            name='table',
+            field=models.TextField(null=True, blank=True),
+            preserve_default=True,
+        ),
+    ]
diff --git a/rankings/models.pyc b/rankings/models.pyc
deleted file mode 100644
index 09e270b..0000000
Binary files a/rankings/models.pyc and /dev/null differ
diff --git a/req.txt b/req.txt
index baccf07..01a7766 100644
--- a/req.txt
+++ b/req.txt
@@ -5,3 +5,4 @@ ujson==1.33
 cloudinary==1.0.17
 Scrapy==1.0.3
 service-identity==14.0.0
+django-multiselectfield==0.1.3
diff --git a/users/__init__.pyc b/users/__init__.pyc
deleted file mode 100644
index e08c758..0000000
Binary files a/users/__init__.pyc and /dev/null differ
diff --git a/users/models.pyc b/users/models.pyc
deleted file mode 100644
index 1a6976e..0000000
Binary files a/users/models.pyc and /dev/null differ
diff --git a/users/urls.pyc b/users/urls.pyc
deleted file mode 100644
index 811b60e..0000000
Binary files a/users/urls.pyc and /dev/null differ
diff --git a/users/views.pyc b/users/views.pyc
deleted file mode 100644
index 61e626d..0000000
Binary files a/users/views.pyc and /dev/null differ
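
The new `AdsManager` in `ads/models.py` is meant to be used through `Ads.objects`. A minimal usage sketch, assuming the two `ads` migrations above have been applied; the placeholder code and category values are illustrative, not taken from any fixture:

```python
# Sketch only: how the manager methods added in this patch would be called
# from a Django shell (python manage.py shell).
from ads.models import Ads

# Banners assigned to a given placeholder code.
Ads.objects.by_placeholder('actu-top')

# Banners of a given shape, independent of placeholder.
Ads.objects.by_category('square')

# Both at once: this builds {'placeholders__contains': 'actu-top', 'square': 1}
# and passes it to .filter(**criteria), matching by_placeholder_and_category.
Ads.objects.by_placeholder_and_category('actu-top', 'square')
```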
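A design note on the new `placeholders` field: django-multiselectfield persists the selection as a single comma-separated character column, which is why the generated migration carries `max_length=165` (the combined length of the fourteen choice codes plus separators) and why `placeholders__contains` works at all — it is plain substring matching on that stored string. A short sketch, with illustrative values:

```python
# Sketch only: behaviour of the MultiSelectField-backed column.
from ads.models import Ads

ad = Ads(name='demo banner', placeholders=['actu-top', 'results-side'])
# The column value is stored roughly as the string "actu-top,results-side".

# __contains therefore does substring matching on that string:
Ads.objects.filter(placeholders__contains='actu-top')

# Caveat of substring matching: a code that is a prefix of another
# (e.g. 'actu-top' vs a hypothetical 'actu-top-2') would also match.
```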
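On the API side, `AdsPlaceholdersReadView` simply re-exposes the `ADS_PLACEHOLDERS` setting as `{code, label}` objects, and `AdsCreateReadView` now reads `placeholder` and `category` query parameters. A hedged client-side sketch — only the `ads/placeholders/` and `ads/` suffixes come from `apiv1/urls.py` in this patch; the host and `/apiv1/` prefix below are assumptions:

```python
# Sketch only: consuming the two read endpoints touched by this patch.
import requests

BASE = 'http://localhost:8000/apiv1/'  # assumed URL prefix

# List the available placeholder codes and their French labels.
placeholders = requests.get(BASE + 'ads/placeholders/').json()
# e.g. [{'code': 'actu-top', 'label': 'Page Actu, haut de page'}, ...]

# Filter banners by placeholder and shape; the view delegates to
# Ads.objects.by_placeholder_and_category when both parameters are present.
ads = requests.get(BASE + 'ads/', params={
    'placeholder': 'actu-top',
    'category': 'horizontal',
}).json()
```

Note that, as written in the view, the `placeholder` parameter only takes effect when `category` is also supplied; with no `category` the queryset falls back to `Ads.objects.all()`.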
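The `LOGGING` dict added to both settings modules is what makes `logging.getLogger('spider')` in the spiders write to `logs/spider`, rotated at midnight by `TimedRotatingFileHandler` (5 backups in dev, 15 on the server). A minimal sketch of how any other module would pick up the same logger; the message text is illustrative:

```python
# Sketch only: reusing the 'spider' logger declared in LOGGING.
import logging

# The name must match the logger key in LOGGING; any other name falls back
# to the root logger and never reaches logs/spider.
logger = logging.getLogger('spider')

logger.info('starting FIS crawl')                  # file-only at INFO with the server settings
logger.warning('No data to parse on race #1234')   # also echoed to the console with the dev settings
```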
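To make the path rewrite in `updateraces.py` easier to check against the hard-coded values it replaces, here is a small sketch that evaluates the same `os.path` arithmetic; the `/home/tooski/webapps/python_core/python_core/...` layout is taken from the removed production constants and is otherwise an assumption:

```python
# Sketch only: what the new constants resolve to if the command file sits
# at the old production location.
import os

command_file = ('/home/tooski/webapps/python_core/python_core/'
                'rankings/management/commands/updateraces.py')
here = os.path.dirname(command_file)

WEBAPPS_DIR = os.path.abspath(os.path.join(here, '..', '..', '..', '..'))
RANKINGS_DIR = os.path.abspath(os.path.join(here, '..', '..', 'fis'))
WEBSITE_DIR = os.path.join(WEBAPPS_DIR, 'website')

print(WEBAPPS_DIR)   # /home/tooski/webapps/python_core
print(RANKINGS_DIR)  # /home/tooski/webapps/python_core/python_core/rankings/fis
print(WEBSITE_DIR)   # /home/tooski/webapps/python_core/website
```

Reviewers can compare these resolved paths with the removed `WEBAPPS_DIR = '/home/tooski/webapps/'` and the old copy target `WEBAPPS_DIR + 'website/ranking.json'` to confirm the relative computation points where they expect on the server.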