source: publico/colab/trunk/colab/solrutils.py @ 6017

Última Alteração nesse arquivo desde 6017 foi 6017, incluída por seocam, 8 anos atrás

Entrega de numero 5 do Edital ATU-COLAB publicado em 2011

File size: 7.1 KB
Linha 
1#!/usr/bin/env python
2# encoding: utf-8
3
4import math
5import json
6import urllib
7import socket
8import logging
9import httplib
10
11from dateutil.parser import parse as parse_timestamp
12
13from django.conf import settings
14
15from colab.super_archives.models import EmailAddress
16
17
def build_query(user_query, filters=None):
    """Compose the query string that is sent to Solr.

    The user query (or ``*`` when it is empty) is AND-ed with
    ``settings.SOLR_BASE_QUERY``. When ``filters`` is given, the result
    is wrapped in parentheses and every filter with a truthy value is
    appended as an extra ``AND key:value`` clause.

    Returns the query encoded as UTF-8.

    """
    base_query = settings.SOLR_BASE_QUERY.strip()
    query = base_query + ' AND ' + (user_query or '*')

    if filters:
        clauses = ["(%s)" % query]
        clauses.extend(
            "%s:%s" % (key, value)
            for (key, value) in filters.items()
            if value
        )
        query = " AND ".join(clauses)

    logging.info(query)
    return query.encode('utf-8')
34
35
def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
    """Replace the timestamp strings of a document with parsed objects.

    For every key listed in ``date_attrs`` the value found in ``doc`` is
    run through ``parse_timestamp`` and stored back under the same key.
    The key is set to ``None`` when the document has no value for it or
    when the value cannot be parsed (the latter is logged as an error).

    The document is modified in place and also returned.

    """
    for attr in date_attrs:
        date_str = doc.get(attr)
        date_obj = None

        # A missing attribute must not reach the parser: it rejects None
        # with an exception other than ValueError, which used to abort
        # the processing of the whole document.
        if date_str is not None:
            try:
                date_obj = parse_timestamp(date_str)
            except (ValueError, OverflowError):
                logging.error('Error trying to parse "%s"', date_str)

        doc[attr] = date_obj

    return doc
53
54
def get_document_url(doc):
    """Store the document URL under the ``url`` key.

    The URL is the document's ``path_string`` (empty when absent). For
    documents whose ``Type`` is ticket, wiki or changeset it is prefixed
    with ``settings.SOLR_COLAB_URI``.

    The document is modified in place and also returned.

    """
    prefix = ''
    if doc.get('Type') in ('ticket', 'wiki', 'changeset'):
        prefix = settings.SOLR_COLAB_URI

    doc['url'] = prefix + doc.get('path_string', '')
    return doc
71   
72
def get_document_from_addr(doc):
    """Attach the creator's EmailAddress to the document, if available.

    The ``Creator`` value of the document is looked up as the username
    of an ``EmailAddress``; the first match is stored under the
    ``from_address`` key. Documents without a creator, or whose creator
    has no address, are left untouched.

    The document is modified in place and also returned, like the other
    document helpers in this module.

    """
    username = doc.get('Creator')
    if not username:
        # Nothing to look up, so do not even build the query.
        return doc

    from_addresses = EmailAddress.objects.filter(user__username=username)
    if from_addresses:
        doc['from_address'] = from_addresses[0]

    return doc
83   
84
def add_attrs_to_doc(doc):
    """Run every document helper over ``doc`` (which should be a dict).

    The helpers add or modify keys of the document in place: first the
    url, then the parsed timestamps and finally the from address.

    """
    helpers = (
        get_document_url,
        parse_document_timestamps,
        get_document_from_addr,
    )
    for helper in helpers:
        helper(doc)
93
94
class SolrPaginator(list):
    """List of the documents of a Solr response plus pagination data.

    The list itself holds the ``docs`` of the response. The remaining
    information is exposed through attributes:

    * ``QTime``: the ``QTime`` of the response header divided by 1000
      (Solr reports it in milliseconds).
    * ``per_page``: the ``rows`` parameter echoed in the response header
      (10 when it is not present).
    * ``numFound``: total number of documents matching the query.
    * ``page_num``: the current page, as given by the caller.
    * ``num_of_pages`` / ``last_page``: number of pages needed to show
      ``numFound`` documents.
    * ``has_previous`` / ``previous_page_number`` and ``has_next`` /
      ``next_page_number``: neighbours of the current page; the page
      number is ``None`` when there is no such page.

    """

    def __init__(self, response_dict, current_page):
        """Build the paginator.

        ``response_dict`` is the decoded Solr JSON response; missing
        sections are treated as empty. ``current_page`` is the number of
        the page being shown; it is compared against 1 and the page
        count, so it is expected to be a 1-based integer.

        """
        super(SolrPaginator, self).__init__()

        response_header = response_dict.get('responseHeader', {})
        response = response_dict.get('response', {})
        request_params = response_header.get('params', {})

        self.extend(response.get('docs', []))

        self.QTime = int(response_header.get('QTime', 1)) / 1000.0

        self.per_page = int(request_params.get('rows', 10))
        self.numFound = int(response.get('numFound', 0))
        self.page_num = current_page

        # ``rows=0`` is a valid Solr request (count only); there are no
        # pages to show in that case and dividing by it would fail.
        if self.per_page > 0:
            pages = math.ceil(self.numFound / float(self.per_page))
            self.num_of_pages = int(pages)
        else:
            self.num_of_pages = 0

        self.has_previous = self.page_num > 1
        self.previous_page_number = None
        if self.has_previous:
            self.previous_page_number = self.page_num - 1

        self.has_next = self.page_num < self.num_of_pages
        self.next_page_number = None
        if self.has_next:
            self.next_page_number = self.page_num + 1

    @property
    def last_page(self):
        """Number of the last page (same as ``num_of_pages``)."""
        return self.num_of_pages
130
131
def select(query, results_per_page=None, page_number=None, sort=None,
           fields=None, link_attrs=True):
    """Perform a select in a Solr instance using the configuration
    set in settings.py.

    Parameters:

    * ``query``: the query string, sent as the ``q`` parameter.
    * ``results_per_page``: sent as ``rows`` when truthy.
    * ``page_number``: 1-based page; converted to the ``start`` offset.
      Only used when ``results_per_page`` is also given.
    * ``sort``: sent as the ``sort`` parameter when truthy.
    * ``fields``: iterable of field names, sent comma separated as
      ``fl`` when truthy.
    * ``link_attrs``: when true, every returned document is passed to
      ``add_attrs_to_doc``.

    Returns the decoded JSON response as a dict. An empty dict is
    returned when the request fails, when Solr answers with a status
    other than 200 or when the body is not valid JSON; all of those
    cases are logged.

    """

    data = {
        'q': query,
        'wt': 'json',
    }

    # Number of results per page
    if results_per_page:
        data['rows'] = results_per_page

        # Page number (the offset can only be computed with a page size)
        if page_number:
            data['start'] = (page_number - 1) * results_per_page

    # Sort order
    if sort:
        data['sort'] = sort

    # Only select those fields
    if fields:
        data['fl'] = ','.join(fields)

    # First version of this was implemented using urllib2 and was
    #   a million times easier but unfortunately urllib2.urlopen
    #   does not support http headers. Without setting http headers
    #   for charset the solr server tries to decode utf-8 params
    #   as ASCII causing it to crash. HTTPConnection deals with
    #   encodings automagically.
    solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,
                                       settings.SOLR_PORT)
    solr_select_uri = settings.SOLR_SELECT_PATH + '?' + urllib.urlencode(data)

    # Socks proxy configuration. Only required for development
    #   if the solr server is behind a firewall.
    socks_server = getattr(settings, "SOCKS_SERVER", None)
    if socks_server:
        import socks
        logging.debug('Socks enabled: %s:%s', settings.SOCKS_SERVER,
                                              settings.SOCKS_PORT)

        socks.setdefaultproxy(settings.SOCKS_TYPE,
                              settings.SOCKS_SERVER,
                              settings.SOCKS_PORT)
        # NOTE: this replaces socket.socket for the whole process, not
        #   only for this connection.
        socket.socket = socks.socksocket

    solr_json_resp = None
    try:
        solr_conn.request('GET', solr_select_uri)
        solr_response = solr_conn.getresponse()
        if solr_response.status == 200:
            solr_json_resp = solr_response.read()
        else:
            logging.error('Solr returned status %s for %s',
                          solr_response.status, solr_select_uri)
    except (socket.error, httplib.HTTPException) as err:
        logging.exception(err)
    finally:
        # The connection is used for a single request; always release it.
        solr_conn.close()

    solr_dict_resp = {}
    if solr_json_resp is not None:
        try:
            solr_dict_resp = json.loads(solr_json_resp)
        except ValueError as err:
            logging.exception(err)

    if link_attrs:
        # Loop over all documents adding or linking its information
        #   with the data from this app or database. An explicit loop is
        #   used because the helper is called only for its side effects.
        for doc in solr_dict_resp.get('response', {}).get("docs", []):
            add_attrs_to_doc(doc)

    return solr_dict_resp
210   
211
def get_latest_collaborations(number=10, username=None):
    """Return the ``number`` most recently modified documents.

    When ``username`` is given only the documents that have it as a
    ``collaborator`` are considered. The result is the list of
    documents of the Solr response (empty when there is none).

    """
    filters = {'collaborator': username} if username else None

    solr_response = select(
        query=build_query('*', filters),
        results_per_page=number,
        sort='modified desc'
    )

    response = solr_response.get('response', {})
    return response.get('docs', [])
231
232
def count_types(sample=100, filters=None):
    """Count the ``Type`` of the ``sample`` most recently modified
    documents, returning the totals in a dict.

    Example: {
        'wiki': 30,
        'thread': 40,
        'ticket': 10,
        'changeset': 20,
    }

    ``filters`` is forwarded to ``build_query``. The documents are
    fetched without the extra linked attributes.

    """
    solr_response = select(
        query=build_query('*', filters),
        results_per_page=sample,
        sort='modified desc',
        link_attrs=False,
    )

    type_count = {}
    for doc in solr_response.get('response', {}).get('docs', []):
        doc_type = doc.get('Type')
        type_count[doc_type] = type_count.get(doc_type, 0) + 1

    return type_count
263   
264   
Note: Veja TracBrowser para ajuda no uso do navegador do trac.
 

The contents and data of this website are published under license:
Creative Commons 4.0 Brasil - Atribuir Fonte - Compartilhar Igual.