Package mobi :: Package mtld :: Package da :: Package carrier :: Module bucket_handler
[hide private]
[frames] | no frames]

Source Code for Module mobi.mtld.da.carrier.bucket_handler

  1  from mobi.mtld.da.carrier.bucket_type import BucketType 
  2  from mobi.mtld.da.carrier.byte_reader import ByteReader 
  3  from mobi.mtld.da.carrier.carrier_data_type import CarrierDataType 
  4  from mobi.mtld.da.exception.data_file_exception import DataFileException 
  5  from mobi.mtld.da.data_type import DataType 
  6  from mobi.mtld.da.property import Property 
  7  from mobi.mtld.da.property_name import PropertyName 
  8   
  9  from binascii import crc32 
 10   
class BucketHandler(object):
    """
    Parses the individual "buckets" of a carrier data file.

    Each bucket is handed to processBucket() together with its ID and its
    expected CRC-32. Once the checksum has been verified the bucket is
    decoded into one of the following structures:

      - property names   (PropertyName objects and their plain string names)
      - property values  (Property objects, single or multi valued)
      - properties       (dicts mapping property name -> Property)
      - IPv4 tree        (parallel "left", "right" and "properties" lists)
    """

    # Marker used in the IPv4 tree bucket for "this node has no properties".
    _NO_VALUE = -1

    # Container type IDs used in the property values bucket.
    _NO_CONTAINER = 0
    _ORDER_SET_CONTAINER = 1

    CRC32_DOES_NOT_MATCH = 'CRC-32 does not match for bucket "%s".'

    _property_names = None
    _propertyStringNames = None
    _propertyValues = None
    _properties = None
    _treeLefts = None
    _treeRights = None
    _treeProperties = None

    def __init__(self):
        """
        Create a handler with no buckets processed yet; every parsed
        structure starts out as None.
        """
        self._property_names = None
        self._propertyStringNames = None
        self._propertyValues = None
        self._properties = None
        self._treeLefts = None
        self._treeRights = None
        self._treeProperties = None

    def _needsBuckets(self):
        """
        Checks if all necessary buckets have been supplied and processed.

        Returns True while at least one of the property names, property
        values, properties or IPv4 tree buckets is still missing, and False
        once all of them have been processed.
        """
        # Identity checks are used on purpose: "!= None" would call into
        # __eq__/__ne__ of whatever object is stored and could misreport a
        # populated bucket as missing.
        allLoaded = (self._property_names is not None and
                     self._propertyValues is not None and
                     self._properties is not None and
                     self._treeLefts is not None)
        return not allLoaded

    def getTreeLefts(self):
        """
        Returns the Radix Trie "left" pointers
        """
        return self._treeLefts

    def getTreeRights(self):
        """
        Returns the Radix Trie "right" pointers
        """
        return self._treeRights

    def getTreeProperties(self):
        """
        Returns the properties used in the Radix Trie nodes
        """
        return self._treeProperties

    def getPropertyNames(self):
        """
        Returns the property names array (PropertyName objects)
        """
        return self._property_names

    # NOTE(review): the "def" line of this method is missing from the source
    # listing this class was recovered from; only its body survived. The
    # method name below is reconstructed - confirm it against the callers.
    def getPropertyNamesAsStrings(self):
        """
        Returns the property names array as plain strings
        """
        return self._propertyStringNames

    def processBucket(self, bucketId, fileCrc32, bucketData):
        """
        Process a bucket identified by bucketId. The bucket CRC-32
        hash is verified before parsing the bucket data.

        bucketId   -- one of the BucketType IDs; other IDs are ignored
                      after the checksum has been verified
        fileCrc32  -- the expected (unsigned) CRC-32 of bucketData
        bucketData -- the raw bytes of the bucket

        Raises DataFileException if the CRC-32 does not match.
        """
        # Mask to 32 bits so the result is unsigned regardless of how the
        # running interpreter reports crc32().
        if fileCrc32 != (crc32(bucketData) & 0xffffffff):
            raise DataFileException(self.CRC32_DOES_NOT_MATCH % bucketId)

        if bucketId == BucketType.PROPERTY_NAMES:
            self._processPropertyNamesBucket(bucketData)
        elif bucketId == BucketType.PROPERTY_VALUES:
            self._processPropertyValuesBucket(bucketData)
        elif bucketId == BucketType.PROPERTIES:
            self._processPropertiesBucket(bucketData)
        elif bucketId == BucketType.IPV4_TREE:
            self._processIpv4TreeBucket(bucketData)

    def _processPropertyNamesBucket(self, data):
        """
        The following is the structure of this bucket :
            2b Num of indexed items
            <repeating>
                1b data type of property value
                1b length of name
                ?b property name - ascii string
            </repeating>
        """
        reader = ByteReader(data)
        numItems = reader.getShort()
        self._property_names = []
        self._propertyStringNames = []

        for _ in range(numItems):
            valueDataType = reader.getByte()
            name = reader.getStringAscii(reader.getByte())
            self._property_names.append(PropertyName(name, valueDataType))
            self._propertyStringNames.append(name.decode("utf-8"))

    def _processPropertyValuesBucket(self, data):
        """
        The following is the structure of this bucket:
            2b Number of indexed items
            <repeating>
                1b container type ID: no container, ordered set etc
                <if container="no container">
                    1B property type - int, boolean, string etc
                    1B/2B/4B length of value bytes --OPTIONAL-- (only
                             applies to some string types)
                    ?B the converted value, some data types have a fixed
                       length such as (boolean len=1, byte len=1,
                       short len=2, int len=4, float len=4)
                </if>

                <elseif container="ordered set">
                    1B property type - int, boolean, string etc
                    2B number of items in the set
                    <repeat>
                        <if type=string>
                            1B property type - the type of string -
                            1B/2B/4B length of value bytes --OPTIONAL--
                                     (only applies to some string types)
                        </if>
                        ?B the converted value, some data types have a
                           fixed length such as (boolean len=1, byte len=1,
                           short len=2, int len=4, float len=4)
                    </repeat>
                </if>
            </repeating>
        """
        reader = ByteReader(data)
        numItems = reader.getShort()
        self._propertyValues = []

        for _ in range(numItems):
            # An unrecognised container type leaves a None placeholder so
            # that the value IDs (list indexes) stay aligned.
            prop = None
            containerType = reader.getByte()
            if containerType == self._NO_CONTAINER:
                prop = self._getSingleValueProperty(reader)
            elif containerType == self._ORDER_SET_CONTAINER:
                prop = self._getMultipleValueProperty(reader)

            self._propertyValues.append(prop)

    def _getSingleValueProperty(self, reader):
        """
        Read a single-value property value
        """
        dataType = reader.getByte()
        value = self._getPropertyValue(dataType, reader)

        return Property(value, CarrierDataType.getBaseDataType(dataType))

    def _getMultipleValueProperty(self, reader):
        """
        Read a multi-value property. If the type is a string then the string
        type is read from the data file for each value in order to know how
        many bytes to read.
        """
        dataType = reader.getByte()
        numItems = reader.getShort()

        values = []
        for _ in range(numItems):
            itemDataType = dataType
            if dataType == DataType.STRING:
                itemDataType = reader.getByte()

            values.append(self._getPropertyValue(itemDataType, reader))

        return Property(values, CarrierDataType.getBaseDataType(dataType))

    def _processPropertiesBucket(self, data):
        """
        Properties - nameid:valueid

        The following is the structure of this bucket:

            2B Num of indexed items
            <repeating>
                2B num items in collection
                <repeating>
                    4B property name ID
                    4B property value ID
                </repeating>
            </repeating>

        The order of the collections is taken as the index for each item.
        Each collection becomes a dict mapping the property name to the
        entry of the property values list that the value ID points at.
        The property names and property values buckets must already have
        been processed, because the IDs are indexes into those lists.
        """
        reader = ByteReader(data)
        numItems = reader.getShort()
        self._properties = []

        for _ in range(numItems):
            props = {}
            numPropVals = reader.getShort()

            for _ in range(numPropVals):
                propId = reader.getInt()
                valId = reader.getInt()

                propName = self._property_names[propId]
                if propName is not None:
                    # The value may itself be None (a placeholder left by
                    # the property values bucket); it is stored as is.
                    props[propName.name] = self._propertyValues[valId]

            self._properties.append(props)

    def _processIpv4TreeBucket(self, data):
        """
        Load the data for the IPv4 Tree bucket. This bucket has
        the following structure:

        These 3 ints repeat for the entire bucket:
            <repeating>
                4B properties ID value
                4B Left value
                4B Right value
            </repeating>

        The properties bucket must already have been processed, because the
        properties ID is an index into the properties list.
        """
        reader = ByteReader(data)
        # Each node is three 4 byte ints; floor division keeps this an
        # exact integer computation.
        size = len(data) // 12
        self._treeRights = []
        self._treeLefts = []
        self._treeProperties = []

        for _ in range(size):
            propsId = reader.getInt()
            self._treeLefts.append(reader.getInt())
            self._treeRights.append(reader.getInt())

            prop = None
            if propsId != self._NO_VALUE:
                prop = self._properties[propsId]

            self._treeProperties.append(prop)

    def _getPropertyValue(self, dataType, reader):
        """
        Read the appropriate property from the ByteReader depending on the
        data type. All of the primitive types are fixed length. In addition
        there are five fixed length UTF8 string values and other special
        types for strings that are less than certain lengths.

        For an unrecognised data type a 2 byte length is read, that many
        bytes are skipped and None is returned.
        """
        value = None

        if dataType == DataType.BOOLEAN:
            value = reader.getBoolean()
        elif dataType == DataType.BYTE:
            value = reader.getByte()
        elif dataType == DataType.SHORT:
            value = reader.getShort()
        elif dataType == DataType.INTEGER:
            value = reader.getInt()
        elif dataType == DataType.LONG:
            value = reader.getLong()
        elif dataType == DataType.FLOAT:
            value = reader.getFloat()
        elif dataType == DataType.DOUBLE:
            value = reader.getDouble()
        elif dataType == CarrierDataType.STRING_1_BYTE_FIXED:
            value = reader.getStringUtf8(1)
        elif dataType == CarrierDataType.STRING_2_BYTE_FIXED:
            value = reader.getStringUtf8(2)
        elif dataType == CarrierDataType.STRING_3_BYTE_FIXED:
            value = reader.getStringUtf8(3)
        elif dataType == CarrierDataType.STRING_4_BYTE_FIXED:
            value = reader.getStringUtf8(4)
        elif dataType == CarrierDataType.STRING_5_BYTE_FIXED:
            value = reader.getStringUtf8(5)
        elif dataType == CarrierDataType.STRING_LEN_BYTE:
            value = reader.getStringUtf8(reader.getByte())
        elif dataType == CarrierDataType.STRING_LEN_SHORT:
            value = reader.getStringUtf8(reader.getShort())
        elif dataType == CarrierDataType.STRING_LEN_INT:
            value = reader.getStringUtf8(reader.getInt())
        elif dataType == DataType.STRING:
            value = reader.getStringUtf8(reader.getShort())
        else:
            reader.skip(reader.getShort())

        return value
297