How to avoid Instagram HTTP error 429 while scraping Instagram

I've written code for following Instagram users. I feed this code a list, and it visits each user's profile and sends a follow request in a loop. But after adding 100-150 people it always returns HTTP error 429 and briefly shows "Sorry, this link may be broken...". I've already slowed my code down; it has lots of time.sleep(random.randint(20, 50)) calls, but I still get this error. My code also uses Scrapy; I don't know whether that's relevant. Please help me find a solution.
My code:
import time
import random
from datetime import datetime

import scrapy
from scrapy.crawler import CrawlerProcess
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException


class instaFollowSpider(scrapy.Spider):
    name = 'instaFollowSpider'
    start_urls = ['https://instagram.com']

    def parse(self, response):
        # Log in (original comment: "Giriş yapma")
        chrome_options = Options()
        chrome_options.add_argument("user-data-dir=C:\\Users\\merta\\AppData\\Local\\Google\\Chrome\\User Data\\Default")
        driver = webdriver.Chrome('chromedriver.exe', options=chrome_options)
        driver.get('https://instagram.com')  # Already authenticated via the Chrome profile
        time.sleep(15)
        print("Logged in")  # originally "Giriş Yapıldı"
        a = 0  # consecutive NoSuchElementException counter
        b = 0  # number of users followed
        baslangic = datetime.now()  # start time

        # Follow process
        for i in followlist:  # followlist: list of usernames, defined elsewhere
            try:
                driver.get(f'https://www.instagram.com/{i}/')
                time.sleep(random.randint(15, 43))
                # "Takip Et" is the Turkish UI label for "Follow";
                # the XPath attribute selector needs "@class", not "#class"
                follow_button = driver.find_element(By.XPATH, "//div[@class='_aacl _aaco _aacw _aad6 _aade']")
                if follow_button.text == "Takip Et":
                    follow_button.click()
                    WebDriverWait(driver, 45).until(EC.visibility_of_element_located(
                        (By.XPATH, "//div[@class='_aacl _aaco _aacw _aad6 _aade']")))
                    b += 1
                    a = 0
                    print(i)
                    time.sleep(random.randint(6, 35))
            except NoSuchElementException:
                a += 1
                if a == 3:
                    time.sleep(7500)  # long cool-down after three misses in a row
                    a = 0
                    continue
                else:
                    time.sleep(random.randint(121, 200))
                    continue
            except:
                driver.get_screenshot_as_file(f"screenshot{i}.png")
                time.sleep(random.randint(601, 905))
                continue

        print("Total followed:", b)  # originally "Toplam eklenen"
        son = datetime.now()
        print("End:", son, " | Total runtime:", son - baslangic)


process = CrawlerProcess()
process.crawl(instaFollowSpider)
process.start()
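(Editorial aside, not from the original post: a commonly suggested mitigation for HTTP 429 is exponential backoff — when the rate-limit page appears, wait, double the wait, and retry, instead of sleeping a fixed random interval. A minimal sketch; the page-detection string and the delay limits are assumptions to adapt to what Instagram actually serves you:)

import time
import random

def open_profile_with_backoff(driver, username, max_retries=5, base_delay=60):
    """Open a profile; on a suspected rate-limit page, back off exponentially."""
    delay = base_delay
    for attempt in range(max_retries):
        driver.get(f'https://www.instagram.com/{username}/')
        time.sleep(random.randint(15, 43))
        # Assumption: the 429 interstitial contains this text; check your locale.
        if "Sorry" not in driver.page_source:
            return True  # profile loaded normally
        time.sleep(delay + random.randint(0, 30))  # jitter avoids a fixed pattern
        delay *= 2  # exponential backoff: 60s, 120s, 240s, ...
    return False  # still rate-limited; the caller should stop for a long while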

Related

Why is the YouTube API v3 inconsistent in the number of comments it lets you download before an error 400?

I am downloading YouTube comments with a Python script that uses API keys and the YouTube Data API v3, but sooner or later I run into the following error:
{'error': {'code': 400, 'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'errors': [{'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'domain': 'youtube.commentThread', 'reason': 'processingFailure', 'location': 'body', 'locationType': 'other'}]}}
I am using the following code:
import argparse
import requests
import json
import time

start_time = time.time()


class YouTubeApi():
    YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'
    comment_counter = 0

    def is_error_response(self, response):
        error = response.get('error')
        if error is None:
            return False
        print("API Error: "
              f"code={error['code']} "
              f"domain={error['errors'][0]['domain']} "
              f"reason={error['errors'][0]['reason']} "
              f"message={error['errors'][0]['message']!r}")
        print(self.comment_counter)
        return True

    def format_comments(self, results, likes_required):
        comments_list = []
        try:
            for item in results["items"]:
                comment = item["snippet"]["topLevelComment"]
                likes = comment["snippet"]["likeCount"]
                if likes < likes_required:
                    continue
                author = comment["snippet"]["authorDisplayName"]
                text = comment["snippet"]["textDisplay"]
                line = "Comment by {}:\n \"{}\"\n\n".format(author, text)  # renamed from `str`, which shadowed the builtin
                line = line.encode('ascii', 'replace').decode()
                comments_list.append(line)
                self.comment_counter += 1
                print("Comments downloaded:", self.comment_counter, end="\r")
        except KeyError:
            print(results)
        return comments_list

    def get_video_comments(self, video_id, likes_required):
        with open("API_keys.txt", "r") as f:
            key_list = f.readlines()
        key_list = [key.strip('\n') for key in key_list]  # was strip('/n'), which stripped nothing
        params = {
            'part': 'snippet,replies',
            'maxResults': 100,
            'videoId': video_id,
            'textFormat': 'plainText',
            'key': key_list[0]
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
        }
        comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
        results = comments_data.json()
        if self.is_error_response(results):
            return []
        nextPageToken = results.get("nextPageToken")
        comments_list = []
        comments_list += self.format_comments(results, likes_required)
        while nextPageToken:
            params.update({'pageToken': nextPageToken})
            # Rotate API keys every ~900k comments to spread quota usage
            if self.comment_counter <= 900000:
                params.update({'key': key_list[0]})
            elif self.comment_counter <= 1800000:
                params.update({'key': key_list[1]})
            elif self.comment_counter <= 2700000:
                params.update({'key': key_list[2]})
            elif self.comment_counter <= 3600000:
                params.update({'key': key_list[3]})
            elif self.comment_counter <= 4500000:
                params.update({'key': key_list[4]})
            else:
                params.update({'key': key_list[5]})
            if self.comment_counter % 900001 == 0:
                print(params["key"])
            comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
            results = comments_data.json()
            if self.is_error_response(results):
                return comments_list
            nextPageToken = results.get("nextPageToken")
            comments_list += self.format_comments(results, likes_required)
        return comments_list

    def get_video_id_list(self, filename):
        try:
            with open(filename, 'r') as file:
                URL_list = file.readlines()
        except FileNotFoundError:
            exit("File \"" + filename + "\" not found")
        video_ids = []  # renamed from `list`, which shadowed the builtin
        for url in URL_list:
            if url == "\n":  # ignore empty lines
                continue
            if url[-1] == '\n':  # delete '\n' at the end of the line
                url = url[:-1]
            if url.find('=') != -1:  # get id; find() returns -1 (truthy!) when absent, so compare explicitly
                video_id = url[url.find('=') + 1:]
                video_ids.append(video_id)
            else:
                print("Wrong URL")
        return video_ids


def main():
    yt = YouTubeApi()
    parser = argparse.ArgumentParser(add_help=False, description=("Download youtube comments from many videos into txt file"))
    required = parser.add_argument_group("required arguments")
    optional = parser.add_argument_group("optional arguments")
    optional.add_argument("--likes", '-l', help="The amount of likes a comment needs to be saved", type=int)
    optional.add_argument("--input", '-i', help="URL list file name")
    optional.add_argument("--output", '-o', help="Output file name")
    optional.add_argument("--help", '-h', help="Help", action='help')
    args = parser.parse_args()
    # --------------------------------------------------------------------- #
    likes = 0
    if args.likes:
        likes = args.likes
    input_file = "URL_list.txt"
    if args.input:
        input_file = args.input
    output_file = "Comments.txt"
    if args.output:
        output_file = args.output
    video_ids = yt.get_video_id_list(input_file)
    if not video_ids:
        exit("No URLs in input file")
    try:
        vid_counter = 0
        with open(output_file, "a") as f:
            for video_id in video_ids:
                vid_counter += 1
                print("Downloading comments for video ", vid_counter, ", id: ", video_id, sep='')
                comments = yt.get_video_comments(video_id, likes)
                if comments:
                    for comment in comments:
                        f.write(comment)
        print('\nDone!')
    except KeyboardInterrupt:
        exit("User Aborted the Operation")
    # --------------------------------------------------------------------- #


if __name__ == '__main__':
    main()
In another thread, it was discovered that Google does not currently permit downloading all the comments on a popular video; however, you would expect it to cut off at the same point each time. Instead, I have found that it can range anywhere between 200k and 1.5 million comments downloaded before it returns a code 400. Is this due to a bug in my code, or is the YouTube API rejecting my requests because it is clear they come from a script? Would adding a time.sleep call help with this?
(I bring forward this answer -- which I prepared for the question above at the time of its initial posting -- because my assertions below seem to be confirmed once again by recent SO posts of this very kind.)
Your observations are correct. But, unfortunately, nobody but Google itself is able to provide a sound and complete answer to your question. We non-Googlers (such as myself), and even the Googlers themselves (since they all sign NDAs), can only guess about the things implied.
Here is my educated guess, based on the investigations I made recently when responding to a very much related question (which you quoted above yourself):
As you already know, the API uses pagination to return result sets whose cardinality exceeds the internal limit of 50 (or, in some cases, 100) items per invocation, for each and every API endpoint that returns result sets.
If you log the nextPageToken property that you obtain from CommentThreads.list via your results object, you'll see that those page tokens get bigger and bigger. Each such page token has to be passed on to the next CommentThreads.list call as the pageToken parameter.
The problem is that, internally (this is not publicly specified or documented), the API has a limit on the sheer length of the HTTP requests it accepts from its callers. (This happens for various reasons, e.g. security.) Therefore, when a given page token is sufficiently long, the HTTP request that the API user issues will exceed that internal limit, producing an internal error. That error surfaces to the API caller as the processingFailure error that you've encountered.
Many questions remain to be answered (e.g. why do the page tokens have unbounded length?), but, again, those questions belong very much to the internal realm of the back-end system behind the API we're using. And those questions cannot be answered publicly, since they are very much Google's internal business.
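(Not from the original answer: if you want to observe the claimed growth yourself, log the length of each nextPageToken and of the full request URL as you page. A minimal sketch:)

import requests

YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'

def log_token_growth(video_id, api_key, max_pages=50):
    """Page through commentThreads, printing how the page token grows."""
    params = {'part': 'snippet', 'maxResults': 100,
              'videoId': video_id, 'key': api_key}
    for page in range(max_pages):
        resp = requests.get(YOUTUBE_COMMENTS_URL, params=params)
        results = resp.json()
        if 'error' in results:
            print("failed on page %d, request URL length %d" % (page, len(resp.url)))
            return
        token = results.get('nextPageToken')
        if not token:
            return  # last page reached normally
        print("page=%d token_len=%d url_len=%d" % (page, len(token), len(resp.url)))
        params['pageToken'] = token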

Python script runs but does not work and no errors are thrown

This is an API-based program that has been working for a few months and all of a sudden has gone days without pushing anything to Discord. The script looks fine in CMD, and no errors are being thrown. I was wondering if there is a way to rule out possible issues, such as API instability or something obvious. The program is supposed to go to the site www.bitskins.com, pull skins based on the set parameters, and push them as an embed to a Discord channel every 10 minutes.
There are two files that run this program.
Here is the one that uses Bitskins API (bitskins.py):
import requests, json
from datetime import datetime, timedelta


class Item:
    def __init__(self, item):
        withdrawable_at = item['withdrawable_at']
        price = float(item['price'])
        self.available_in = withdrawable_at - datetime.timestamp(datetime.now())
        if self.available_in < 0:
            self.available = True
        else:
            self.available = False
        self.suggested_price = float(item['suggested_price'])
        self.price = price
        self.margin = round(self.suggested_price - self.price, 2)
        self.reduction = round((1 - (self.price / self.suggested_price)) * 100, 2)
        self.image = item['image']
        self.name = item['market_hash_name']
        self.item_id = item['item_id']

    def __str__(self):
        if self.available:
            return "Name: {}\nPrice: {}\nSuggested Price: {}\nReduction: {}%\nAvailable Now!\nLink: https://bitskins.com/view_item?app_id=730&item_id={}".format(self.name, self.price, self.suggested_price, self.reduction, self.item_id)
        else:
            return "Name: {}\nPrice: {}\nSuggested Price: {}\nReduction: {}%\nAvailable in: {}\nLink: https://bitskins.com/view_item?app_id=730&item_id={}".format(self.name, self.price, self.suggested_price, self.reduction, str(timedelta(seconds=self.available_in)), self.item_id)

    def __lt__(self, other):
        return self.reduction < other.reduction

    def __gt__(self, other):
        return self.reduction > other.reduction


def get_url(API_KEY, code):
    PER_PAGE = 30  # the number of items to retrieve. Either 30 or 480.
    return "https://bitskins.com/api/v1/get_inventory_on_sale/?api_key=" + API_KEY + "&code=" + code + "&per_page=" + str(PER_PAGE)


def get_data(url):
    r = requests.get(url)
    data = r.json()
    return data


def get_items(code, API_KEY):
    url = get_url(API_KEY, code)
    try:
        data = get_data(url)
        if data['status'] == "success":
            items = []
            items_dic = data['data']['items']
            for item in items_dic:
                tmp = Item(item)
                if tmp.reduction >= 25 and tmp.price <= 200:  # minimum discount (25%) and maximum price ($200) to look for when grabbing items
                    items.append(tmp)
            return items
        else:
            raise Exception(data["data"]["error_message"])
    except:
        raise Exception("Couldn't connect to BitSkins.")

# my_token = pyotp.TOTP(my_secret)
# print(my_token.now())  # in python3
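(An aside, not in the original post: instead of concatenating the query string by hand in get_url, the same request can be made by passing a params dict to requests, which handles the URL-encoding. A sketch, with a hypothetical function name:)

import requests

def fetch_inventory(api_key, code, per_page=30):
    # requests builds and URL-encodes the query string from the dict
    params = {'api_key': api_key, 'code': code, 'per_page': per_page}
    r = requests.get("https://bitskins.com/api/v1/get_inventory_on_sale/", params=params)
    return r.json()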
And here is the file with Discord's API (solution.py):
#!/bin/env python3.6
import bitskins
import discord
import pyotp, base64, asyncio
from datetime import timedelta, datetime

TOKEN = "Not input for obvious reasons"
API_KEY = "Not input for obvious reasons"
my_secret = 'Not input for obvious reasons'
client = discord.Client()


def get_embed(item):
    embed = discord.Embed(title=item.name, url="https://bitskins.com/view_item?app_id=730&item_id={}".format(item.item_id), color=0xA3FFE8)
    embed.set_author(name="Skin Bot", url="http://www.reactor.gg/", icon_url="https://pbs.twimg.com/profile_images/1050077525471158272/4_R8PsrC_400x400.jpg")
    embed.set_thumbnail(url=item.image)
    embed.add_field(name="Price:", value="${}".format(item.price))
    embed.add_field(name="Discount:", value="{}%".format(item.reduction), inline=True)
    if item.available:
        tmp = "Instantly Withdrawable"
    else:
        tmp = str(timedelta(seconds=item.available_in))
    embed.add_field(name="Availability:", value=tmp, inline=True)
    embed.add_field(name="Suggested Price:", value="${}".format(item.suggested_price), inline=True)
    embed.add_field(name="Profit:", value="${}".format(item.margin), inline=True)
    embed.set_footer(text="Made by Aqyl#0001 | {}".format(datetime.now()), icon_url="https://www.discordapp.com/assets/6debd47ed13483642cf09e832ed0bc1b.png")
    return embed


async def status_task(wait_time=60 * 5):
    while True:
        print("Updated on: {}".format(datetime.now()))
        code = pyotp.TOTP(my_secret)
        try:
            items = bitskins.get_items(code.now(), API_KEY)
            for item in items:
                await client.send_message(client.get_channel("656913641832185878"), embed=get_embed(item))
        except:
            pass
        await asyncio.sleep(wait_time)


@client.event  # was "#client.event" in the post; decorators need "@"
async def on_ready():
    wait_time = 60 * 10  # 10 mins in this case
    print('CSGO BitSkins Bot')
    print('Made by Aqyl#0001')
    print('Version 1.0.6')
    print('')
    print('Logged in as:')
    print(client.user.name)
    print('------------------------------------------')
    client.loop.create_task(status_task(wait_time))


try:
    client.run(TOKEN)
except:
    print("Couldn't connect to the Discord Server.")
You have a general (bare) except clause; this will lead to catching exceptions that you really don't want to catch.
try:
    items = bitskins.get_items(code.now(), API_KEY)
    for item in items:
        await client.send_message(client.get_channel("656913641832185878"), embed=get_embed(item))
except:
    pass
This is the same as catching any exception that can occur there (that is, any exception that inherits from BaseException).
To avoid those problems, you should always catch specific exceptions (e.g. TypeError).
Example:
try:
    raise Exception("Example exc")
except Exception as e:
    print(f"Exception caught! {e}")

Emitting dronekit.io vehicle's attribute changes using flask-socket.io

I'm trying to send data from my dronekit.io vehicle using flask-socket.io. Unfortunately, I got this log:
Starting copter simulator (SITL)
SITL already Downloaded and Extracted.
Ready to boot.
Connecting to vehicle on: tcp:127.0.0.1:5760
>>> APM:Copter V3.3 (d6053245)
>>> Frame: QUAD
>>> Calibrating barometer
>>> Initialising APM...
>>> barometer calibration complete
>>> GROUND START
* Restarting with stat
latitude -35.363261
>>> Exception in attribute handler for location.global_relative_frame
>>> Working outside of request context.
This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
longitude 149.1652299
>>> Exception in attribute handler for location.global_relative_frame
>>> Working outside of request context.
This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
Here is my code:
sample.py
from dronekit import connect, VehicleMode
from flask import Flask
from flask_socketio import SocketIO, emit
import dronekit_sitl
import time

sitl = dronekit_sitl.start_default()
connection_string = sitl.connection_string()

print("Connecting to vehicle on: %s" % (connection_string,))
vehicle = connect(connection_string, wait_ready=True)

def arm_and_takeoff(aTargetAltitude):
    print "Basic pre-arm checks"
    while not vehicle.is_armable:
        print " Waiting for vehicle to initialise..."
        time.sleep(1)
    print "Arming motors"
    vehicle.mode = VehicleMode("GUIDED")
    vehicle.armed = True
    while not vehicle.armed:
        print " Waiting for arming..."
        time.sleep(1)
    print "Taking off!"
    vehicle.simple_takeoff(aTargetAltitude)
    while True:
        if vehicle.location.global_relative_frame.alt >= aTargetAltitude * 0.95:
            print "Reached target altitude"
            break
        time.sleep(1)

last_latitude = 0.0
last_longitude = 0.0
last_altitude = 0.0

@vehicle.on_attribute('location.global_relative_frame')  # was "#vehicle.on_attribute"; decorators need "@"
def location_callback(self, attr_name, value):
    global last_latitude
    global last_longitude
    global last_altitude
    if round(value.lat, 6) != round(last_latitude, 6):
        last_latitude = value.lat
        print "latitude ", value.lat, "\n"
        emit("latitude", value.lat)
    if round(value.lon, 6) != round(last_longitude, 6):
        last_longitude = value.lon
        print "longitude ", value.lon, "\n"
        emit("longitude", value.lon)
    if round(value.alt) != round(last_altitude):
        last_altitude = value.alt
        print "altitude ", value.alt, "\n"
        emit("altitude", value.alt)

app = Flask(__name__)
socketio = SocketIO(app)

if __name__ == '__main__':
    socketio.run(app, host='0.0.0.0', port=5000, debug=True)
    arm_and_takeoff(20)
I know from the logs that I should not do any HTTP-request work inside the "vehicle.on_attribute" decorated method, and that I should search for information on how to solve this problem, but I didn't find any info about the error.
Hope you could help me.
Thank you very much,
Raniel
The emit() function by default returns an event back to the active client. If you call this function outside of a request context there is no concept of active client, so you get this error.
You have a couple of options:
1. Indicate the recipient of the event and the namespace that you are using, so that there is no need to look them up in the context. You can do this by adding room and namespace arguments. Use '/' for the namespace if you are using the default namespace.
2. Emit to all clients by adding broadcast=True as an argument, plus the namespace as indicated in #1.
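One way to apply this to the question's callback (a sketch, not from the original answer; it reuses the question's socketio and vehicle objects) is to emit through the SocketIO instance itself, which never needs the request context and broadcasts when no recipient is given:

@vehicle.on_attribute('location.global_relative_frame')
def location_callback(self, attr_name, value):
    # socketio.emit (on the SocketIO instance, unlike flask_socketio.emit)
    # does not look up a client in the request context, so it is safe to
    # call from dronekit's attribute-listener thread.
    socketio.emit('latitude', value.lat, namespace='/')
    socketio.emit('longitude', value.lon, namespace='/')
    socketio.emit('altitude', value.alt, namespace='/')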

How to get the Tor ExitNode IP with Python and Stem

I'm trying to get the external IP that Tor uses, as mentioned here. Using something like myip.dnsomatic.com is very slow. I tried what was suggested in the aforementioned link (Python + Stem to control Tor through the control port), but all you get is the circuits' IPs, with no assurance of which one is on the exit node, and sometimes the real IP is not even among the results.
Any help would be appreciated.
Also, from here, at the bottom, Amine suggests a way to renew the identity in Tor. There is an instruction, controller.get_newnym_wait(), which he uses to wait until the new connection is ready (controller is from Control in stem.control). Isn't there anything like that in Stem (sorry, I checked and double/triple-checked and couldn't find anything) that tells you that Tor is changing its identity?
You can get the exit node IP without calling a geoip site.
This is, however, covered on a different Stack Exchange site: https://tor.stackexchange.com/questions/3253/how-do-i-trap-circuit-id-none-errors-in-the-stem-script-exit-used-py
As posted by @mirimir, his code below essentially attaches a stream event listener function, which is then used to get the circuit id, the circuit fingerprint, and finally the exit IP address:
#!/usr/bin/python
import functools
import time

from stem import StreamStatus
from stem.control import EventType, Controller

def main():
    print "Tracking requests for tor exits. Press 'enter' to end."
    print

    with Controller.from_port() as controller:
        controller.authenticate()

        stream_listener = functools.partial(stream_event, controller)
        controller.add_event_listener(stream_listener, EventType.STREAM)

        raw_input()  # wait for user to press enter

def stream_event(controller, event):
    if event.status == StreamStatus.SUCCEEDED and event.circ_id:
        circ = controller.get_circuit(event.circ_id)
        exit_fingerprint = circ.path[-1][0]
        exit_relay = controller.get_network_status(exit_fingerprint)
        t = time.localtime()
        print "datetime|%d-%02d-%02d %02d:%02d:%02d" % (t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)  # closing quote was missing in the post
        print "website|%s" % (event.target)
        print "exitip|%s" % (exit_relay.address)
        print "exitport|%i" % (exit_relay.or_port)
        print "fingerprint|%s" % exit_relay.fingerprint
        print "nickname|%s" % exit_relay.nickname
        print "locale|%s" % controller.get_info("ip-to-country/%s" % exit_relay.address, 'unknown')
        print

if __name__ == '__main__':
    main()  # not in the post, but needed for the script to actually run
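For this listener to work, Tor's control port has to be enabled (ControlPort 9051, plus CookieAuthentication or HashedControlPassword, in torrc); Controller.from_port() connects to 127.0.0.1:9051 by default. A quick connectivity check (a sketch, not from the original answer):

from stem.control import Controller

with Controller.from_port(port=9051) as controller:
    controller.authenticate()  # add password=... if torrc uses HashedControlPassword
    print(controller.get_version())  # prints the running tor version if the control port works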
You can use this code to check the current IP (change the SOCKS_PORT value to yours):

import re

import stem.process
import requesocks

SOCKS_PORT = 9053

# note: launch_tor() uses your default torrc; its SocksPort must match SOCKS_PORT
tor_process = stem.process.launch_tor()

proxy_address = 'socks5://127.0.0.1:{}'.format(SOCKS_PORT)
proxies = {
    'http': proxy_address,
    'https': proxy_address
}
response = requesocks.get("http://httpbin.org/ip", proxies=proxies)
print re.findall(r'[\d.-]+', response.text)[0]

tor_process.kill()
If you want to use SOCKS with requests, you should first do:
pip install requests[socks]
Then you can do:

import requests
import json
import stem.process
import stem

SOCKS_PORT = "9999"

tor = stem.process.launch_tor_with_config(
    config={
        'SocksPort': SOCKS_PORT,
    },
    tor_cmd='absolute_path/to/tor.exe',
)

r = requests.Session()
proxies = {
    'http': 'socks5://localhost:' + SOCKS_PORT,
    'https': 'socks5://localhost:' + SOCKS_PORT
}
response = r.get("http://httpbin.org/ip", proxies=proxies)
current_ip = response.json()['origin']  # was `self.current_ip` in the post, but there is no class here
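Regarding the identity-renewal sub-question: Stem (not "Steam") does expose this on the controller. A minimal sketch (not from the original answers; it assumes an enabled control port, as above):

import stem
from stem.control import Controller

with Controller.from_port(port=9051) as controller:
    controller.authenticate()
    # Tor rate-limits NEWNYM; these calls tell you when the next one is allowed.
    if not controller.is_newnym_available():
        print("next NEWNYM accepted in %.1f s" % controller.get_newnym_wait())
    controller.signal(stem.Signal.NEWNYM)  # request a new identity / fresh circuits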

Create a portal_user_catalog and have it used (Plone)

I'm creating a fork of my Plone site (which has not been forked for a long time). This site has a special catalog object for user profiles (an Archetypes-based object type), which is called portal_user_catalog:
$ bin/instance debug
>>> portal = app.Plone
>>> print [d for d in portal.objectMap() if d['meta_type'] == 'Plone Catalog Tool']
[{'meta_type': 'Plone Catalog Tool', 'id': 'portal_catalog'},
{'meta_type': 'Plone Catalog Tool', 'id': 'portal_user_catalog'}]
This looks reasonable because the user profiles don't have most of the indexes of the "normal" objects, but do have a small set of their own indexes.
Since I found no way to create this object from scratch, I exported it from the old site (as portal_user_catalog.zexp) and imported it into the new site. This seemed to work, but I can't add objects to the imported catalog, not even by explicitly calling the catalog_object method. Instead, the user profiles are added to the standard portal_catalog.
Now I found a module in my product which seems to serve the purpose (Products/myproduct/exportimport/catalog.py):
"""Catalog tool setup handlers.
$Id: catalog.py 77004 2007-06-24 08:57:54Z yuppie $
"""
from Products.GenericSetup.utils import exportObjects
from Products.GenericSetup.utils import importObjects
from Products.CMFCore.utils import getToolByName
from zope.component import queryMultiAdapter
from Products.GenericSetup.interfaces import IBody
def importCatalogTool(context):
"""Import catalog tool.
"""
site = context.getSite()
obj = getToolByName(site, 'portal_user_catalog')
parent_path=''
if obj and not obj():
importer = queryMultiAdapter((obj, context), IBody)
path = '%s%s' % (parent_path, obj.getId().replace(' ', '_'))
__traceback_info__ = path
print [importer]
if importer:
print importer.name
if importer.name:
path = '%s%s' % (parent_path, 'usercatalog')
print path
filename = '%s%s' % (path, importer.suffix)
print filename
body = context.readDataFile(filename)
if body is not None:
importer.filename = filename # for error reporting
importer.body = body
if getattr(obj, 'objectValues', False):
for sub in obj.objectValues():
importObjects(sub, path+'/', context)
def exportCatalogTool(context):
"""Export catalog tool.
"""
site = context.getSite()
obj = getToolByName(site, 'portal_user_catalog', None)
if tool is None:
logger = context.getLogger('catalog')
logger.info('Nothing to export.')
return
parent_path=''
exporter = queryMultiAdapter((obj, context), IBody)
path = '%s%s' % (parent_path, obj.getId().replace(' ', '_'))
if exporter:
if exporter.name:
path = '%s%s' % (parent_path, 'usercatalog')
filename = '%s%s' % (path, exporter.suffix)
body = exporter.body
if body is not None:
context.writeDataFile(filename, body, exporter.mime_type)
if getattr(obj, 'objectValues', False):
for sub in obj.objectValues():
exportObjects(sub, path+'/', context)
I tried to use it, but I have no idea how it is supposed to be done;
I can't call it TTW (should I try to publish the methods?!).
I tried it in a debug session:
$ bin/instance debug
>>> portal = app.Plone
>>> from Products.myproduct.exportimport.catalog import exportCatalogTool
>>> exportCatalogTool(portal)
Traceback (most recent call last):
File "<console>", line 1, in <module>
File ".../Products/myproduct/exportimport/catalog.py", line 58, in exportCatalogTool
site = context.getSite()
AttributeError: getSite
So, if this is the way to go, it looks like I need a "real" context.
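(Editorial aside, not in the original post: GenericSetup provides "real" setup contexts; something along these lines might give exportCatalogTool the context it expects. The constructor signature should be checked against your GenericSetup version.)

# Hedged sketch: obtain a GenericSetup export context in a debug session.
from Products.CMFCore.utils import getToolByName
from Products.GenericSetup.context import TarballExportContext

portal = app.Plone
setup_tool = getToolByName(portal, 'portal_setup')
context = TarballExportContext(setup_tool)  # contexts created this way provide getSite()
exportCatalogTool(context)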
Update: To get this context, I tried an External Method:
# -*- coding: utf-8 -*-
from Products.myproduct.exportimport.catalog import exportCatalogTool
from pdb import set_trace


def p(dt, dd):
    print '%-16s%s' % (dt + ':', dd)


def main(self):
    """
    Export the portal_user_catalog
    """
    g = globals()
    print '#' * 79
    for a in ('__package__', '__module__'):
        if a in g:
            p(a, g[a])
    p('self', self)
    set_trace()
    exportCatalogTool(self)
However, when I called it, I got the same <PloneSite at /Plone> object as the argument to the main function, which doesn't have the getSite attribute. Perhaps my site doesn't call such External Methods correctly?
Or would I need to mention this module somehow in my configure.zcml, and if so, how? I searched my directory tree (especially below Products/myproduct/profiles) for exportimport, the module name, and several other strings, but I couldn't find anything; perhaps there was an integration once, but it broke ...
So how do I make this portal_user_catalog work?
Thank you!
Update: Another debug session suggests the source of the problem to be some transaction matter:
>>> portal = app.Plone
>>> puc = portal.portal_user_catalog
>>> puc._catalog()
[]
>>> profiles_folder = portal.some_folder_with_profiles
>>> for o in profiles_folder.objectValues():
... puc.catalog_object(o)
...
>>> puc._catalog()
[<Products.ZCatalog.Catalog.mybrains object at 0x69ff8d8>, ...]
This population of the portal_user_catalog doesn't persist; after termination of the debug session and starting fg, the brains are gone.
It looks like the problem was indeed related to transactions.
I had

import transaction
...

class Browser(BrowserView):
    ...

    def processNewUser(self):
        ...
        transaction.commit()

before, but apparently this was not good enough (and/or perhaps not done correctly).
Now I start the transaction explicitly with transaction.begin(), save intermediate results with transaction.savepoint(), abort the transaction explicitly with transaction.abort() in case of errors (try / except), and have exactly one transaction.commit() at the end, in the case of success. Everything seems to work.
Of course, Plone still doesn't take this non-standard catalog into account; when I "clear and rebuild" it, it is empty afterwards. But for my application it works well enough.
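Pulling the pieces above together, a hedged sketch of the pattern that ended up working, wrapped around the repopulation loop from the debug session (the folder name is taken from that session and is otherwise an assumption):

import transaction

def repopulate_user_catalog(portal):
    """Re-index all profile objects into portal_user_catalog, committing explicitly."""
    puc = portal.portal_user_catalog
    profiles_folder = portal.some_folder_with_profiles  # assumption: folder from the debug session
    transaction.begin()
    try:
        for o in profiles_folder.objectValues():
            puc.catalog_object(o)
        transaction.savepoint()   # intermediate safety point
    except Exception:
        transaction.abort()       # roll back on any error
        raise
    else:
        transaction.commit()      # exactly one commit, on success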
